diff options
author | Henrik Edin <henrik.edin@mongodb.com> | 2019-01-09 12:54:11 -0500 |
---|---|---|
committer | Henrik Edin <henrik.edin@mongodb.com> | 2019-01-25 13:37:26 -0500 |
commit | cb7c8ceba4bf7d6007a250799bfa35c129dd3e58 (patch) | |
tree | 1a9285e804767841de1b664150c73abd36e2f45c /src | |
parent | 4f06ba60e0fef61a4136c4c77ae4e98627e16ffa (diff) | |
download | mongo-cb7c8ceba4bf7d6007a250799bfa35c129dd3e58.tar.gz |
SERVER-37996 Add stock gperftools 2.7 as an allocator option activated with --allocator=tcmalloc-experimental
Diffstat (limited to 'src')
233 files changed, 78500 insertions, 7 deletions
diff --git a/src/mongo/util/tcmalloc_server_status_section.cpp b/src/mongo/util/tcmalloc_server_status_section.cpp index 85ac74ffc4a..ebd02e64a73 100644 --- a/src/mongo/util/tcmalloc_server_status_section.cpp +++ b/src/mongo/util/tcmalloc_server_status_section.cpp @@ -55,7 +55,7 @@ namespace { // a long time. const int kManyClients = 40; -stdx::mutex tcmallocCleanupLock; +MONGO_COMPILER_VARIABLE_UNUSED stdx::mutex tcmallocCleanupLock; MONGO_EXPORT_SERVER_PARAMETER(tcmallocEnableMarkThreadTemporarilyIdle, bool, false); diff --git a/src/third_party/SConscript b/src/third_party/SConscript index 4a7d44ebdfd..aa9785ce7d6 100644 --- a/src/third_party/SConscript +++ b/src/third_party/SConscript @@ -14,7 +14,7 @@ pcreSuffix = "-8.42" mozjsSuffix = '-45' yamlSuffix = '-0.6.2' icuSuffix = '-57.1' -gperftoolsSuffix = '-2.5' +gperftoolsSuffix = '-2.7' if env['MONGO_ALLOCATOR'] == "tcmalloc-experimental" else '-2.5' timelibSuffix = '-2018.01' tomcryptSuffix = '-1.18.2' benchmarkSuffix = '-1.4.1' @@ -324,7 +324,7 @@ if "tom" in env["MONGO_CRYPTO"]: ]) gperftoolsEnv = env -if (gperftoolsEnv['MONGO_ALLOCATOR'] == "tcmalloc"): +if (gperftoolsEnv['MONGO_ALLOCATOR'] in ["tcmalloc", "tcmalloc-experimental"]): if use_system_version_of_library("tcmalloc"): gperftoolsEnv = env.Clone( SYSLIBDEPS=[ diff --git a/src/third_party/gperftools-2.7/AUTHORS b/src/third_party/gperftools-2.7/AUTHORS new file mode 100644 index 00000000000..3995ed4cf57 --- /dev/null +++ b/src/third_party/gperftools-2.7/AUTHORS @@ -0,0 +1,2 @@ +google-perftools@googlegroups.com + diff --git a/src/third_party/gperftools-2.7/COPYING b/src/third_party/gperftools-2.7/COPYING new file mode 100644 index 00000000000..e4956cfd9fd --- /dev/null +++ b/src/third_party/gperftools-2.7/COPYING @@ -0,0 +1,28 @@ +Copyright (c) 2005, Google Inc. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/src/third_party/gperftools-2.7/ChangeLog b/src/third_party/gperftools-2.7/ChangeLog new file mode 100644 index 00000000000..67fe9782d00 --- /dev/null +++ b/src/third_party/gperftools-2.7/ChangeLog @@ -0,0 +1,12430 @@ +commit 9608fa3bcf8020d35f59fbf70cd3cbe4b015b972 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Apr 29 16:55:59 2018 -0700 + + bumped version to 2.7 + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit db890ccfade3cf054ec330dfb4a9ddfab971367e +Author: HolyWu <holywu@gmail.com> +Date: Mon Apr 30 12:08:51 2018 +0800 + + Clean up src/windows/config.h + +M src/windows/config.h + +commit 497ea331654f7ea9e4e6405f5beaea8ebcc9d10b +Author: HolyWu <holywu@gmail.com> +Date: Sun Apr 15 22:47:17 2018 +0800 + + Fix WIN32_OVERRIDE_ALLOCATORS for VS2017 + + At first I try to add some functions as what Chrome does at their + https://chromium.googlesource.com/chromium/src/+/master/base/allocator/allocator_shim_override_ucrt_symbols_win.h, + but it still fails. So I decide to remove all heap-related objects + from libucrt.lib to see what happens. At the end I find that a lot of + functions in the CRT directly invoke _malloc_base instead of + malloc (and the others alike), hence we need to override them as well. + + This should close issue #716. 
+ + [alkondratenko@gmail.com: added reference to ticket] + Signed-off-by: Aliaksey Kandratsenka <alkondratenko@gmail.com> + +M src/libc_override.h +M src/tcmalloc.cc +M src/windows/override_functions.cc +M src/windows/port.h + +commit ebc85cca9083241ae360e0c68e2f1d7538001fa3 +Author: HolyWu <holywu@gmail.com> +Date: Tue Apr 17 11:40:49 2018 +0800 + + Enable aligned new/delete declarations on Windows when applicable + +M src/windows/gperftools/tcmalloc.h +M src/windows/gperftools/tcmalloc.h.in + +commit a3badd6d2127c161f6fbad853fccd6e8064fcde7 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Apr 29 16:34:54 2018 -0700 + + Really fix CheckAddressBits compilation warning + + It looks like previous fix introduced another warning on gcc for + i386. Somehow it barked on kAddressBits shift even that shift was dead + code. + + Anyways, we now avoid possibility of undefined shift even + stronger. And it actually removes compile warning on all compilers I + tested. + +M src/system-alloc.cc + +commit 7c718fe1768273f94acf2bd43cfa1f44a4b9bdcc +Author: HolyWu <holywu@gmail.com> +Date: Thu Apr 19 13:49:28 2018 +0800 + + Add tests for sized deallocation + +M src/tests/tcmalloc_unittest.cc + +commit 30e5e614a846d5f046454341f108bc4cb2b70189 +Author: Fabrice Fontaine <fontaine.fabrice@gmail.com> +Date: Sun Apr 22 19:17:12 2018 +0200 + + Fix build without static libraries + + Only add -static to malloc_bench_LDFLAGS and binary_trees_LDFLAGS if + ENABLE_STATC is set otherwise build with some compilers will fail if + user has decided to build only the shared version of gperftools + libraries + + Signed-off-by: Fabrice Fontaine <fontaine.fabrice@gmail.com> + +M Makefile.am + +commit 836c4f29a585e4b176738f64cf8d312171be5fe0 +Author: Ben Dang <me@bdang.it> +Date: Fri Apr 13 10:54:05 2018 -0700 + + Update documentation for heap_checker.html + + Make it clear that the static methods used require the "namespace", + `HeapLeakChecker::`. 
+ +M docs/heap_checker.html + +commit e47d0d1c5142a4ad1203e58db31af2f58bafcda9 +Author: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> +Date: Tue Apr 10 15:56:52 2018 -0300 + + powerpc: Re-enable VDSO support + + Powerpc requires VDSO support in order to produce a stack trace. + Without this, it isn't possible to complete a build. + Tested on powerpc, powerpc64 and powerpc64le. + +M src/base/vdso_support.h + +commit 0a66dd3a6ac60e57d9c01f17309a40e2ebe06837 +Author: Christoph Müllner <christophm30@gmail.com> +Date: Sat Apr 7 09:00:09 2018 +0200 + + linux: add aarch64_ilp32 support. + + This patch adds aarch64 (arm64) ILP32 support by using + the proper syscalls for sys_fcntl(), sys_fstat(), sys_lseek() + and sys_mmap(). + + Signed-off-by: Christoph Müllner <christophm30@gmail.com> + +M src/base/linux_syscall_support.h + +commit 05dff0966362932631a2120eaeb8eebb50dd9300 +Author: Christoph Müllner <christophm30@gmail.com> +Date: Sat Apr 7 08:30:32 2018 +0200 + + Fix signature of sbrk. + + The manpage of sbrk says that its argument is of type intptr_t. + This patch fixes a compiler warning on gcc 7.3.0. + + Signed-off-by: Christoph Müllner <christophm30@gmail.com> + +M src/malloc_hook_mmap_linux.h + +commit 33ae0ed2ae7ce143276658be029c28ec6f2a73c9 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Apr 9 20:57:29 2018 -0700 + + unbreak compilation on GNU/Linux i386 + + Recent commit to fix int overflow for implausibly huge allocation + added call to std::min. Notably, first arg was old size divided by + unsigned long 4. And on GNU/Linux i386 size_t is not long. So such + division was promoting first arg to unsigned long while second arg was + still size_t, so just unsigned. And that caused compilation to fail. + + Fix is droping 'ul'. + +M src/tcmalloc.cc + +commit 977e0d45003514d088e4bb83474c985094acee47 +Author: Christoph Müllner <christophm30@gmail.com> +Date: Sat Apr 7 08:35:42 2018 +0200 + + Remove not needed header in vdso_support.cc. 
+ + Signed-off-by: Christoph Müllner <christophm30@gmail.com> + +M src/base/vdso_support.cc + +commit 36bfa9a4046109efa40ccc0806c72331de9c915b +Author: Mao <littlecvr@gmail.com> +Date: Mon Mar 26 17:22:19 2018 +0800 + + Enable tcmalloc VDSO support only on x86 to reduce static initializers + + Background context + ------------------ + crrev.com/1466173002 switched the GN tcmalloc target from source_set + -> static_library. There are good reasons for keeping tcmalloc a + source_set (see "Note on static libraries" in [1]). However, in the + current state source_set was exposing extra static initializers in the + GN build which, are not present in the gyp build due to the linker gc + sections. + + Resolution of this CL + --------------------- + The fact that vdso_support.cc is GC-ed by the linker is the symptom + that such code is unreachable. A search in the codebase shows that the + only client is stacktrace_x86-inl.h, which depends on VDSO only when + defined(__linux__) && defined(__i386__) This CL is therefore matching + this condition in vdso_support.h and conditioning the #define + HAVE_VDSO_SUPPORT with the same conditions. + + [1] + https://chromium.googlesource.com/chromium/src/+/master/tools/gn/docs/cookbook.md + + References: + https://bugs.chromium.org/p/chromium/issues/detail?id=559766 + https://bugs.chromium.org/p/chromium/issues/detail?id=564618 + +M src/base/vdso_support.h + +commit 1cb5de6db9f30daa56a0b54652a7ff6da288c1bd +Author: Mao <littlecvr@gmail.com> +Date: Mon Mar 26 17:28:28 2018 +0800 + + Explicitly prevent int overflow + +M src/tcmalloc.cc + +commit 8f63f2bb983e93b57e24a05964104f1d833c575b +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 24 21:15:01 2018 -0700 + + Correctly detect presence of various functions in tcmalloc.h + + This fixes test miscompilations on some clang 6.0 + installations. Particularly issue #946. + + Defines tested by tcmalloc.h where totally wrong defines. 
configure + actually puts defines of HAVE_DECL_FOOBAR kind in config.h. Which is + what we're using now. + +M src/tcmalloc.h + +commit 736648887b0b27ec70a50f0954a614dd65c1eb12 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 24 15:54:48 2018 -0700 + + Don't test OOM handling of debugallocator + + This may fix issue #969. + + When testing OOM handling we set up sys allocator that fails memory + allocation. But debugallocator itself allocates some internal metadata + memory via malloc and crashes if those allocations fail. So + occasionally this test failed when debugallocator's internal malloc + ended up causing sys allocator. + + So instead of failing tests from time to time, we drop it for debug + allocator. It's OOM handling is already crashy anyways. + +M src/tests/tcmalloc_unittest.cc + +commit c4a8e00da41c43da7e78858e06f3153f09f6967d +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 24 19:09:15 2018 -0700 + + Fix warning about one of CheckAddressBits functions unused + +M src/system-alloc.cc + +commit 47c99cf492fa340671fa89afec263b8d6e859755 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 24 18:29:06 2018 -0700 + + unbreak printing large span stats + + One of recent commits started passing kMaxPages to printf but not used + it. Thankfully compilers gave us warning. Apparently intention was to + print real value of kMaxPages, so this is what we're doing now. 
+ +M src/tcmalloc.cc + +commit 34f78a2dcd8160b483d86474702785337bbeb7ac +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Mar 18 16:39:06 2018 -0700 + + bumped version to 2.7rc + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit db98aac55a90d45e9abe188c8a928c3561d3f223 +Author: Todd Lipcon <todd@cloudera.com> +Date: Sat Mar 17 09:43:34 2018 -0700 + + Add a central free list for kMaxPages-sized spans + + Previously, the central free list with index '0' was always unused, + since freelist index 'i' tracked spans of length 'i' and there are no + spans of length 0. This meant that there was no freelist for spans of + length 'kMaxPages'. In the default configuration, this corresponds to + 1MB, which is a relatively common allocation size in a lot of + applications. + + This changes the free list indexing so that index 'i' tracks spans of + length 'i + 1', meaning that free list index 0 is now used and + freelist[kMaxPages - 1] tracks allocations of kMaxPages size (1MB by + default). + + This also fixes the stats output to indicate '>128' for the large + spans + stats rather than the incorrect '>255' which must have referred to a + historical value of kMaxPages. + + No new tests are added since this code is covered by existing tests. + +M docs/pageheap.dot +M docs/pageheap.gif +M docs/tcmalloc.html +M src/common.h +M src/page_heap.cc +M src/page_heap.h +M src/tcmalloc.cc + +commit d7be9385609328a03c5cfe775473a7dc8e014fd3 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Mar 4 23:29:46 2018 -0800 + + implement more robust detection of sized delete support + + As reported in issue #954, osx clang compiler is able to optimize our + previous detection away while not really having runtime support for + sized delete. So this time we use AC_LINK_IFELSE and more robust code + to prevent compiler from optimizing away sized delete call. This + should reliably catch "bad" compilers. 
+ + Special thanks to Alexey Serbin for reporting the issue, suggesting a + fix and verifying it. + + Fixes issue #954. + +M configure.ac + +commit f1d3fe4a21e339a3fd6e4592ee7444484a7b92dc +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Feb 25 13:55:53 2018 -0800 + + refactored handling of reverse span set iterator for correctness + + I.e. no more questionable memcpy and we run iterator's destructor when + we remove span from SpanSet. + +M src/page_heap.cc +M src/span.h + +commit 59c77be0fad2a49e31d51877985e7c48f73afcea +Author: Todd Lipcon <todd@cloudera.com> +Date: Sun Feb 11 16:21:42 2018 -0800 + + Update docs for central page heap to reflect tree + +M docs/pageheap.dot +M docs/pageheap.gif +M docs/tcmalloc.html + +commit 06c9414ec423ffe442c047b2560555f9d5847b1d +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Jun 3 15:31:06 2017 -0700 + + Implemented O(log n) searching among large spans + + This is implemented via std::set with custom STL allocator that + delegates to PageHeapAllocator. Free large spans are not linked + together via linked list, but inserted into std::set. Spans also store + iterators to std::set positions pointing to them. So that removing + span from set is fast too. + + Patch implemented by Aliaksey Kandratsenka and Todd Lipcon based on + earlier research and experimentation by James Golick. 
+ + Addresses issue #535 + + [alkondratenko@gmail.com: added Todd's fix for building on OSX] + [alkondratenko@gmail.com: removed unnecessary Span constructor] + [alkondratenko@gmail.com: added const for SpanSet comparator] + [alkondratenko@gmail.com: added operator != for STLPageHeapAllocator] + +M src/page_heap.cc +M src/page_heap.h +M src/page_heap_allocator.h +M src/span.h +M src/tests/tcmalloc_unittest.cc + +commit a42e44738a330783781541411392ba27df88b8b3 +Author: Ishan Arora <ishan.arora@gravitonresearch.com> +Date: Tue Jan 9 01:33:13 2018 +0530 + + typo in docs/tcmalloc.html + +M docs/tcmalloc.html + +commit 71bf09aabe93dbe390ebb54c57d3c5b3d627c142 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Dec 9 09:58:31 2017 -0800 + + bumped version to 2.6.3 + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 0bccb5e658762f30e6e3abbc40dc244e8f8911c1 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Dec 9 08:51:19 2017 -0800 + + fix malloc fast path for patched windows functions + + malloc_fast_path now receives oom function instead of full allocation + function and windows/patch_function.cc wasn't updated until now. It + caused assertion failures as reported in issue #944. + +M src/windows/patch_functions.cc + +commit 8b1d13c631024ce7e80508c284c7bc5b1327e131 +Author: Stephan Zuercher <stephan@turbinelabs.io> +Date: Tue Dec 5 09:49:36 2017 -0800 + + configure.ac: use link check for std::align_val_t + +M configure.ac + +commit 36ab068baa3b07c083bd79029622aa382aeff84a +Author: Stephan Zuercher <stephan@turbinelabs.io> +Date: Tue Dec 5 09:31:38 2017 -0800 + + configure.ac: better test for -faligned-new + + XCode 9 provides only partial support for aligned new/delete when + -faligned-new + is specified. Require successful linking to enable aligned new/delete. 
+ +M configure.ac + +commit 6a4b0799975576c2722daa75b35f6f65220d22b6 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Thu Nov 30 18:19:17 2017 +0000 + + bumped version to 2.6.2 + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 22917145186bc8bbb6d98090d788ed7c87b282b1 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Thu Nov 30 16:36:33 2017 +0000 + + implement fast-path for memalign/aligned_alloc/tc_new_aligned + + We're taking advantage of "natural" alignedness of our size classes + and instead of previous loop over size classes looking for suitably + aligned size, we now directly compute right size. See align_size_up + function. And that gives us ability to use our existing malloc + fast-path to make memalign neat and fast in most common + cases. I.e. memalign/aligned_alloc now only tail calls and thus avoids + expensive prologue/epilogue and is almost as fast as regular malloc. + +M src/common.cc +M src/tcmalloc.cc + +commit 8b9728b02325a9bee9afd3957e506c111ed40d40 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Thu Nov 30 18:12:32 2017 +0000 + + add memalign benchmark to malloc_bench + +M benchmark/malloc_bench.cc + +commit 79c91a9810c928d7f6d0cb168c10ce470a714aa8 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Wed Nov 29 21:15:04 2017 +0000 + + always define empty PERFTOOLS_NOTHROW + + Because somehow clang still builds "this function will not throw" code + even with noexcept. Which breaks performance of + tc_malloc/tc_new_nothrow. The difference with throw() seems to be just + which function is called when unexpected exception happens. + + So we work around this sillyness by simply dropping any exception + specification when compiling tcmalloc. 
+ +M src/tcmalloc.cc + +commit 03da6afff57c12845260997213b6ad89e420bab3 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Wed Nov 29 20:19:07 2017 +0000 + + unbreak throw declarations on operators new/delete + + We now clearly separate PERFTOOLS_NOTHROW (used for tc_XXX functions) + and throw()/noexcept (used for operators we define). + + The former is basically "nothrow() for our callers, nothing for + us". It is roughly equivalent of what glibc declares for malloc and + friends. If some exception-full C++ code calls such function it + doesn't have to bother setting up exception handling around such + call. Notably, it is still important for those functions to _not have + throw() declarations when we're building tcmalloc. Because C++ throw() + requires setting up handling of unexpected exceptions thrown from + under such functions which we don't want. + + The later is necessary to have operators new/delete definitions have + "correct" exception specifications to calm down compiler + warnings. Particularly older clang versions warn if new/delete aren't + defined with correct exception specifications. Also this commit fixes + annoying gcc 7+ warning (and gnu++14 mode) that complains about + throw() being deprecated. + +M src/base/basictypes.h +M src/libc_override.h +M src/libc_override_gcc_and_weak.h +M src/libc_override_redefine.h +M src/tests/tcmalloc_unittest.cc +M src/windows/get_mangled_names.cc + +commit 89fe59c8318b2f8cec8ce00182dd24c357252c96 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Wed Nov 29 13:37:50 2017 +0000 + + Fix OOM handling in fast-path + + Previous fast-path malloc implementation failed to arrange proper oom + handling for operator new. I.e. operator new is supposed to call new + handler and throw exception, which was not arranged in fast-path case. + + Fixed code now passes pointer for oom function to + ThreadCache::FetchFromCentralCache which will call it in oom + condition. 
Test is added to verify correct behavior. + + I've also updated some fast-path-related comments for more accuracy. + +M src/tcmalloc.cc +M src/tests/tcmalloc_unittest.cc +M src/thread_cache.cc +M src/thread_cache.h + +commit a29a0cf348e131d5b8ec26c39dabeac89cf13fcd +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Tue Nov 28 14:43:11 2017 +0000 + + delete-trailing-whitespace on thread_cache.* + +M src/thread_cache.cc +M src/thread_cache.h + +commit e6cd69bdecde074c3c7a52ccb099eafcecf668dc +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Wed Nov 29 19:49:18 2017 +0000 + + reintroduce aliasing for aligned delete + + Without aliasing performance is likely to be at least partially + affected. There is still concern that aliasing between functions of + different signatures is not 100% safe. We now explicitly list of + architectures where aliasing is known to be safe. + +M src/tcmalloc.cc + +commit fb30c3d435c4712785a50f8a422c76ad490a12b2 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Wed Nov 29 19:10:23 2017 +0000 + + fully disable aligned new on windows for now + +M src/windows/gperftools/tcmalloc.h +M src/windows/gperftools/tcmalloc.h.in + +commit 7efb3ecf37d88edf9cf9a43efb89b425eaf81d5e +Author: Andrey Semashev <andrey.semashev@gmail.com> +Date: Sat Sep 30 17:47:35 2017 +0300 + + Add support for C++17 operator new/delete for overaligned types. + + - Add auto-detection of std::align_val_t presence to configure + scripts. This + indicates that the compiler supports C++17 operator new/delete + overloads + for overaligned types. + + - Add auto-detection of -faligned-new compiler option that appeared + in gcc 7. + The option allows the compiler to generate calls to the new + operators. It is + needed for tests. + + - Added overrides for the new operators. The overrides are enabled + if the + support for std::align_val_t has been detected. 
The implementation + is mostly + based on the infrastructure used by memalign, which had to be + extended to + support being used by C++ operators in addition to C functions. In + particular, + the debug version of the library has to distinguish memory + allocated by + memalign from that by operator new. The current implementation + of sized + overaligned delete operators do not make use of the supplied + size argument + except for the debug allocator because it is difficult to calculate + the exact + allocation size that was used to allocate memory with + alignment. This can be + done in the future. + + - Removed forward declaration of std::nothrow_t. This was not + portable as + the standard library is not required to provide nothrow_t + directly in + namespace std (it could use e.g. an inline namespace within + std). The <new> + header needs to be included for std::align_val_t anyway. + + - Fixed operator delete[] implementation in libc_override_redefine.h. + + - Moved TC_ALIAS definition to the beginning of the file in + tcmalloc.cc so that + the macro is defined before its first use in nallocx. + + - Added tests to verify the added operators. 
+ + [alkondratenko@gmail.com: fixed couple minor warnings, and some + whitespace change] + [alkondratenko@gmail.com: removed addition of TC_ALIAS in debug + allocator] + Signed-off-by: Aliaksey Kandratsenka <alkondratenko@gmail.com> + +M Makefile.am +M configure.ac +M src/debugallocation.cc +M src/gperftools/tcmalloc.h.in +M src/libc_override_gcc_and_weak.h +M src/libc_override_redefine.h +M src/tcmalloc.cc +M src/tests/tcmalloc_unittest.cc +M src/windows/gperftools/tcmalloc.h +M src/windows/gperftools/tcmalloc.h.in + +commit 7a6e25f3b143c35ed6fff8bb2c0f8de1ab348a39 +Author: Andrew Morrow <acm@mongodb.com> +Date: Tue Nov 7 15:57:17 2017 -0500 + + Add new statistics for the PageHeap + + [alkondratenko@gmail.com: addressed init order mismatch warning] + Signed-off-by: Aliaksey Kandratsenka <alkondratenko@gmail.com> + +M src/page_heap.cc +M src/page_heap.h +M src/tcmalloc.cc + +commit 6e3a702fb9c86eb450f22b326ecbceef4b0d6604 +Author: Jianbo Yang <jianbyan@microsoft.com> +Date: Tue Oct 17 01:09:18 2017 +0800 + + Fix data race setting size_left_ in ThreadCache::SetMaxSize + + This commit is to fix the data race in ThreadCache::SetMaxSize. + ThreadCache::size_left_ is removed and ThreadCache::size_ is + added. ThreadCache::size_left_ was introduced for optimization. + It is updated in several functions of ThreadCache, including the + ThreadCache::SetMaxSize. But thread A can update size_left_ of + thread B via SetMaxSize without protection or synchronization. + + There should not be data race around ThreadCache::size_, for it + isn't accessed by multi threads. + + The optimization of tail-call in tc_{malloc, new, free} is kept + and no other logics are affected. 
+ +M src/thread_cache.cc +M src/thread_cache.h + +commit 235471f96564aa95354839bb135661e561966001 +Author: cs-lee <sollcs.star@gmail.com> +Date: Sun Aug 6 04:57:42 2017 -0700 + + fix memory leak in Symbolize function + + [alkondratenko@gmail.com: reworded commit message] + Signed-off-by: Aliaksey Kandratsenka <alkondratenko@gmail.com> + +M src/symbolize.cc + +commit 47efdd60f596f7180689337cbba2dab35539ba98 +Author: cs-lee <sollcs.star@gmail.com> +Date: Sun Aug 6 04:51:52 2017 -0700 + + Added mising va_end() in TracePrintf function + + Normally the va_end function does not do anything, + but it should be called because some platforms need it. + + [alkondratenko@gmail.com: reworded commit message] + Signed-off-by: Aliaksey Kandratsenka <alkondratenko@gmail.com> + +M src/debugallocation.cc + +commit 497b60ef0fa93ed41ccacf6ffd0d536789401d29 +Author: Vladimir <Volodimir.Krylov@gmail.com> +Date: Fri Jul 14 15:13:32 2017 +0300 + + Implemented GetProgramInvocationName on FreeBSD + + Few lines of code was taken from + /usr/src/contrib/libexecinfo/backtrace.c + + [alkondratenko@gmail.com: updated commit message + Signed-off-by: Aliaksey Kandratsenka <alkondratenko@gmail.com> + +M src/symbolize.cc + +commit ac072a3fc768b9699b1e809a379f576aa3e48983 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Sep 23 14:55:33 2017 -0700 + + Revert "Ignore current_instance heap allocation when leak sanitizer + is enabled" + + This reverts commit 70a35422b5509a456584b132ad8ce4466af323ea. + +M Makefile.am +M src/malloc_extension.cc + +commit fb5987d57913867e3b0f20490461ea55bfabe9ee +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Sep 23 14:55:20 2017 -0700 + + Revert "Ensure that lsan flags are appended on all necessary targets" + + This reverts commit a3bf61ca81b68e7792739c451aceef00cf7d7d03. 
+ +M Makefile.am + +commit 5815f02105acd5d7fd0e6ec038a9e96ccc36911f +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Sep 16 21:14:23 2017 -0700 + + Use safe getenv for setting up backtrace capturing method + + This code runs very early, so using special "early" version of getenv + is reasonable. It should fix issue #912. + +M src/stacktrace.cc + +commit aab4277311759b32ae8c442b49edf002230ea165 +Author: Dorin Lazăr <dorin.lazar@gmail.com> +Date: Thu Sep 21 22:11:49 2017 +0300 + + Fixed LTO warning about the mismatch between return values for + ProfilingIsEnabledForAllThreads() + +M src/heap-checker.cc + +commit d406f2285390c402e824dd28e6992f7f890dcdf9 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Aug 5 22:05:10 2017 -0700 + + implement support for C11 aligned_alloc + + Just like glibc does, we simply alias it to memalign. + +M Makefile.am +M src/libc_override_gcc_and_weak.h +M src/libc_override_redefine.h + +commit 92a27e41a137704772479277786d035763938921 +Author: Piotr Sikora <piotrsikora@google.com> +Date: Mon Aug 21 15:06:23 2017 -0700 + + Fix build on macOS. + + Fixes #910. 
+ + Signed-off-by: Piotr Sikora <piotrsikora@google.com> + +M Makefile.am + +commit e033431e5a9e02e4d1882bb6fcd212c4fab56173 +Author: Khem Raj <raj.khem@gmail.com> +Date: Sun Jul 16 19:28:17 2017 -0700 + + include fcntl.h for loff_t definition + + Fixes + linux_syscall_support.h:2641:26: error: 'loff_t' has not been declared + + Signed-off-by: Khem Raj <raj.khem@gmail.com> + +M src/base/linux_syscall_support.h + +commit e41bc414048eb4a37743e294bed3f1c63a1fa559 +Author: Khem Raj <raj.khem@gmail.com> +Date: Sat Jul 1 13:21:21 2017 -0700 + + Use ucontext_t instead of struct ucontext + + Newer glibc has dropped the ucontext tag from exposing + + Signed-off-by: Khem Raj <raj.khem@gmail.com> + +M src/stacktrace_powerpc-linux-inl.h + +commit bf840dec0495e17f5c8403e68e10b9d6bf05c559 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Jul 9 14:55:33 2017 -0700 + + bumped version to 2.6.1 + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 2d220c7e26bd279ec87f9d1fc694ca5114b20bee +Author: Romain Geissler <romain.geissler@amadeus.com> +Date: Sun Jun 18 15:21:19 2017 +0200 + + Replace "throw()" by "PERFTOOLS_NOTHROW" + + Automatically done with: + sed -e 's/\<throw[[:space:]]*([[:space:]]*)/PERFTOOLS_NOTHROW/g' -i + $(git grep -l 'throw[[:space:]]*([[:space:]]*)') + + [alkondratenko@gmail.com: updated to define empty PERFTOOLS_NOTHROW + only on pre-c++11 standards] + +M src/libc_override_gcc_and_weak.h +M src/libc_override_redefine.h +M src/tcmalloc.cc +M src/windows/get_mangled_names.cc + +commit c4de73c0e69b9a75b6795fdd4598234baed8496d +Author: Romain Geissler <romain.geissler@amadeus.com> +Date: Sun Jun 18 15:16:16 2017 +0200 + + Add PERFTOOLS_THROW where necessary (as detected by GCC). 
+ +M src/base/basictypes.h +M src/libc_override_gcc_and_weak.h +M src/tests/tcmalloc_unittest.cc + +commit e5fbd0e24e19bf3c9a8777cfc78e6188c5557025 +Author: Romain Geissler <romain.geissler@amadeus.com> +Date: Sun Jun 18 15:05:46 2017 +0200 + + Rename PERFTOOLS_THROW into PERFTOOLS_NOTHROW. + + Automatically done with: + sed -e 's/\<PERFTOOLS_THROW\>/PERFTOOLS_NOTHROW/g' -i $(git grep + -l PERFTOOLS_THROW) + +M src/debugallocation.cc +M src/gperftools/tcmalloc.h.in +M src/tcmalloc.cc +M src/windows/gperftools/tcmalloc.h +M src/windows/gperftools/tcmalloc.h.in + +commit eeb7b84c20146c0e2e039ce72a2ea083a94ba80d +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Jul 8 15:23:37 2017 -0700 + + Register tcmalloc atfork handler as early as possible + + This is what other mallocs do (glibc malloc and jemalloc). The idea is + malloc is usually initialized very eary. So if we register atfork + handler at that time, we're likely to be first. And that makes our + atfork handler a bit safer, since there is much less chance of some + other library installing their "take all locks" handler first and + having fork take malloc lock before library's lock and deadlocking. + + This should address issue #904. + +M src/static_vars.cc +M src/static_vars.h +M src/thread_cache.cc + +commit 208c26caeff24fd8c1defea80cb723ec1ab4bf06 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Tue Jul 4 19:50:04 2017 -0700 + + Add initial syscall support for mips64 32-bit ABI + + This applies patch by Adhemerval Zanella from + https://github.com/gperftools/gperftools/issues/845. + + Only malloc (i.e. tcmalloc_minimal) was tested to work so far. 
+ +M src/base/linux_syscall_support.h +M src/malloc_hook_mmap_linux.h + +commit a3bf61ca81b68e7792739c451aceef00cf7d7d03 +Author: Francis Ricci <francisjricci@gmail.com> +Date: Wed Jul 5 12:39:41 2017 -0400 + + Ensure that lsan flags are appended on all necessary targets + +M Makefile.am + +commit 97646a193237af0f941e5bd3078eb72edbc2d548 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Tue Jul 4 21:02:34 2017 -0700 + + Add missing NEWS entry for recent 2.6 release + + Somehow I managed to miss this last commit in 2.6 release. So lets add + it now even if it is too late. + +M NEWS + +commit 4be05e43a1adab9ceea9bdaaae546fb938f0a86c +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Tue Jul 4 20:35:25 2017 -0700 + + bumped version up to 2.6 + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 70a35422b5509a456584b132ad8ce4466af323ea +Author: Francis Ricci <francisjricci@gmail.com> +Date: Thu Jun 15 11:04:33 2017 -0400 + + Ignore current_instance heap allocation when leak sanitizer is enabled + + Without this patch, any user program that enables LeakSanitizer will + see a leak from tcmalloc. Add a weak hook to __lsan_ignore_object, + so that if LeakSanitizer is enabled, the allocation can be ignored. + +M Makefile.am +M src/malloc_extension.cc + +commit 6eca6c64fabbff55c43a78537e466312524b3acd +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Jul 1 18:48:58 2017 -0700 + + Revert "issue-654: [pprof] handle split text segments" + + This reverts commit 8c3dc52fcfe02412a529769a22cbc75388a5d368. + + People have reported issues with this so lets stay safe and use older + even if less powerful code. 
+ +M src/pprof + +commit a495969cb6157cc361fc03228ae12ff7c0ecac4b +Author: KernelMaker <songzhao.asm@icloud.com> +Date: Wed May 24 16:46:49 2017 +0800 + + update the prev_class_size in each loop, or the min_object_size of + tcmalloc.thread will always be 1 when calling GetFreeListSizes + +M src/tcmalloc.cc + +commit 163224d8af5abdbbd8208ad6d37f13fb450af46f +Author: Kim Gräsman <kim.grasman@gmail.com> +Date: Fri May 26 13:09:03 2017 +0200 + + Document HEAPPROFILESIGNAL environment variable + +M docs/heapprofile.html + +commit 5ac82ec5b96d24219efd4c8aec47a45466eabd00 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon May 29 13:07:39 2017 -0700 + + added stacktrace capturing benchmark + +M .gitignore +M Makefile.am +A benchmark/getcontext_light.cc +A benchmark/unwind_bench.cc + +commit c571ae2fc9433e958f29b3c3525d34c22a9cb884 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon May 22 19:04:20 2017 -0700 + + 2.6rc4 + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit f2bae51e7e609855c26095f14ffbb84082694acb +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon May 22 18:58:15 2017 -0700 + + Revert "Revert "disable dynamic sized delete support by default"" + + This reverts commit b82d89cb7c8781a6028f6f5959cabdc5a273aec3. + + Dynamic sized delete support relies on ifunc handler being able to + look up environment variable. The issue is, when stuff is linked with + -z now linker flags, all relocations are performed early. And sadly + ifunc relocations are not treated specially. So when ifunc handler + runs, it cannot rely on any dynamic relocations at all, otherwise + crash is real possibility. So we cannot afford doing it until (and if) + ifunc is fixed. 
+ + This was brought to my attention by Fedora people at + https://bugzilla.redhat.com/show_bug.cgi?id=1452813 + +M configure.ac + +commit 6426c0cc8049dd50a681dc337ac9962577d5fa14 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun May 21 22:55:29 2017 -0700 + + 2.6rc3 + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 0c0e2fe43b5095d19470abdc3b4d83613fe37c6d +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon May 22 02:39:29 2017 -0700 + + enable 48-bit page map on msvc as well + +M src/common.h + +commit 83d6818295cb337b6201fe307e15755df9dcd47f +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon May 22 02:38:13 2017 -0700 + + speed up 3-level page map access + + There is no need to have pointer indirection for root node. This also + helps the case of early free of garbage pointer because we didn't + check root_ pointer for NULL. + +M src/pagemap.h + +commit f7ff175b92df5577aef9caf3fb02f9ff37b822ca +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun May 21 23:28:48 2017 -0700 + + add configure-time warning on unsupported backtrace capturing + + Both libgcc and libc's backtrace() are not really options for stack + trace capturing from inside profiling signal handler. So lets warn + people. + +M configure.ac + +commit cef582350c952bad761476d01ea64bb4087371ce +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun May 21 23:15:59 2017 -0700 + + align fast-path functions only if compiler supports that + + Apparently gcc only supports __attribute__((aligned(N))) on functions + only since version 4.3. So lets test it in configure script and only + use when possible. We now use CACHELINE_ALIGNED_FN macro for aligning + functions. 
+ +M configure.ac +M src/base/basictypes.h +M src/tcmalloc.cc + +commit bddf862b189c4508d5212f6e0e8ea81c4dd18811 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun May 21 22:24:19 2017 -0700 + + actually support very early freeing of NULL + + This was caught by unit tests on centos 5. Apparently some early + thingy is trying to do vprintf which calls free(0). Which used to + crash since before size class cache is initialized it'll report + hit (with size class 0) for NULL pointer, so we'd miss the case of + checking NULL pointer free and crash. + + The fix is to check for IsInited in the case when thread cache is + null, and if so then we escalte to free_null_or_invalid. + +M src/tcmalloc.cc +M src/thread_cache.cc + +commit 07a124d8c16bc7d52524ceb9f50d7a65b868e129 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun May 21 22:22:21 2017 -0700 + + don't use arg-ful constructor attribute for early nallocx test + + 101 is not very early anyways and arg-ful constructor attribute is + only supported since gcc 4.3 (and e.g. rhel 5's compiler fails to + compile it). So there seems to be very little value trying to ask for + priority of 101. + +M src/tests/tcmalloc_unittest.cc + +commit 5346b8a4def33915e6f877e308fb948f570a6120 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun May 21 22:18:01 2017 -0700 + + don't depend on SIZE_MAX definition in sampler.cc + + It was reported that SIZE_MAX isn't getting defined in C++ mode when + C++ standard is less than c++11. Because we still want to support + non-c++11 systems (for now), lets make it simple and not depend on + SIZE_MAX (original google-internal code used + std::numeric_limits<ssize_t>::max, but that failed to compile on + msvc). + + Fixes issue #887 and issue #889. 
+ +M src/sampler.cc + +commit 50125d8f70007fb2aa4d1c87df6a3092a3371edf +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon May 15 00:02:43 2017 -0700 + + 2.6rc2 + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit a5e8e42a47fc0175c8044d0d77bf192b03347964 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun May 14 23:45:08 2017 -0700 + + don't link-in libunwind if libunwind.h is missing + + I got report that some build environments for + https://github.com/lyft/envoy are having link-time issue due to + linking libunwind. It was happening despite libunwind.h being present, + which is clear bug as without header we won't really use libunwind. + +M configure.ac + +commit e92acdf98db229e8dc84993fd8895d55e6c69129 +Author: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> +Date: Wed Apr 26 06:46:43 2017 -0300 + + Fix compilation error for powerpc32 + + Fix the following compilation error for powerpc32 platform when using + latest glibc. + error: ‘siginfo_t’ was not declared in this scope + +M src/stacktrace_powerpc-linux-inl.h + +commit b48403a4b065830129e238feffe022abd93af807 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Apr 16 21:45:51 2017 -0700 + + 2.6rc + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 53f15325d93fbe0ba17bb3fac3da86ffd3f0f1ad +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun May 14 20:24:27 2017 -0700 + + fix compilation of tcmalloc_unittest.cc on older llvm-gcc + +M src/tests/tcmalloc_unittest.cc + +commit b1d88662cb4a38ee47aa48076745898033526f9d +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Apr 2 19:27:00 2017 -0700 + + change size class to be represented by 32 bit int + + This moves code closer to Google-internal version and provides for + slightly tighter code encoding on amd64. 
+ +M src/common.h +M src/packed-cache-inl.h +M src/page_heap.cc +M src/page_heap.h +M src/tcmalloc.cc +M src/tests/packed-cache_test.cc +M src/thread_cache.cc +M src/thread_cache.h + +commit 991f47a159f0e169883f06686f13c31688fa2bf0 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Thu Feb 2 15:58:08 2017 -0800 + + change default transfer batch back to 32 + + Some tensorflow benchmarks are seeing large regression with elevated + values. So lets stick to old safe default until we understand how + to make + larger values work for all workloads. + +M src/common.cc + +commit 7bc34ad1f60be2df6ca38f4bffdba4daa9aa9a7d +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Thu Feb 2 15:38:41 2017 -0800 + + support different number of size classes at runtime + + With TCMALLOC_TRANSFER_NUM_OBJ environment variable we can change + transfer batch size. And with that comes slightly different number of + size classes depending on value of transfer batch size. + + We used to have hardcoded number of size classes, so we couldn't + really support any batch size setting. + + This commit adds support for dynamic number of size classes (runtime + value returned by Static::num_size_classes()). + +M src/central_freelist.cc +M src/common.cc +M src/common.h +M src/page_heap.cc +M src/static_vars.cc +M src/static_vars.h +M src/tcmalloc.cc +M src/thread_cache.cc +M src/thread_cache.h + +commit 4585b78c8dae9183dbf5f124c0343a9f1244ed67 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Tue Feb 21 00:27:39 2017 -0800 + + massage allocation and deallocation fast-path for performance + + This is significant speedup of fast-path of malloc. Large part comes + from avoiding expensive function prologue/epilogue. Which is achieved + by making sure that tc_{malloc,new,free} etc are small functions that + do only tail-calls. We keep only critical path in those functions and + tail-call to slower "full" versions when we need to deal with less + common case. 
This helps compiler generate much tidier code. + + Fast-path readyness check is now different too. We used to have "min + size for slow path" variable, which was set to non-zero value when we + know that thread cache is present and ready. We now have use + thread-cache pointer not equal to NULL as readyness check. + + There is special ThreadCache::threadlocal_data_.fast_path_heap copy of + that pointer that can be temporarily nulled to disable malloc fast + path. This is used to enable emergency malloc. + + There is also slight change to tracking thread cache size. Instead of + tracking total size of free list, it now tracks size headroom. This + allows for slightly faster deallocation fast-path check where we're + checking headroom to stay above zero. This check is a bit faster than + comparing with max_size_. + +M src/linked_list.h +M src/tcmalloc.cc +M src/thread_cache.cc +M src/thread_cache.h +M src/windows/patch_functions.cc + +commit 5964a1d9c98ea3c178435ff01f9b06e03eeda58d +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Tue Feb 21 00:42:29 2017 -0800 + + always inline a number of hot functions + +M src/page_heap.h +M src/pagemap.h +M src/tcmalloc.cc +M src/thread_cache.h + +commit e419b7b9a66c39b44115b01520fb25a5100cec83 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Feb 20 21:11:14 2017 -0800 + + introduce ATTRIBUTE_ALWAYS_INLINE + +M src/base/basictypes.h +M src/tcmalloc.cc + +commit 7d588da7ec4f315ea2d02824d7e8813b0f95171d +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Dec 18 18:36:57 2016 -0800 + + synchronized Sampler implementation with Google-internal version + + This is mostly dropping FastLog2 which was never necessary for + performance, and making sampler to be called always, even if sampling + is disabled (this benefits more for always-sampling case of Google + fork). 
+ + We're also getting TryRecordAllocationFast which is not used yet, but + will be as part of subsequent fast-path speedup commit. + +M src/sampler.cc +M src/sampler.h +M src/static_vars.cc +M src/tests/sampler_test.cc +M src/thread_cache.h + +commit 27da4ade70d45312bfdf334aa8cf0d63bf78df14 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Tue Feb 21 00:41:43 2017 -0800 + + reduce size of class_to_size_ array + + Since 32-bit int is enough and accessing smaller array will use a bit + less of cache. + +M src/common.h +M src/tcmalloc.cc +M src/thread_cache.cc +M src/thread_cache.h + +commit 335f09d4e43a8413642e17e5ac374f925906c4e3 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Feb 20 23:49:15 2017 -0800 + + use static location for pageheap + + Makes it a bit faster to access, since we're dropping pointer + indirection. + +M src/static_vars.cc +M src/static_vars.h + +commit 6ff332fb517734cf01e86272fe37521b7155c995 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Feb 20 23:37:33 2017 -0800 + + move size classes map earlier in SizeMap + + Since we access them more often, having at least one of them at offset + 0 makes pi{c,e} code a bit smaller. + +M src/common.h + +commit 121b1cb32e7416ecb7b0c739588f1bd916c4913a +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Feb 20 22:57:43 2017 -0800 + + slightly faster size class cache + + Lower bits of page index are still used as index into hash + table. Those lower bits are zeroed, or-ed with size class and + placed into hash table. So checking is just loading value from hash + table, xoring with higher bits of address and checking if resultant + value is lower than 128. Notably, size class 0 is not considered + "invalid" anymore. 
+ +M src/central_freelist.cc +M src/packed-cache-inl.h +M src/page_heap.cc +M src/page_heap.h +M src/tcmalloc.cc +M src/tests/packed-cache_test.cc + +commit b57c0bad41700238f524b28483a1c6c18c0a621f +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Apr 16 23:15:16 2017 -0700 + + init tcmalloc prior to replacing system alloc + + Currently on windows, we're depending on uninitialized tcmalloc + variables to detect freeing foreign malloc's chunks. This works + somewhat by chance due to 0-initialized size classes cache working as + cache with no values. But this is about to change, so lets do explicit + initialization. + +M src/tcmalloc.cc + +commit 71fa9f873065e3d7c1f4ce0581d26b6498712f00 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Feb 20 21:17:48 2017 -0800 + + use 2-level page map for 48-bit addresses + + 48 bits is size of x86-64 and arm64 address spaces. So using 2 levels + map for them is slightly faster. We keep 3 levels for small-but-slow + configuration, since 2 levels consume a bit more memory. + + This is partial port of Google-internal commit by Sanjay + Ghemawat (same idea, different implementation). 
+ +M src/page_heap.h +M src/pagemap.h + +commit bad70249dd5c829b4981aecdc25953800d6745c3 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Feb 20 21:12:58 2017 -0800 + + use 48-bit addresses on 64-bit arms too + +M src/common.h + +commit 5f12147c6dbfe2cfbdc7553521fe0110073135f0 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Feb 20 21:03:58 2017 -0800 + + use hidden visibility for some key global variables + + So that our -fPIC code is faster + +M src/base/basictypes.h +M src/static_vars.h + +commit dfd53da578137d5962fe8d588980870bb0ab8aa9 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Jan 22 21:08:51 2017 -0800 + + set ENOMEM in handle_oom + +M src/tcmalloc.cc + +commit 14fd551072426cef317f43d587f91004d4fdae75 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Jan 8 06:02:04 2017 +0300 + + avoid O(N²) in thread cache creation code + +M src/thread_cache.cc + +commit 507a105e849422d5ceff4348d38aaf72371a6161 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 12 14:25:10 2016 -0800 + + pass original size to DoSampledAllocation + + It makes heap profiles more accurate. Google's internal malloc + is doing + it as well. + +M src/tcmalloc.cc + +commit bb77979dea796ab743e1308af25e9259ec97f2b1 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 12 18:10:19 2016 -0800 + + don't declare throw() on malloc funtions since it is faster + + Apparently throw() on functions actually asks compiler to generate + code + to detect unexpected exceptions. Which prevents tail calls + optimization. + + So in order to re-enable this optimization, we simply don't tell + compiler about throw() at all. C++11 noexcept would be even better, + but + it is not universally available yet. + + So we change to no exception specifications. Which at least for gcc & + clang on Linux (and likely for all ELF platforms, if not just all) + really eliminates all overhead of exceptions. 
+ +M src/debugallocation.cc +M src/gperftools/tcmalloc.h.in +M src/tcmalloc.cc +M src/windows/gperftools/tcmalloc.h +M src/windows/gperftools/tcmalloc.h.in + +commit 89c74cb79ca41cd75a1f9131af4ea2ab362593ae +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 12 20:26:40 2016 -0800 + + handle duplicate google_malloc frames in malloc hook stack trace + + Subsequent optimization may cause multiple malloc functions in + google_malloc section to be in call stack. Particularly when fast-path + malloc function calls slow-path and compiler chooses to implement such + call as regular call instead of tail-call. + + Because we need stacktrace just until first such function, once + we find + innermost such frame, we're simply checking if next outer frame + is also + google_malloc and consider it instead. + +M src/malloc_hook.cc + +commit 0feb1109ac957144a50d964b0ff244a6a286174a +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 12 21:08:57 2016 -0800 + + fix stack trace capturing in debug malloc + + Particularly, hardcoded skip count was relying on certain behavior of + compiler. Namely, that tail calls inside DebugDeallocate path are not + actually implemented as tail calls. + + New implementation is using google_malloc section as a marker + of malloc + boundary. But in order for this to work, we have to prevent + tail-call in + debugallocation's tc_XXX functions. Which is achieved by doing + volatile + read of static variable at the end of such functions. + +M src/debugallocation.cc + +commit 0506e965ee5184490fbe7acea470458562128a79 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Dec 18 18:36:00 2016 -0800 + + replace LIKELY/UNLIKELY with PREDICT_{TRUE,FALSE} + + Google-internal code is using PREDICT_TRUE/FALSE, so we should be + doing it too. 
+ +M src/base/basictypes.h +M src/common.cc +M src/common.h +M src/emergency_malloc.h +M src/malloc_hook-inl.h +M src/tcmalloc.cc +M src/thread_cache.h + +commit 59a4987054021eaf24143a27cf01252fafb5052d +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Mar 13 16:04:43 2016 -0700 + + prevent inlining ATTRIBUTE_SECTION functions + + So that their code is always executing in prescribed section. + +M src/base/basictypes.h + +commit ebb575b8a0153a70f43fd1c3ec33fe30f94c97d0 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Thu Feb 2 15:41:13 2017 -0800 + + Revert "enabled aggressive decommit by default" + + This reverts commit 7da5bd014d77ddaf694054b1e3ae0a3ef92ab384. + + Some tensorflow benchmarks are getting slower with aggressive + decommit. + +M src/static_vars.cc +M src/tests/tcmalloc_unittest.sh + +commit b82d89cb7c8781a6028f6f5959cabdc5a273aec3 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Jan 22 13:15:39 2017 -0800 + + Revert "disable dynamic sized delete support by default" + + This reverts commit 06811b3ae4c5365880898148d188ae91d20f52f2. + +M configure.ac + +commit fac0bb44d5661946d9839496ddf104942dd00abe +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Jan 22 13:11:05 2017 -0800 + + Do not depend on memchr in commandlineflags::StringToBool + + It lets us use it from inside ifunc handler. + +M src/base/commandlineflags.h + +commit 7d49f015a01e35155517332a2dfd95f16b568939 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Jan 22 13:08:34 2017 -0800 + + Make GetenvBeforeMain work inside ifunc handler + + Depending on things such us strlen, which are ifunc-ed themselves is + not supported from inside ifunc handler. + + Thus we implement strlen, memchr and memcmp in slow naive C++ just for + that place. 
+ +M src/base/sysinfo.cc + +commit a2550b6309153c250f67255b6271ec988cc7d0fb +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Feb 26 23:46:18 2017 -0800 + + turn bench_fastpath_throughput into actual throughput benchmark + + Previously we bumped size by 16 between iterations, but for many size + classess that gave is subsequent iteration into same size + class. Multiplying by prime number randomizes sizes more so speeds up + this benchmark on at least modern x86. + +M benchmark/malloc_bench.cc + +commit b762b1a4923f769487de818175655c1683d84db5 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 12 14:12:02 2016 -0800 + + added sized free benchmarks to malloc_bench + +M benchmark/malloc_bench.cc + +commit 71ffc1cd6b3ea99c97ed6d0e16caeb9d1b20eec7 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 12 14:11:40 2016 -0800 + + added free lists randomization step to malloc_bench + +M benchmark/malloc_bench.cc + +commit 732dfeb83d726de0aea47f0e6aa2abff3e0ad680 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Jan 22 13:57:31 2017 -0800 + + Run StartStopNoOptionsEmpty profiledata unittest + + Somehow we had this test, but didn't arrange to actually run it. GCC + gave warning and now we do. + +M src/tests/profiledata_unittest.cc + +commit cbb312fbe8022378c4635b3075a80a7827555170 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Dec 18 11:08:54 2016 -0800 + + aggressive decommit: only free necessary regions and fix O(N²) + + We used to decommit (with MADV_FREE or MADV_DONTNEED) whole combined + span when freeing span in aggressive decommit mode. The issue with + that is preceding or following span with which we combined span we're + freeing could be freed already and fairly large. By passing all of + that memory to decommit, we force kernel to scan all of those pages. 
+ + When mass-freeing many objects old behavior led to O(N^2) behavior + since freeing single span could lead to MADV_{FREE,DONTNEED}-ing of + most of the heap. + + New implementation just does decommit of individual bits as needed. + + While there, I'm also adding locking to calls of + PageHeap::{Get,Set}AggressiveDecommit. + + This partially (or mostly) fixes issue #839. + +M src/page_heap.cc +M src/page_heap.h +M src/tcmalloc.cc + +commit 6d98223a9035ec1c5e9a7eb0196b21021d48a41f +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Feb 12 17:12:44 2017 -0800 + + don't build with -fno-exceptions + + It looks like, in past it could produce better code. But since + unwinding is totally different since almost forever now, there is no + perfomance benefit of it anymore. + +M Makefile.am + +commit d6a1931cce303ea279b744c9b438686e4f5994e5 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 12 15:33:10 2016 -0800 + + fixed warning in casting heap of checker's main_thread_counter + + I.e. compiler didn't like int to void * casting and upcasting to + intptr_t first (which has same size as void *) makes it happier. 
+ +M src/heap-checker.cc + +commit 5c778701d98c642ecff97d044cd1c0b8b862aba3 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Feb 12 17:54:59 2017 -0800 + + added tcmalloc minimal unittest with ASSERTs checked + +M .gitignore +M Makefile.am + +commit a9167617abbcebe9d472141c53a20ce00c8c8d75 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Apr 29 19:09:47 2017 -0700 + + drop unused g_load_map variable in patch_functionc.cc + +M src/windows/patch_functions.cc + +commit d52e56dcb5aad0521ccc67d7e57dfcb8d41ea568 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Apr 29 19:06:55 2017 -0700 + + don't compare integer to NULL + +M src/windows/preamble_patcher.cc + +commit bae00c0341f49ade473d24c117226552a59d3e75 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Apr 23 20:33:16 2017 -0700 + + add fake_stacktrace_scope to few msvc projects + + Otherwise it predictably fails at linking phase due to missing + symbols. + +M +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +M vsprojects/tmu-static/tmu-static.vcproj + +commit 79aab4fed40f9aac03dacb32a108861eff689beb +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Apr 16 22:17:56 2017 -0700 + + correctly dllexport nallocx on windows + +M src/gperftools/nallocx.h +M src/tcmalloc.cc + +commit b010895a088703f512cb0e225c838306b2904069 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Apr 30 18:32:10 2017 -0700 + + don't undef PERFTOOLS_DLL_DECL + + This is not necessary and will help adding more headers with + dll-exported functions. 
+ +M src/gperftools/tcmalloc.h.in +M src/windows/gperftools/tcmalloc.h +M src/windows/gperftools/tcmalloc.h.in + +commit 491b1aca7edbbaed9ea6f7baaa547a35cf385130 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Apr 29 16:46:21 2017 -0700 + + don't try to use pthread_atfork on windows + +M src/maybe_threads.cc +M src/windows/mingw.h + +commit 691045b95761bedb9170d758be82f9b2f15c673a +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 12 14:41:28 2016 -0800 + + suppress warnings from legacy headers while building legacy headers + test + +M src/google/heap-checker.h +M src/google/heap-profiler.h +M src/google/malloc_extension.h +M src/google/malloc_extension_c.h +M src/google/malloc_hook.h +M src/google/malloc_hook_c.h +M src/google/profiler.h +M src/google/stacktrace.h +M src/google/tcmalloc.h +M src/tests/simple_compat_test.cc + +commit 22f7ceb97a63bcbc9d824a80916bb7a809fad1f2 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Apr 29 15:25:06 2017 -0700 + + use unsigned for few flags in mini_disassembler_types.h + + We're initializing them with value that doesn't fit into signed 32-bit + int and somehow mingw version that I have not just warns, but actually + errors on that. 
+ +M src/windows/mini_disassembler_types.h + +commit 9b17a8a5ba1c8320d12c6e6df0ab54ff218b50e6 +Author: Mostyn Bramley-Moore <mostynb@opera.com> +Date: Thu May 11 20:40:18 2017 +0200 + + remove superfluous size_t value >= 0 check + +M src/debugallocation.cc + +commit 86ce69d77ff5f881c5701901bf7d1ef884f9e33a +Author: Eugene <n.eugene536@gmail.com> +Date: Sat Feb 4 17:57:34 2017 +0300 + + Update binary_trees.cc + +M benchmark/binary_trees.cc + +commit cd8586ed6c8bed550ad727ce8af83a1a6bd1ca8e +Author: Ola Olsson <ola1olsson@gmail.com> +Date: Tue Mar 21 14:07:16 2017 +0100 + + Fix path names in README + +M README + +commit 98753aa73772bb0fae8cee668e513d5754ab7737 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Feb 20 16:04:14 2017 -0800 + + test that sized deallocation really works before enabling it + + Weirdly, some OSX compiler does allow -fsized-deallocation, yet barks + on any attempt to actually use it at compile time (!). So lets detect + this as well in configure and opt out as necessary. + + Should fix issue #870. + +M configure.ac + +commit 5618ef78505a854bd0d2c145fd9cf7967226a20e +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Feb 20 15:31:41 2017 -0800 + + Don't assume memalign exists in memalign vs nallocx test + + OSX and windows have issues with memalign. So test against tc_memalign + instead. + + This should fix _memalign linker-time part of issue #870. + +M src/tests/tcmalloc_unittest.cc + +commit bf640cd740fe38f6f10faa8683e8361fee971aba +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Feb 20 14:51:27 2017 -0800 + + rename sys allocator's sys_alloc symbol to tcmalloc_sys_alloc + + Since we're not building with hidden visibility, symbols of other DSOs + can interpose our symbols. And sys_alloc is just too generic name. + + And in fact erlang runtime has sys_alloc function. Which means we're + trying to write to it's first bytes as part of initializing system + allocator and crash. 
+ + This should fix issue #843. + +M src/system-alloc.cc +M src/system-alloc.h +M src/tcmalloc.cc +M src/windows/system-alloc.cc + +commit 069e3b16551186f8da0a41c185550fdfd11af35b +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Feb 20 14:25:18 2017 -0800 + + build malloc_bench_shared_full only when full tcmalloc is built + + I.e. because otherwise, when --enable-minimal is given, we're building + empty libtcmalloc.la and linking it to malloc_bench_shared_full. Which + has no effect at all and actually breaks builds on OSX. + + Should fix issue #869. + +M Makefile.am + +commit b8f9d0d44f94177d34b069180618b7d002e85b69 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Dec 18 09:35:02 2016 -0800 + + ported nallocx support from Google-internal tcmalloc + + nallocx is extension introduced by jemalloc. It returns effective size + of allocaiton without allocating anything. + + We also support MALLOCX_LG_ALIGN flag. But all other jemalloc + flags (which at the moment do nothing for nallocx anyways) are + silently ignored, since there is no sensible way to return errors in + this API. + + This was originally contributed by Dmitry Vyukov with input from + Andrew Hunter. But due to significant divergence of Google-internal + and free-software forks of tcmalloc, significant massaging was done by + me. So all bugs are mine. 
+ +M Makefile.am +A src/gperftools/nallocx.h +M src/tcmalloc.cc +M src/tests/tcmalloc_unittest.cc + +commit b0abefd93834d9d9c7ffaae2d23bd48ed8e96477 +Author: Andrew Morrow <acm@mongodb.com> +Date: Sat Dec 17 13:57:41 2016 -0500 + + Fix a typo in the page fence flag declaration + +M src/debugallocation.cc + +commit 855b3800064db49af823b85a54be269923eb6f4d +Author: Kirill Müller <krlmlr@mailbox.org> +Date: Tue Nov 15 09:58:11 2016 +0100 + + replace docs by doc + +M Makefile.am +M README +D doc +M packages/deb/docs +M packages/deb/libgperftools0.manpages +M src/gperftools/heap-checker.h +M src/gperftools/heap-profiler.h +M src/gperftools/profiler.h +M src/profiledata.h +M src/tcmalloc.cc + +commit 664210ead806d700cdbe5eeaf75d7a066fdac541 +Author: Kirill Müller <krlmlr@mailbox.org> +Date: Tue Nov 15 09:42:12 2016 +0100 + + doc -> docs, with symlink + +A doc +R100 doc/cpuprofile-fileformat.html docs/cpuprofile-fileformat.html +R100 doc/cpuprofile.html docs/cpuprofile.html +R100 doc/designstyle.css docs/designstyle.css +R100 doc/heap-example1.png docs/heap-example1.png +R100 doc/heap_checker.html docs/heap_checker.html +R100 doc/heapprofile.html docs/heapprofile.html +R100 doc/index.html docs/index.html +R100 doc/overview.dot docs/overview.dot +R100 doc/overview.gif docs/overview.gif +R100 doc/pageheap.dot docs/pageheap.dot +R100 doc/pageheap.gif docs/pageheap.gif +R100 doc/pprof-test-big.gif docs/pprof-test-big.gif +R100 doc/pprof-test.gif docs/pprof-test.gif +R100 doc/pprof-vsnprintf-big.gif docs/pprof-vsnprintf-big.gif +R100 doc/pprof-vsnprintf.gif docs/pprof-vsnprintf.gif +R100 doc/pprof.1 docs/pprof.1 +R100 doc/pprof.see_also docs/pprof.see_also +R100 doc/pprof_remote_servers.html docs/pprof_remote_servers.html +R100 doc/spanmap.dot docs/spanmap.dot +R100 doc/spanmap.gif docs/spanmap.gif +R100 doc/t-test1.times.txt docs/t-test1.times.txt +R100 doc/tcmalloc-opspercpusec.vs.threads.1024.bytes.png +docs/tcmalloc-opspercpusec.vs.threads.1024.bytes.png +R100 
doc/tcmalloc-opspercpusec.vs.threads.128.bytes.png +docs/tcmalloc-opspercpusec.vs.threads.128.bytes.png +R100 doc/tcmalloc-opspercpusec.vs.threads.131072.bytes.png +docs/tcmalloc-opspercpusec.vs.threads.131072.bytes.png +R100 doc/tcmalloc-opspercpusec.vs.threads.16384.bytes.png +docs/tcmalloc-opspercpusec.vs.threads.16384.bytes.png +R100 doc/tcmalloc-opspercpusec.vs.threads.2048.bytes.png +docs/tcmalloc-opspercpusec.vs.threads.2048.bytes.png +R100 doc/tcmalloc-opspercpusec.vs.threads.256.bytes.png +docs/tcmalloc-opspercpusec.vs.threads.256.bytes.png +R100 doc/tcmalloc-opspercpusec.vs.threads.32768.bytes.png +docs/tcmalloc-opspercpusec.vs.threads.32768.bytes.png +R100 doc/tcmalloc-opspercpusec.vs.threads.4096.bytes.png +docs/tcmalloc-opspercpusec.vs.threads.4096.bytes.png +R100 doc/tcmalloc-opspercpusec.vs.threads.512.bytes.png +docs/tcmalloc-opspercpusec.vs.threads.512.bytes.png +R100 doc/tcmalloc-opspercpusec.vs.threads.64.bytes.png +docs/tcmalloc-opspercpusec.vs.threads.64.bytes.png +R100 doc/tcmalloc-opspercpusec.vs.threads.65536.bytes.png +docs/tcmalloc-opspercpusec.vs.threads.65536.bytes.png +R100 doc/tcmalloc-opspercpusec.vs.threads.8192.bytes.png +docs/tcmalloc-opspercpusec.vs.threads.8192.bytes.png +R100 doc/tcmalloc-opspersec.vs.size.1.threads.png +docs/tcmalloc-opspersec.vs.size.1.threads.png +R100 doc/tcmalloc-opspersec.vs.size.12.threads.png +docs/tcmalloc-opspersec.vs.size.12.threads.png +R100 doc/tcmalloc-opspersec.vs.size.16.threads.png +docs/tcmalloc-opspersec.vs.size.16.threads.png +R100 doc/tcmalloc-opspersec.vs.size.2.threads.png +docs/tcmalloc-opspersec.vs.size.2.threads.png +R100 doc/tcmalloc-opspersec.vs.size.20.threads.png +docs/tcmalloc-opspersec.vs.size.20.threads.png +R100 doc/tcmalloc-opspersec.vs.size.3.threads.png +docs/tcmalloc-opspersec.vs.size.3.threads.png +R100 doc/tcmalloc-opspersec.vs.size.4.threads.png +docs/tcmalloc-opspersec.vs.size.4.threads.png +R100 doc/tcmalloc-opspersec.vs.size.5.threads.png 
+docs/tcmalloc-opspersec.vs.size.5.threads.png +R100 doc/tcmalloc-opspersec.vs.size.8.threads.png +docs/tcmalloc-opspersec.vs.size.8.threads.png +R100 doc/tcmalloc.html docs/tcmalloc.html +R100 doc/threadheap.dot docs/threadheap.dot +R100 doc/threadheap.gif docs/threadheap.gif + +commit 75dc9a6e1470fa82b828f9687edad48f53d740b1 +Author: zmertens <zmertens@asu.edu> +Date: Fri Nov 18 15:27:32 2016 -0700 + + Fix Post(s)cript tyos + +M doc/pprof.1 +M src/pprof + +commit dde32f8bbc95312379f9f5a651799815bb6327c5 +Author: Francis Ricci <fjricci@fb.com> +Date: Tue Oct 11 11:14:06 2016 -0700 + + Fix unaligned memory accesses in debug allocator + +M src/base/basictypes.h +M src/debugallocation.cc + +commit 02eeed29df112728564a5dde6417fa4622b57a06 +Author: Ryan Macnak <rmacnak@google.com> +Date: Tue Sep 27 14:03:05 2016 -0700 + + Fix redefinition of mmap on aarch64. + +M src/base/linux_syscall_support.h + +commit c07a15cff4b904ac45e4019f8e36eeffd3e8186c +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Sep 24 18:56:22 2016 -0700 + + [windows] patch _free_base as well + + Looks like _free_base is used as generic free/delete, which is not + checked for free/delete mismatch by their debug runtime. Issue #817 + occurs because something that is allocated by regular allocation + functions (new or malloc) is freed by _free_base. Patch it as + well fixes + the problem. + + Closes #817. + +M src/windows/patch_functions.cc + +commit acac6af26b0ef052b39f61a59507b23e9703bdfa +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Sep 24 16:19:27 2016 -0700 + + Fix finding default zone on macOS sierra + + This is taken from jemalloc patch at + https://github.com/jemalloc/jemalloc/pull/427/commits/19c9a3e828ed46f1576521c264640e60bd0cb01f + by Mike Hommey (aka glandium). + + The issue was reported together with advise to take that jemalloc + fix by + Koichi Shiraishi and David Ribeiro Alves at gperftools issue #827. + + Closes issue #827. 
+ +M src/libc_override_osx.h + +commit 7822b5b0b9fa7e016e1f6b46ea86f26f4691a457 +Author: Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> +Date: Mon Aug 8 13:50:10 2016 -0300 + + Stop using glibc malloc hooks + + glibc deprecated malloc hooks in 2011 and will be removing them soon. + These hooks aren't safe and aren't necessary when the malloc API is + fully exported. + +M configure.ac +M src/libc_override_glibc.h + +commit c92f0ed0891d69888cdc7e904c82048362b2dd8d +Author: Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com> +Date: Wed Jul 27 17:15:52 2016 -0300 + + Remove references to __malloc_initialize_hook + + __malloc_initialize_hook has been deprecated in glibc since 2011 + and is + being removed on glibc 2.24. + +M src/libc_override_glibc.h + +commit 9709eef361aa8c46d6b14c08bebead7b8185e731 +Merge: eb474c9 44f276e +Author: Aliaksey Kandratsenka (aka Aliaksei Kandratsenka) +<alkondratenko@gmail.com> +Date: Wed Aug 24 22:33:05 2016 -0700 + + Merge pull request #821 from jtmcdole/patch-1 + + Rename TCMALLOC_DEBUG to PERFTOOLS_VERBOSE + +commit 44f276e132eef8ba013a04b4af92ce26464e2b54 +Author: John McDole <jtmcdole@gmail.com> +Date: Wed Aug 24 17:47:28 2016 -0700 + + Rename TCMALLOC_DEBUG to PERFTOOLS_VERBOSE + +M README + +commit eb474c995eadef3edb47bc081f09693228832794 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Jul 3 18:20:32 2016 -0700 + + Summary: support gcc atomic ops on clang too + + Clang actually does support __atomic_XXX atomic ops builtins but does + not advertise itselt as gcc 4.7 or later. So we now detect clang + separetely. + + We're enabling gcc atomic ops on clang >= 3.4 since this is the oldest + version that I can test. + + This should fix issue #797. 
+ +M src/base/atomicops.h + +commit 7f86eab1f3e0636a79a96fd44e3bb51db0457a86 +Author: Kenton Varda <kenton@sandstorm.io> +Date: Thu Jun 30 18:32:39 2016 -0700 + + Recognize .node files as shared libraries + + node.js extensions are regular shared libraries named with the `.node` + extension. With this small change I was able to get a usable heap + profile out of node to debug an extension I'm working on. + +M src/pprof + +commit bf8eacce69df877d1cecdab8c7a6cc3f218ebcc2 +Author: Bryan Chan <bryanpkc@gmail.com> +Date: Wed Jun 15 00:10:45 2016 -0400 + + Add support for 31-bit s390; merge linux_syscall_support.h changes + from upstream. + +M configure.ac +M m4/pc_from_ucontext.m4 +M src/base/basictypes.h +M src/base/linux_syscall_support.h +M src/base/linuxthreads.h +M src/getpc.h +M src/malloc_hook_mmap_linux.h + +commit c54218069b96e5a64e81186cd14107bf6660b61d +Author: savefromgoogle <savefromgoogle@users.noreply.github.com> +Date: Sat May 14 17:50:34 2016 +0200 + + Update README + + typo corrected and direct link to Google Group mailinglist added + +M README + +commit 06f4ce65c226e2bed432119bd830a478012ea336 +Author: iivlev <iivlev@productengine.com> +Date: Fri Apr 15 17:46:25 2016 +0300 + + Small performance tweak: avoid calling time() if we don't need it + +M src/heap-profiler.cc + +commit db8d4836091b93100b176e8cab4e842974234e4e +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Jun 25 16:24:40 2016 -0700 + + Autogenerate ChangeLog from git on make dist + + This fixes build breakage introduced in preceding commit for issue + #796. + +A ChangeLog +M Makefile.am +M configure.ac +A m4/ax_generate_changelog.m4 + +commit 4a1359831938b5a71b2faf38c02003053af253d9 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Jun 25 16:12:48 2016 -0700 + + renamed ChangeLog to ChangeLog.old + + I.e. to avoid confusion. This should fix issue #796. 
+ +R100 ChangeLog ChangeLog.old + +commit 7852eeb75b9375cf52a7da01be044da6e915dd08 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Apr 9 13:09:18 2016 -0700 + + Use initial-exec tls for libunwind's recursion flag + + If we don't do it, then reading variable calls to __tls_get_addr, + which + uses malloc on first call. initial-exec makes dynamic linker + reserve tls + offset for recusion flag early and thus avoid unsafe calls to malloc. + + This fixes issue #786. + +M src/base/basictypes.h +M src/stacktrace_libunwind-inl.h +M src/thread_cache.h + +commit a07f9fe75af25ce388af3d4ff4514b42513d766f +Author: Mike Gaffney <mike@uberu.com> +Date: Thu Mar 31 11:27:21 2016 -0700 + + gerftools -> gperftools in readme + +M README + +commit 9fd6d2687914a1f58a8ce457d6a1bd3d55ea0747 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Mar 20 12:29:40 2016 -0700 + + added define to enable MADV_FREE usage on Linux + + Building with -DTCMALLOC_USE_MADV_FREE will enable usage of + MADV_FREE on + Linux if glibc copy of kernel headers has MADV_FREE defined. + + I.e. so that people can test this more easily. + + Affects ticket #780. + +M src/system-alloc.cc + +commit 6f7a14f45e2e2e16fbbd250543f5758078e2f5f0 +Author: Mathias Stearn <redbeard0531@gmail.com> +Date: Wed Mar 16 18:22:32 2016 -0400 + + Don't use MADV_FREE on Linux + + Addresses #780. 
+ +M src/system-alloc.cc + +commit 55cf6e6281f2f19ed04a6b2a90e2dcaec08e16d0 +Author: Aman Gupta <aman@tmm1.net> +Date: Mon Mar 14 12:20:40 2016 -0700 + + Fix symbol resolution on OSX + + The library mapping ranges on OSX and Linux are sorted in opposite + orders + + 7f71c3323000-7f71c3339000 r-xp 00000000 09:02 29099128 + /lib/x86_64-linux-gnu/libz.so.1.2.3.4 + 7f71c3339000-7f71c3538000 ---p 00016000 09:02 29099128 + /lib/x86_64-linux-gnu/libz.so.1.2.3.4 + 7f71c3538000-7f71c3539000 r--p 00015000 09:02 29099128 + /lib/x86_64-linux-gnu/libz.so.1.2.3.4 + 7f71c3539000-7f71c353a000 rw-p 00016000 09:02 29099128 + /lib/x86_64-linux-gnu/libz.so.1.2.3.4 + + vs + + 108f8d000-108f95000 r-xp 00025000 00:00 0 + /usr/local/opt/libmemcached/lib/libmemcached.11.dylib + 108f8c000-108f8d000 r-xp 00024000 00:00 0 + /usr/local/opt/libmemcached/lib/libmemcached.11.dylib + 108f68000-108f8c000 r-xp 00000000 00:00 0 + /usr/local/opt/libmemcached/lib/libmemcached.11.dylib + +M src/pprof + +commit 8e858436223db7729d41304ad29a29946d47382b +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Mar 13 15:11:50 2016 -0700 + + added simple .travis.yml config + +A .travis.yml + +commit 05e40d29c034c0d68322e589b8d6e234f562be2e +Author: Brian Silverman <bsilver16384@gmail.com> +Date: Tue Mar 8 15:58:02 2016 -0500 + + Recognize modern Linux ARM + + Commit e580d7888154fa7f95b3cef9e18f1ce69182212b fixed the macros + in some + of the code but not other places. + + `make check` still fails in the same places on a Debian Jessie armhf + system. 
+ +M src/base/elfcore.h +M src/base/linuxthreads.h + +commit 632de2975e63f89613af9ab99bc1603a4a6332aa +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 12 11:48:20 2016 -0800 + + bumped version up to 2.5 + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 6682016092c735feb29021acbb8c2067fdf1a209 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 12 10:54:05 2016 -0800 + + Unbreak profiling with CPUPROFILE_FREQUENCY=1 + + This closes ticket #777. + + No test sadly, since it's not trivial to unittest this case. But + fix with single-shot manual testing is better than nothing. + +M src/profile-handler.cc + +commit 6ff86ff6a783a277956e45db68c6a2b729f27000 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 5 16:17:28 2016 -0800 + + bumped version to 2.4.91 for 2.5rc2 + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 782165fa7f2c49d6a67c2415626a1f215cc21ac2 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 5 15:47:25 2016 -0800 + + build sized delete aliases even when sized-delete is disabled + + In this case we alias to regular delete. This is helpful because if we + don't override sized delete, then apps will call version in libstdc++ + which delegates calls to regular delete, which is slower than calling + regular delete directly. + +M configure.ac +M src/libc_override_gcc_and_weak.h + +commit 06811b3ae4c5365880898148d188ae91d20f52f2 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 5 15:08:50 2016 -0800 + + disable dynamic sized delete support by default + + IFUNC relocations don't support our advanced use case (calling + application function or looking up environment variable). + + Particularly, it doesn't work on PPC and arm when tcmalloc is linked + with -Wl,-z,now. See RedHat's bugzilla ticket + https://bugzilla.redhat.com/show_bug.cgi?id=1312462 for more details. 
+ +M configure.ac + +commit d4d99eb608f3d778fa301137a85cc4903a1dd33e +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Mar 5 14:53:42 2016 -0800 + + unbreak compilation with visual studio + + Specifically, this commit adds missing fake_stacktrace_scope.cc + to msvc + build and removes undef-ing of PERFTOOLS_DLL_DECL by tcmalloc.h. + +M src/gperftools/tcmalloc.h.in +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h +M src/windows/gperftools/tcmalloc.h.in +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj + +commit 126d4582c18d053452d22ce12351c5429513aabe +Author: Brian Silverman <bsilver16384@gmail.com> +Date: Tue Mar 1 17:57:33 2016 -0500 + + Call function pointers with the right type + + I think it's undefined behavior, but it's definitely weird. ubsan + complains too. + +M src/tests/tcmalloc_unittest.cc + +commit e0fa28ef7d50158b271da90f53ab3aee5ba441d8 +Author: Brian Silverman <bsilver16384@gmail.com> +Date: Tue Mar 1 17:56:32 2016 -0500 + + Don't shift a type by more than its width + + It's undefined behavior, so ubsan complains. + +M src/tests/sampler_test.cc + +commit a1c764d2632cc0090f31d8d56effb94a8d049b54 +Author: Brian Silverman <bsilver16384@gmail.com> +Date: Tue Mar 1 17:55:59 2016 -0500 + + Initialize counters in test + +M src/tests/profile-handler_unittest.cc + +commit 22123a37c236e26535d3f3fff7f31a5b6515d7d6 +Author: Brian Silverman <bsilver16384@gmail.com> +Date: Tue Mar 1 17:55:53 2016 -0500 + + Don't overflow a signed integer + + It's undefined behavior and ubsan catches it. 
+ +M src/base/low_level_alloc.cc + +commit 66e1e94f38467b5c7bbfb05e3c7267f3039a2c69 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Mon Feb 29 20:09:39 2016 -0800 + + added minimal "header section" to README + +M README + +commit 2804b7cfee8d49d5ab98340cbf3050c0a5f57b98 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Feb 21 20:27:48 2016 -0800 + + bumped version to 2.5rc + +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit f47fefbfc1083e7efbada3e7e7d2859d31e151e7 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Feb 21 22:38:23 2016 -0800 + + updated NEWS for 2.5rc + +M NEWS + +commit cef60361741e858041f4bb4d46ce00c6db89b711 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Feb 21 23:03:41 2016 -0800 + + alias same malloc/free variants to their canonical versions + + This is similar to what gcc 5 does anyways, except gcc 5 is placing + jumps which adds a bit of overhead. + + Instead of letting gcc do it, we alias using ELF symbol aliasing. All + free variants (tc_delete{,array}_{,nothrow}) are aliased to + tc_free. There are 3 malloc variants that differ by oom + handling. tc_newarray is aliased to tc_new. And tc_newarray_nothrow is + aliased to tc_new_nothrow. + + This aliasing only happens in non-debug malloc, since debug malloc + does + distinguish between different variants since it needs checking for + mismatch. + +M src/tcmalloc.cc + +commit ea8d242061460309d174d9053a224cf94e65e6fe +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Feb 21 20:16:37 2016 -0800 + + Re-enable MultipleIdleNonIdlePhases test + + Which was accidentally removed as part of MarkThreadTemporarilyIdle + support. 
+ +M src/tests/markidle_unittest.cc + +commit c9962f698b0edb37315445b33a06820f9bcf5df9 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Feb 21 20:07:37 2016 -0800 + + added maybe_emergency_malloc.h to Makefile.am + + Because without this reference it isn't packaged by make dist. + +M Makefile.am + +commit 7dd4af65365d74a5d8d30d5811c26117a9192238 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Feb 21 19:26:37 2016 -0800 + + don't round up sizes for large allocation when sampling + + This closes #723. + + Since rounding up prior to sampling is introducing possibility of + arithmetic overflow, we're just not doing it. + + It introduces some error (up to 4k), but since we're dealing with at + least 256k allocations, we're fine. + +M src/tcmalloc.cc + +commit 4f3410e759ec42cb307429222d690a81e3cd37b0 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Feb 21 13:52:47 2016 -0800 + + enable emergency malloc by default on arm when using libunwind + + Because by default libunwind on arm does fopen which mallocs + which will + deadlock unless we enable emergency malloc. + +M configure.ac + +commit 7f12051dbe1f402771a747c1192a66e7571d94f9 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Jan 31 23:17:50 2016 -0800 + + implemented emergency malloc + + Emergency malloc is enabled for cases when backtrace capturing + needs to + call malloc. In this case, we enable emergency malloc just prior to + calling such code and disable it after it is done. 
+ +M Makefile.am +M configure.ac +M src/debugallocation.cc +A src/emergency_malloc.cc +A src/emergency_malloc.h +A src/emergency_malloc_for_stacktrace.cc +A src/fake_stacktrace_scope.cc +M src/malloc_hook.cc +A src/maybe_emergency_malloc.h +M src/stacktrace.cc +M src/tcmalloc.cc +M src/thread_cache.h + +commit 3ee2360250c639f02b354820bc50efc6e104b754 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Feb 20 21:49:58 2016 -0800 + + replaced invalid uses of __THROW + + We're now using it only when overriding glibc functions (such + as malloc + or mmap). In other cases (most importantly in public tcmalloc.h + header) + we're doing our own throw() to avoid possible breakage on future glibc + changes. + +M src/debugallocation.cc +M src/gperftools/tcmalloc.h.in +M src/libc_override_glibc.h +M src/libc_override_redefine.h +M src/tcmalloc.cc +M src/windows/gperftools/tcmalloc.h +M src/windows/gperftools/tcmalloc.h.in + +commit 013b82abcfeaa158da2490ff86d6bff5a5434d7f +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Feb 20 21:19:37 2016 -0800 + + unbreak <malloc.h> inclusion in gperftools/tcmalloc.h + + We have shipped header which checked HAVE_XXX defines which we only + defined in project-local config.h. So it could never work correctly. + + We're now doing #include <malloc.h> just like tc_mallinfo on constant + which we detect at configure time and write into header that we + install. + +M src/gperftools/tcmalloc.h.in + +commit 19903e6f156d0ff8063880ddc5051b5dc261c1ff +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Feb 20 21:18:18 2016 -0800 + + drop detection of sys/malloc.h and malloc/malloc.h + + Which we don't really need. 
+ +M configure.ac + +commit cdff090ebd7a3cef8de60109bef52251758181d7 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Feb 20 20:43:21 2016 -0800 + + Fix several harmless clang warnings + +M src/common.cc +M src/internal_logging.cc +M src/malloc_hook.cc +M src/tests/current_allocated_bytes_test.cc + +commit 9095ed08402620b04591c2d27ca222594f0da6db +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Feb 20 20:33:27 2016 -0800 + + implemented stacktrace capturing via libgcc's C++ ABI function + + Particularly _Unwind_Backtrace which seems to be gcc extension. + + This is what glibc's backtrace is commonly is using. + + Using _Unwind_Backtrace directly is better than glibc's backtrace, + since + it doesn't call into dlopen. While glibc does dlopen when it is + built as + shared library apparently to avoid link-time dependency on libgcc_s.so + +M Makefile.am +M configure.ac +M src/stacktrace.cc +A src/stacktrace_libgcc-inl.h + +commit 728cbe102108be8296f2485e384392a4408ce9db +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Feb 20 13:06:08 2016 -0800 + + force profiler_unittest to do 'real' work + + 'XOR loop' in profiler unittest wasn't 100% effective because + it allowed + compiler to avoid loading and storing to memory. + + After marking result variable as volatile, we're now forcing + compiler to + read and write memory, slowing this loops down sufficiently. And + profiler_unittest is now passing more consistently. 
+ + Closes #628 + +M src/tests/profiler_unittest.cc + +commit fff6b4fb8814b5f25d4f97889d5003537f7dcc92 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Jan 31 23:14:43 2016 -0800 + + Extend low-level allocator to support custom pages allocator + +M src/base/low_level_alloc.cc +M src/base/low_level_alloc.h + +commit 32d992679589ea83e044d3f8263583ae960b0b16 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Jan 31 22:42:20 2016 -0800 + + added malloc_bench_shared_full + +M .gitignore +M Makefile.am + +commit 00d8fa1ef8d6650f08e00f59baa22cd31b908432 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Feb 6 19:13:07 2016 -0800 + + always use real throw() on operators new/delete + + Since non-glibc-s have no __THROW and lack of throw() on operators + gives + us warning. + +M src/libc_override_gcc_and_weak.h + +commit 08e034ad5940f24f99a24630c7b71a0de728d05b +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Feb 6 16:19:54 2016 -0800 + + Detect working ifunc before enabling dynamic sized delete support + + Particularly, on arm-linux and x86-64-debian-kfreebsd compilation + fails + due to lack of support for ifunc. So it is necessary to test at + configure time whether ifunc is supported. + +M configure.ac + +commit a788f354a0cd6de3d714a58ad3e6ae5baf806c3b +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Feb 6 16:01:25 2016 -0800 + + include unistd.h for getpid in thread_lister.c + + This fixes warning produced on arm-linux. + +M src/base/thread_lister.c + +commit 644a6bdbdb50b26a25e4428c43556467a6e8b5cc +Author: Bryan Chan <bryanpkc@gmail.com> +Date: Fri Jan 22 12:26:47 2016 -0500 + + Add support for Linux s390x + + This resolves gperftools/gperftools#761. 
+ +M configure.ac +M m4/pc_from_ucontext.m4 +M src/base/basictypes.h +M src/base/linux_syscall_support.h +M src/base/linuxthreads.h +M src/malloc_hook_mmap_linux.h + +commit bab7753aad44b3395a063966f32c23f632fee174 +Author: Bryan Chan <bryanpkc@gmail.com> +Date: Fri Jan 29 02:38:42 2016 -0500 + + Fix typo in heap-checker-death_unittest.sh + +M src/tests/heap-checker-death_unittest.sh + +commit 17182e1d3c324e8fe9cc02ce2822caa746d7d17a +Author: Simon Que <sque@chromium.org> +Date: Fri Jan 29 18:17:16 2016 -0800 + + Fix include of malloc_hook_c.h in malloc_hook.h + + malloc_hook.h includes malloc_hook_c.h as + <gperftools/malloc_hook_c.h>. + This requires the compiler to have designated src/gperftools as a + standard include directory (-I), which may not always be the case. + + Instead, include it as "malloc_hook_c.h", which will search in + the same + directory first. This will always work, regardless of whether it was + designated a standard include directory. + +M src/gperftools/malloc_hook.h + +commit c69721b2b2ceae426c36de191dd0a6fa443c5c7a +Author: Andrew Morrow <acm@mongodb.com> +Date: Sun Nov 22 15:19:12 2015 -0500 + + Add support for obtaining cache size of the current thread and + softer idling + +M src/gperftools/malloc_extension.h +M src/gperftools/malloc_extension_c.h +M src/malloc_extension.cc +M src/tcmalloc.cc +M src/tests/markidle_unittest.cc +M src/thread_cache.cc +M src/thread_cache.h + +commit 5ce42e535d6b3bf6c5e41ceb9cf876a70a7e3318 +Author: Brian Silverman <bsilver16384@gmail.com> +Date: Tue Jan 26 16:53:48 2016 -0500 + + Don't always arm the profiling timer. + + It causes a noticeable performance hit and can sometimes confuse GDB. + + Tested with CPUPROFILE_PER_THREAD_TIMERS=1. + + Based on an old version by mnissler@google.com. 
+ +M src/profile-handler.cc +M src/profile-handler.h +M src/tests/profile-handler_unittest.cc + +commit 7f801ea091e03dae7b13160f76fecce15727c9c0 +Author: Duncan Sands <baldrick@free.fr> +Date: Fri Jan 15 18:12:51 2016 +0100 + + Make sure the alias is not removed by link-time optimization when + it can prove + that it isn't used by the program, as it might still be needed to + override the + corresponding symbol in shared libraries (or inline assembler for + that matter). + For example, suppose the program uses malloc and free but not calloc + and is + statically linked against tcmalloc (built with -flto) and LTO + is done. Then + before this patch the calloc alias would be deleted by LTO due to + not being + used, but the malloc/free aliases would be kept because they are + used by the + program. Suppose the program is dynamically linked with a shared + library that + allocates memory using calloc and later frees it by calling free. + Then calloc + will use the libc memory allocator, because the calloc alias was + deleted, but + free will call into tcmalloc, resulting in a crash. + +M src/libc_override_gcc_and_weak.h + +commit 6b3e6ef5e0ab65030d116dd55dac8d64f9c72d33 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Jan 24 19:45:16 2016 -0800 + + don't retain compatibility with old docdir behavior + + Since it is not really needed. And since we don't care about too + ancient + autoconfs. + +M configure.ac + +commit ccffcbd9e988a39654fe6ec5edd6d817f266ded9 +Author: Chris Mayo <aklhfex@gmail.com> +Date: Sun Dec 27 18:55:05 2015 +0000 + + support use of configure --docdir argument + + Value of docdir was being overridden in Makefile. + + Retain compatibility with old Autoconf versions that do not provide + docdir. 
+ +M Makefile.am +M configure.ac + +commit 050f2d28be8a63836bd74558b82be7983770a654 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Dec 12 18:27:40 2015 -0800 + + use alias attribute only for elf platforms + + It was reported that clang on OSX doesn't support alias + attribute. Most + likely because of executable format limitations. + + New code limits use of alias to gcc-compatible compilers on elf + platforms (various gnu and *bsd systems). Elf format is known + to support + aliases. + +M src/tcmalloc.cc + +commit 07b0b21ddd168d977c04b1395677de71bb3e402e +Author: cyshi <wbstsa@gmail.com> +Date: Wed Dec 2 14:47:15 2015 +0800 + + fix compilation error in spinlock + +M src/base/spinlock.cc + +commit e14450366abc51c7a25256615179daf0bae1de34 +Author: gshirishfree <gshirishfree@gmail.com> +Date: Mon Nov 23 11:34:13 2015 -0800 + + Added better description for GetStats API + +M src/gperftools/malloc_extension.h + +commit 64892ae730b704a7f3f23bd8eaeaf206901df201 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Nov 21 19:17:36 2015 -0800 + + lower default transfer batch size down to 512 + + Some workloads get much slower with too large batch size. + + This closes bug #678. + + binary_trees benchmark benefits from larger batch size. And I + found that + 512 is not much slower than huge value that we had. + +M src/common.cc + +commit 6fdfc5a7f40ebcff3fdaada1a2994ff54be2f9c7 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Oct 24 23:16:45 2015 -0700 + + implemented enabling sized-delete support at runtime + + Under gcc 4.5 or greater we're using ifunc function attribute + to resolve + sized delete operator to either plain delete implementation + (default) or + to sized delete (if enabled via environment variable + TCMALLOC_ENABLE_SIZED_DELETE). 
+ +M configure.ac +M src/libc_override_gcc_and_weak.h +M src/tests/tcmalloc_unittest.sh + +commit c2a79d063c949584170b3e7dd2939a4548c16079 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Fri Oct 23 21:44:04 2015 -0700 + + use x86 pause in spin loop + + This saves power and improves performance, particulary on SMT. + +M src/base/spinlock.cc + +commit 0fb6dd8aa3c2fe3245c31c138ec4dfb05efd7f6d +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Fri Oct 23 22:09:25 2015 -0700 + + added binary_trees benchmark + +M .gitignore +M Makefile.am +A benchmark/binary_trees.cc + +commit a8852489e54e915d22abbdad1688d1f25ccb7925 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Oct 18 00:40:15 2015 -0700 + + drop unsupported allocation sampling code in tcmalloc_minimal + +M src/tcmalloc.cc +M src/thread_cache.h + +commit a9db0ae516c225543976280047b22b1e0ca08b93 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Oct 4 21:12:28 2015 -0700 + + implemented (disabled by default) sized delete support + + gcc 5 and clang++-3.7 support sized deallocation from C++14. We are + taking advantage of that by defining sized versions of operator + delete. + + This is off by default so that if some existing programs that + define own + global operator delete without sized variant are not broken by + tcmalloc's sized delete operator. + + There is also risk of breaking exiting code that deletes objects using + wrong class (i.e. base class) without having virtual destructors. + +M configure.ac +M src/gperftools/tcmalloc.h.in +M src/libc_override_gcc_and_weak.h +M src/libc_override_redefine.h +M src/tcmalloc.cc +M src/windows/gperftools/tcmalloc.h +M src/windows/gperftools/tcmalloc.h.in + +commit 88686972b9caf8fd132c4e30d89da78c3d324c27 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Oct 4 11:15:37 2015 -0700 + + pass -fsized-deallocation to gcc 5 + + Otherwise it gives warning for declaration of sized delete operator. 
+ +M Makefile.am +M configure.ac + +commit 0a18fab3af2f1b59b851836d319a1eff0db5d44e +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Oct 4 21:07:54 2015 -0700 + + implemented sized free support via tc_free_sized + +M src/debugallocation.cc +M src/gperftools/tcmalloc.h.in +M src/tcmalloc.cc +M src/windows/gperftools/tcmalloc.h +M src/windows/gperftools/tcmalloc.h.in +M src/windows/patch_functions.cc + +commit 464688ab6ddb82db22ea9b934ddea192478e4535 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sun Oct 4 21:27:56 2015 -0700 + + speedup free code path by dropping "fast path allowed check" + +M src/tcmalloc.cc + +commit 10f7e207166bd6652ed8803b9885917bfbbd90d0 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Oct 3 18:47:14 2015 -0700 + + added SizeMap::MaybeSizeClass + + Because it allows us to first check for smaller sizes, which is most + likely. + +M src/common.h + +commit 436e1dea43dc097525e43a43e6d79810ba6921b5 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Oct 3 15:40:21 2015 -0700 + + slightly faster GetCacheIfPresent + +M src/thread_cache.h + +commit 04df911915ecfcbc5e6b13fdab1b86f825ed1938 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Oct 3 15:38:33 2015 -0700 + + tell compiler that non-empty hooks are unlikely + +M src/malloc_hook-inl.h + +commit 8cc75acd1f4024ca978858b1cf85b45204ed32b2 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Oct 4 11:04:12 2015 -0700 + + correctly test for -Wno-unused-result support + + gcc is only giving warning for unknown -Wno-XXX flags so test never + fails on gcc even if -Wno-XXX is not supported. By using + -Wunused-result we're able to test if gcc actually supports it. + + This fixes issue #703. 
+ +M configure.ac + +commit 7753d8239b1e427d83729b6d5c0401e1d80308fd +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Oct 4 18:26:59 2015 -0700 + + fixed clang warning about shifting negative values + +M src/base/basictypes.h + +commit ae09ebb3833f3f207b7623245e360ddf122b823e +Author: Jens Rosenboom <j.rosenboom@x-ion.de> +Date: Thu Nov 19 17:45:44 2015 +0100 + + Fix tmpdir usage in heap-profiler_unittest.sh + + Using a single fixed directory would break when tests were being + run in + parallel with "make -jN". + + Also, the cleanup at the end of the test didn't work because it + referred + to the wrong variable. + +M src/tests/heap-profiler_unittest.sh + +commit df34e71b575daa6e4fa7588ae0deb9d529566f08 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Nov 21 16:03:09 2015 -0800 + + use $0 when referring to pprof + + This fixed debian bug #805536. Debian ships pprof under google-pprof + name so it is handy when google-pprof --help refers to itself + correctly. + +M src/pprof + +commit 7773ea64ee254700ef0ffc5673539f0e111a4f86 +Author: Adhemerval Zanella <adhemerval.zanella@linaro.com> +Date: Fri Nov 6 16:29:12 2015 -0200 + + Alignment fix to static variables for system allocators + + This patch the placement new for some system allocator to force the + static buffer to pointer value. + +M src/memfs_malloc.cc +M src/system-alloc.cc + +commit c46eb1f3d2f7a2bdc54a52ff7cf5e7392f5aa668 +Author: Boris Sazonov <bsazonov@gmail.com> +Date: Thu Oct 15 17:39:08 2015 +0300 + + Fixed printf misuse in pprof - printed string was passed as + format. 
Better use print instead + +M src/pprof + +commit 9bbed8b1a8f1229cf0f1b4185683f309267ee1b9 +Author: Boris Sazonov <bsazonov@gmail.com> +Date: Wed Sep 23 19:46:13 2015 +0300 + + Fixed assembler argument passing inside _syscall6 on MIPS - it was + causing 'Expression too complex' compilation errors in spinlock + +M src/base/linux_syscall_support.h + +commit 962aa53c55968f62f548f6f6f4849a384b505ca0 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Oct 10 20:31:50 2015 -0700 + + added more fastpath microbenchmarks + + This also makes them output nicer results. I.e. every benchmark is + run 3 + times and iteration duration is printed for every run. + + While this is still very synthetic and unrepresentave of malloc + performance + as a whole, it is exercising more situations in tcmalloc fastpath. So + it a + step forward. + +M Makefile.am +M benchmark/malloc_bench.cc +A benchmark/run_benchmark.c +A benchmark/run_benchmark.h + +commit 347a830689e4dba2adc1368d00fe4723ba726b4a +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Oct 10 15:58:11 2015 -0700 + + Ensure that PPROF_PATH is set for debugallocation_test + + Which fixes issue #728. + +M src/tests/debugallocation_test.sh + +commit a9059b7c30308a4f4ab25f00e55b67c649614ed3 +Author: Aliaksey Kandratsenka <alkondratenko@gmail.com> +Date: Sat Oct 10 15:35:54 2015 -0700 + + prevent clang from inlining Mallocer in heap checker unittest + + Looks like existing "trick" to avoid inlining doesn't really prevent + sufficiently smart compiler from inlining Mallocer function. Which + breaks tests, since test relies Mallocer having it's own separate + stack + frame. + + Making mallocer_addr variable volatile is seemingly enough to + stop that. 
+ +M src/tests/heap-checker_unittest.cc + +commit 6627f9217d8897b297c6da038cfbcff6a3086cfa +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Sep 26 19:52:37 2015 -0700 + + drop cycleclock + +M Makefile.am +D src/base/cycleclock.h +M src/base/sysinfo.cc +M vsprojects/addressmap_unittest/addressmap_unittest.vcproj +M +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +M vsprojects/packed-cache_test/packed-cache_test.vcproj + +commit f985abc29607b8a5662ddac7bd1ed99c71ceeb11 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Sep 26 19:50:03 2015 -0700 + + amputate unportable and unused stuff from sysinfo + + We still check number of cpus in the system (in spinlock code), + but old + code was built under assumption of "no calls malloc" which is + not needed + in tcmalloc. Which caused it to be far more complicated than + necessary (parsing procfs files, ifdefs for different OSes and + arch-es). + + Also we don't need clock cycle frequency measurement. + + So I've removed all complexity of ald code and NumCPUs function and + replaced it with GetSystemCPUsCount which is straightforward and + portable call to sysconf. + + Renaming of cpus count function was made so that any further code that + we might port from Google that depends on old semantics of NumCPUs + will + be detected at compile time. And has to be inspected for whether it + really needs that semantics. + +M src/base/spinlock.cc +M src/base/sysinfo.cc +M src/base/sysinfo.h + +commit 16408eb4d71ecbb1dd3b3c7ff22c94bd254d7b58 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Tue Sep 29 11:59:25 2015 -0700 + + amputated wait_cycles accounting in spinlocks + + This is not needed and pulls in CycleClock dependency that lowers + code portability. 
+ +M src/base/spinlock.cc +M src/base/spinlock.h + +commit fedceef40cd217ef406bc2522e9a8f879d60b0c0 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Tue Sep 29 11:55:35 2015 -0700 + + drop cycleclock reference in ThreadCache + +M src/thread_cache.cc + +commit d7fdc3fc9ddc0937eb7961b1d8c864cc8182f0d9 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Tue Sep 29 11:43:09 2015 -0700 + + dropped unused and unsupported synchronization profiling facility + + Spinlock usage of cycle counter is due do tracking of time it's spent + waiting for lock. But this tracking is only useful we actually have + synchronization profiling working, which dont have. Thus I'm dropping + calls to this facility with eye towards further removal of cycle clock + usage. + +M Makefile.am +M src/base/spinlock.cc +D src/base/synchronization_profiling.h + +commit 3a054d37c1f5323462bd77f55be02c5b0d764611 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Mon Sep 28 15:40:15 2015 -0700 + + dropped unused SpinLockWait function + +M src/base/spinlock.cc +M src/base/spinlock_internal.cc +M src/base/spinlock_internal.h + +commit 5b62d38329f04ee30fb475a36b552c1ebc4e1e79 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Mon Sep 28 10:27:49 2015 -0700 + + avoid checking for dup. entries on empty backtrace + + This might fix issue #721. But it is right thing to do + regardless. Since + if depth is 0 we'll be reading random "garbage" on the stack. + +M src/profiler.cc + +commit 7b9ded722e4cef9a44b8d8bfa80d3e1e108cf590 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Sep 12 16:26:54 2015 -0700 + + fixed compiler warning in memory_region_map.cc + +M src/memory_region_map.cc + +commit 4194e485cbb5d8c59f65aba49da63c08ecc573da +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Sep 12 16:20:53 2015 -0700 + + Don't link libtcmalloc_minimal.so to libpthread.so + + So that LD_PRELOAD-ing doesn't force loading libpthread.so which may + slow down some single-threaded apps. 
+ + tcmalloc already has maybe_threads facility that can detect if + libpthread.so is loaded (via weak symbols) and provide 'simulations' + of + some pthread functions that tcmalloc needs. + +M Makefile.am +M src/maybe_threads.cc +M src/maybe_threads.h +M src/static_vars.cc + +commit 121038308d8c5b34707614c44de265816a322563 +Author: Fredrik Mellbin <fredrik.mellbin@gmail.com> +Date: Sun Sep 27 01:18:31 2015 +0200 + + Check if _MSC_VER is defined to avoid warnings + +M src/windows/port.h + +commit 73673229955cf35c5b3046ee3100d94e82d33bc2 +Author: Fredrik Mellbin <fredrik.mellbin@gmail.com> +Date: Sun Sep 27 01:11:14 2015 +0200 + + Make default config.h work with VS2015 + +M src/windows/config.h + +commit ae0a444db06b2327441e6160eafa33d8b7b95629 +Author: Dair Grant <dair@feralinteractive.com> +Date: Thu Oct 1 13:46:22 2015 +0100 + + Ensure ThreadCache objects are CACHELINE_ALIGNED. + +M src/base/basictypes.h +M src/common.cc +M src/system-alloc.cc + +commit ea0b1d315497771d9d077c754fb9d6a92d1c01dc +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Sep 26 11:13:13 2015 -0700 + + unbreak TestErrno again + + Somehow in previous commit I managed to break assignment of memalign + result to variable being tested. Luckily gcc gave me warning. + +M src/tests/tcmalloc_unittest.cc + +commit e53aef24add50bdee5ab2943d96e5c5b2b3bf596 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Sep 26 09:58:45 2015 -0700 + + don't try to test memalign on windows + + This unbreaks tcmalloc_unittest.cc on windows. 
+ +M src/tests/tcmalloc_unittest.cc + +commit 7707582448d5bdf311d88726c5f77fcbb0bedd09 +Merge: 5078abd 9eb63bd +Author: Aliaksey Kandratsenka (aka Aliaksei Kandratsenka) +<alkondratenko@gmail.com> +Date: Sat Sep 26 09:43:50 2015 -0700 + + Merge pull request #717 from myrsloik/master + + Use correct mangled new and delete symbols on windows x64 + +commit 9eb63bddfb7a8a7b086bb3daceeeaa2e3b1d8f67 +Author: Fredrik Mellbin <fredrik.mellbin@gmail.com> +Date: Thu Sep 24 19:57:26 2015 +0200 + + Use correct mangled new and delete symbols on windows x64 + +M src/windows/patch_functions.cc + +commit 5078abdb331e63d7a216994f186eb736861f8df7 +Author: fdeweerdt <fdeweerdt@cloudmark.com> +Date: Tue Aug 18 09:27:39 2015 -0700 + + Don't discard curl options if timeout is not defined. + + Editing the options passed to curl via 'my @URL_FETCHER = ("curl", + "-s");' (in particular to add a -k to ignore self signed certs) fails + for some invocations of curl. In FetchDynamicProfile, 'my @fetcher = + AddFetchTimeout($fetch_timeout, @URL_FETCHER);' ends up being just + 'curl' if timeout is not defined. + + This happens because AddFetchTimeout doesn't retrieve all the + arguments + from the caller. + + [alk@tut.by: updated commit message] + Signed-off-by: Aliaksey Kandratsenka <alk@tut.by> + +M src/pprof + +commit 54505f1d50c2d1f4676f5e87090b64a117fd980e +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Aug 2 19:28:03 2015 -0700 + + help clang with inlining important fast-path functions + + Clang's recent focus on code size doesn't help us in malloc fast-path + because somehow clang completely ignores inline directives. + + In order to help clang generate code that was actually intended by + original authors, we're adding always_inline attribute to key + fast-path functions. + + Clang also guessed likely branch "wrong" in couple places. Which is + now addressed by UNLIKELY declarations there. 
+ +M src/tcmalloc.cc +M src/thread_cache.h + +commit 73c0c8c61b84e268bafd961bf304b2e4d296142f +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Aug 2 19:09:02 2015 -0700 + + moved do_mallor_or_cpp_alloc in better place + +M src/tcmalloc.cc + +commit 41aca070e85258d9d47b0ac47f5eddece8bf45ba +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Aug 2 10:53:32 2015 -0700 + + always set errno to ENOMEM on OOM condition and in single place + + While standards do not require us to set errno to ENOMEM in certain + places (like posix_memalign), existing code may sometimes set it + (i.e. because mmap or sbrk couldn't get memory from kernel) + anyways. And from my reading of glibc, it's malloc is doing more or + less same by just always setting ENOMEM on OOM condition. + + This commit also eliminates some functions (XXX_no_errno) that are not + needed anymore. + +M src/debugallocation.cc +M src/page_heap.cc +M src/tcmalloc.cc + +commit c4493874cd3b662d2778f3b79a3096ae61569b67 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Aug 1 22:20:15 2015 -0700 + + deal with OOM handling in one place and prior to returning result + + This commit removes 4 (four!) duplicates of C++ OOM handling. And + introduces one helper for that. + + Other change is that malloc doesn't have to check tc_new_mode anymore + until it _actually_ deals with OOM condition. Which shaves off couple + instructions from fast-path. + +M src/debugallocation.cc +M src/tcmalloc.cc + +commit 09448a8fe977eaa083340c2504caac8820832179 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Aug 2 18:11:04 2015 -0700 + + added tcmalloc_unittest path with TCMALLOC_HEAP_LIMIT_MB=512 + + I.e. so that we can exercise "voluntary" OOM conditions better. 
+ +M src/tests/tcmalloc_unittest.sh + +commit 73fb7c7eb37e488bab8298be4110e30d83af2184 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Aug 2 01:47:55 2015 -0700 + + added test on errno = ENOMEM on out of memory + +M src/tests/tcmalloc_unittest.cc + +commit eb725ff26371dfb5ae2523802c6abe75833cacef +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Aug 1 20:54:39 2015 -0700 + + unbreak heap-profiler-unittest on gcc 5 + + gcc 5 has got nice new optimization (-fipa-icf) which merges identical + functions into one. And that causes heap-profiler_unittest to fail + since it expects to see both Allocate and Allocate2 in heap + profiles. And smart GCC detects that they are same function and makes + one function out of two and thus breaks this test. + + New code simply adds (disabled) logging calls to make those functions + non-identical. + +M src/tests/heap-profiler_unittest.cc + +commit 53833298f3822b2b8b78c9dc85160d65d78a6857 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Aug 1 19:46:34 2015 -0700 + + unbreak heap_checker_unittest on gcc 5 + + GCC 5 ended up too smart and optimized out assignment of allocated + block to global variable. Which caused test to fail since it triggered + unexpected "leak". + +M src/tests/heap-checker_unittest.cc + +commit 024bae96ce8e1591993fc0da191ce0a92d609481 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Aug 1 17:41:42 2015 -0700 + + dropped support for PREANSINEW define which nobody needs anymore + +M src/debugallocation.cc +M src/tcmalloc.cc + +commit 64e0133901a20f83c41adb36748fd19d21228515 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun May 3 12:55:47 2015 -0700 + + added trivial malloc fast-path benchmark + + While this is not good representation of real-world production malloc + behavior, it is representative of length (instruction-wise and well as + cycle-wise) of fast-path. So this is better than nothing. 
+ +M .gitignore +M Makefile.am +A benchmark/malloc_bench.cc + +commit e1d1311cfb6312cd44e086c879f3e95cbfa0eb9d +Author: Tom Conerly <tomconerly@gmail.com> +Date: Mon Jul 27 11:35:29 2015 -0700 + + issue-699: Free list on error paths + +M src/heap-profile-table.cc + +commit b5b79860fd2b8e1a9b0573e93f942695f2992b59 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Aug 1 11:24:56 2015 -0700 + + issue-702: correctly declare arg-less functions in profiler.h + + This is patch by user mitchblank. + + From his words: + + The problem is pretty simple. Ancient C code allowed declarations + without argument prototypes, i.e. + + int foo(); + + For compatibility this is still accepted. If you want to declare a + function with zero prototypes the correct way to do it is: + + int foo(void); + + C++ also accepts this syntax, but it's not needed there. + + Normally compilers still accept the old-style entries, but with + sufficient warning flags gcc will complain about them. It is good for + header files to have the explicit "void" argument so all compilers are + kept happy. + + I'm attaching a simple patch to add the "void" parameter to that file. + I haven't checked if other headers have the same problem (I'm just + using the profiler at the moment) + + <end of quote> + + In fact "int foo()" means "foo accepts any args" and we really want + "foo has no args". For which int foo (void) is right declaration. + +M src/gperftools/profiler.h + +commit 7df7f14c949d89d9c3f5c7c339bbdda81fb8abc7 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jun 13 21:35:06 2015 -0700 + + issue-693: enable futex usage on arm + + This patch was contributed by user spotrh. 
+ +M src/base/linux_syscall_support.h +M src/base/spinlock_linux-inl.h + +commit cb998e56d763cfe901cf30a692d4cfd4f85259ae +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jun 13 21:28:28 2015 -0700 + + issue-693: convert sys_futex to it's 6-arg form + + Because sys_futex actually takes 6 args in more recent kernels (even + though last two args are unused for FUTEX_{WAKE,WAIT}. + + This is patch contributed by user spotrh. + +M src/base/linux_syscall_support.h + +commit 36066b8df4bc516ade5209a1f60bd84d6448b531 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jun 27 21:15:30 2015 -0700 + + issue-695: implementated TCMALLOC_TRACE_FILE variable + + This is contributed by Paolo Bonzini. + + This commit adds TCMALLOC_TRACE_FILE environment variable, which if + defined overrides location of malloc trace file. + +M src/debugallocation.cc + +commit c4069d2d37b67296d675c2d1de42a46dc6d43efc +Author: Brian Silverman <bsilver16384@gmail.com> +Date: Tue Jun 16 16:56:48 2015 -0700 + + Add empty virtual destructor to class with virtual methods. + + Clang 3.5 has a warning about deleting objects with virtual methods + through non-virtual destructors which was triggered. I'm not sure + whether this actually creates any undefined or otherwise incorrect + behavior, but it seems like a good thing to fix regardless. + + Example compiler warning: + third_party/gperftools/src/tests/profile-handler_unittest.cc:282:5: + error: + delete called on '(anonymous namespace)::BusyThread' that has + virtual + functions but non-virtual destructor [-Wdelete-non-virtual-dtor] + delete busy_worker_; + ^ + +M src/tests/profile-handler_unittest.cc + +commit 019362fefcdca39a46d1a831ed46266c24b9a273 +Author: Patrick LoPresti <lopresti@gmail.com> +Date: Mon May 25 11:56:51 2015 -0700 + + Add support for CPUPROFILE_TIMER_SIGNAL environment variable. + + Which both enables per-thread timers and allows the signal number for + the timer to be selected. 
+ + [alk@tut.by: reformatted commit message for subject line length] + Signed-off-by: Aliaksey Kandratsenka <alk@tut.by> + +M src/profile-handler.cc +M src/tests/profile-handler_unittest.cc + +commit 81d8d2a9e7f941a2051781fd0fe62c683c32f1ef +Author: Milton Chiang <milton.chiang@mediatek.com> +Date: Wed May 13 21:53:05 2015 +0800 + + Add "ARMv8-A" to the supporting list of ARM architecture. + +M src/base/arm_instruction_set_select.h + +commit 64d1a86cb8da245c982d470a7dfdd635197e6e5e +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat May 9 15:38:12 2015 -0700 + + include time.h for struct timespec on Visual Studio 2015 + + This patch was submitted by user wmamrak. + +M src/windows/port.h + +commit 7013b219970a329d1db58fbd7fa7c907bec8dbba +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat May 9 12:48:11 2015 -0700 + + hook mi_force_{un,}lock on OSX instead of pthread_atfork + + This is patch by Anton Samokhvalov. + + Apparently it helps with locking around forking on OSX. + +M src/libc_override_osx.h +M src/static_vars.cc + +commit f25f8e0bf2d361f852929848d79d7ba54586c352 +Author: Angus Gratton <gus@projectgus.com> +Date: Mon May 4 08:18:43 2015 +1000 + + Clarify that only tcmalloc_minimal is supported on Windows. + +M INSTALL +M README_windows.txt + +commit 772a686c45455893708178d3b59b1d3b571015aa +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun May 3 13:15:16 2015 -0700 + + issue-683: fix compile error in clang with -m32 and 64-bit off_t + +M src/malloc_hook_mmap_linux.h + +commit 0a3bafd645764250732f3c0627534142568f6f1f +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Apr 11 10:35:53 2015 -0700 + + fix typo in PrintAvailability code + + This is patch contributed by user ssubotin. + +M src/windows/nm-pdb.c + +commit 6ce10a2a05f13803061538d5c77e89695de59be4 +Author: Matt Cross <mat.cross@gmail.com> +Date: Thu Mar 26 16:13:56 2015 -0400 + + Add support for printing collapsed stacks for generating flame graphs. 
+ +M src/pprof + +commit 2c1a165fa56a6f8dff8fa2662ceda971ad676ead +Author: Matt Cross <matt.cross@gmail.com> +Date: Thu Mar 26 12:10:23 2015 -0400 + + Add support for reading debug symbols automatically on systems + where shared libraries with debug symbols are installed at + "/usr/lib/debug/<originalpath>.debug", such as RHEL and CentOS. + +M src/pprof + +commit 2e654956287043a30aeaec20e5c19650358af618 +Author: Jonathan Lambrechts <jonathan@aljl.eu> +Date: Fri Feb 13 18:52:21 2015 +0100 + + callgrind : handle inlined functions + +M src/pprof + +commit 90d7408d381cf2ad68ce4974cbccd51ed5222ca3 +Author: Jonathan Lambrechts <jonathan@aljl.eu> +Date: Fri Feb 13 18:51:33 2015 +0100 + + pprof : callgrind : fix unknown files + +M src/pprof + +commit aa963a24ae7a74b095a631ea6a86cd071c453911 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Mon Feb 9 08:35:03 2015 -0800 + + issue-672: fixed date of news entry of gperftools 2.4 release + + It is 2015 and not 2014. Spotted and reported by Armin Rigo. 
+ +M NEWS + +commit c66aeabdbacbfd3aff7a6633f34526ca32642f67 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jan 10 14:35:54 2015 -0800 + + fixed default value of HEAP_PROFILER_TIME_INTERVAL in .html doc + +M doc/heapprofile.html + +commit 689e4a5bb4b2a8afecb85e83b8e4f294f80b6124 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jan 10 12:26:51 2015 -0800 + + bumped version to 2.4 + +M NEWS +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 3f5f1bba0c2cb4862c38b0c9050f1cede8a5c344 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Dec 28 18:18:27 2014 -0800 + + bumped version to 2.4rc + +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit c4dfdebc796c2e802db686a7eea483f3d31edbcf +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Dec 28 17:53:02 2014 -0800 + + updated NEWS for gperftools 2.4rc + +M NEWS + +commit 0096be5f6f0bb2a01438ee78b7bb6158ffa5a1fb +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Dec 20 17:17:41 2014 -0800 + + pprof: allow disabling auto-removal of "constant 2nd frame" + + "constand 2nd frame" feature is supposed to detect and workaround + incorrect cpu profile stack captures where parts of or whole cpu + profiling signal handler frames are not skipped. + + I've seen programs where this feature incorrectly removes non-signal + frames. + + Plus it actually hides bugs in stacktrace capturing which we want be + able to spot. + + There is now --no-auto-signal-frm option for disabling it. + +M src/pprof + +commit 4859d8020579cd8db8b1f8bc6af382104b38f96f +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Dec 20 19:56:13 2014 -0800 + + cpuprofiler: drop correct number of signal handler frames + + We actually have 3 and not 2 of them. 
+ +M src/profiler.cc + +commit 812ab1ee7e4365e6a9568834c7e8f4aef10018fb +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Dec 20 19:53:12 2014 -0800 + + pprof: eliminate duplicate top frames if dropping signal frames + + In cpu profiles that had parts of signal handler we could have + situation like that: + + * PC + * signal handler frame + * PC + + Specifically when capturing stacktraces via libunwind. + + For such stacktraces pprof used to draw self-cycle in functions + confusing everybody. Given that me might have a number of such + profiles in the wild it makes sense to treat that duplicate PC issue. + +M src/pprof + +commit e6e78315e4761ad121a5eeb4fdffe3571d81ac17 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Dec 20 19:04:43 2014 -0800 + + cpuprofiler: better explain deduplication of top stacktrace entry + +M src/profiler.cc + +commit 24b8ec28464712bc124af5655ebf877fb3f79032 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Dec 20 17:10:38 2014 -0800 + + cpuprofiler: disable capturing stacktrace from signal's ucontext + + This was reported to cause problems due to libunwind occasionally + returning top level pc that is 1 smaller than real pc which causes + problems. + +M src/stacktrace_libunwind-inl.h + +commit 83588de7204124c8d60703f169c2f3af8aa0ce5f +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Dec 20 17:04:10 2014 -0800 + + pprof: added support for dumping stacks in --text mode + + Which is very useful for diagnosing stack capturing and processing + bugs. 
+ +M src/pprof + +commit 2f29c9b06220ce6ad75d5ab2b98e9f63ade79eea +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Dec 20 17:03:36 2014 -0800 + + pprof: made --show-addresses work + +M src/pprof + +commit b8b027d09a53dd6292d2f7c45aec40198628a808 +Author: Raphael Moreira Zinsly <rzinsly@linux.vnet.ibm.com> +Date: Tue Dec 23 10:55:22 2014 -0200 + + Make PPC64 use 64K of internal page size for tcmalloc by default + + This patch set the default tcmalloc internal page size to 64K when + built on PPC. + +M configure.ac + +commit 3f55d874be8812aef9f0f567048188584962b4c1 +Author: Raphael Moreira Zinsly <rzinsly@linux.vnet.ibm.com> +Date: Tue Dec 23 10:29:49 2014 -0200 + + New configure flags to set the alignment and page size of tcmalloc + + Added two new configure flags, --with-tcmalloc-pagesize and + --with-tcmalloc-alignment, in order to set the tcmalloc internal page + size and tcmalloc allocation alignment without the need of a compiler + directive and to make the choice of the page size independent of the + allocation alignment. + +M INSTALL +M configure.ac +M src/common.h + +commit 1035d5c18f64d114ac790b92a96f3b3a1a301eb9 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Dec 21 19:52:34 2014 -0800 + + start building malloc_extension_c_test even with static linking + + Comment in Makefile.am stating that it doesn't work with static + linking is not accurate anymore. + +M Makefile.am + +commit d570a6391cf4c5a5570e22ada5cf0b324c7b8dfd +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Dec 21 19:33:25 2014 -0800 + + unbreak malloc_extension_c_test on clang + + Looks like even force_malloc trick was not enough to force clang to + actually call malloc. I'm now calling tc_malloc directly to prevent + that smartness. 
+ +M src/tests/malloc_extension_c_test.c + +commit 4ace8dbbe2e04da1029a539c72b90dee1724c33f +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Dec 21 18:49:47 2014 -0800 + + added subdir-objects automake options + + This is suggested by automake itself regarding future-compat. + +M .gitignore +M Makefile.am + +commit f72e37c3f99d942dd648e392abc65f294aa94fa8 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Dec 21 18:27:03 2014 -0800 + + fixed C++ comment warning in malloc_extension_c.h from C compiler + +M src/gperftools/malloc_extension_c.h + +commit f94ff0cc0943c7b7bb9390f9d83a57c9d9e11fd0 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Nov 9 14:41:36 2014 -0800 + + made AtomicOps_x86CPUFeatureStruct hidden + + So that access to has_sse2 is faster under -fPIC. + +M src/base/atomicops-internals-x86.h + +commit 987a724c23df4b29eb457e96bb5c1bbab15dc90e +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Nov 9 14:20:20 2014 -0800 + + dropped atopmicops workaround for irrelevant Opteron locking bug + + It's not cheap at all when done in this way (i.e. without runtime + patching) and apparently useless. + + It looks like Linux kernel never got this workaround at all. See + bugzilla ticket: https://bugzilla.kernel.org/show_bug.cgi?id=11305 + + And I see no traces of this workaround in glibc either. + + On the other hand, opensolaris folks apparently still have it (or + something similar, based on comments on linux bugzilla) in their code: + https://github.com/illumos/illumos-gate/blob/32842aabdc7c6f8f0c6140a256cf42cf5404fefb/usr/src/uts/i86pc/os/mp_startup.c#L1136 + + And affected CPUs (if any) are from year 2008 (that's 6 years now). + + Plus even if somebody still uses those cpus (which is unlikely), they + won't have working kernel and glibc anyways. 
+ +M src/base/atomicops-internals-x86.cc +M src/base/atomicops-internals-x86.h + +commit 7da5bd014d77ddaf694054b1e3ae0a3ef92ab384 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Dec 20 21:18:07 2014 -0800 + + enabled aggressive decommit by default + + TCMALLOC_AGGRESSIVE_DECOMMIT=f is one way to disable it and + SetNumericProperty is another. + +M src/static_vars.cc +M src/tests/tcmalloc_unittest.sh + +commit 51b0ad55b3267caff6cd2d25815bfb913179b526 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Nov 9 17:36:49 2014 -0800 + + added basic unit test for singular malloc hooks + +M src/tests/malloc_extension_c_test.c + +commit bce72dda078d2cb3e9745077f9903e642a966131 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Nov 9 17:12:05 2014 -0800 + + inform compiler that tcmalloc allocation sampling is unlikely + + Now compiler generates slightly better code which produces jump-less + code for common case of not sampling allocations. + +M src/tcmalloc.cc + +commit 4f051fddcd7af53e1607b6a4866ffa461a0033ef +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Mar 15 13:18:28 2014 -0700 + + eliminated CheckIfKernelSupportsTLS + + We don't care about pre-2.6.0 kernels anymore. So we can assume that + if compile time check worked, then at runtime it'll work. + +M src/tcmalloc.cc +M src/thread_cache.cc +M src/thread_cache.h +M src/windows/port.cc + +commit 81291ac3992ec7500faf2026c72feb80c9634dc3 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Nov 9 16:48:45 2014 -0800 + + set elf visibility to hidden for malloc hooks + + To speed up access to them under -fPIC. + +M src/malloc_hook-inl.h + +commit 105c004d0c84137f32cc71b8d3f7899fcc8c2e72 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Nov 9 14:40:30 2014 -0800 + + introduced ATTRIBUTE_VISIBILITY_HIDDEN + + So that we can disable elf symbol interposition for certain + perf-sensitive symbols. 
+ +M src/base/basictypes.h + +commit 6a6c49e1f53df3d00a0661a86090534146ce686c +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Nov 9 16:23:06 2014 -0800 + + replaced separate singular malloc hooks with faster HookList + + Specifically, we can now check in one place if hooks are set at all, + instead of two places. Which makes fast path shorter. + +M src/malloc_hook-inl.h +M src/malloc_hook.cc + +commit ba0441785bae214566008e69adcd915800d9dbb3 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Nov 9 16:13:54 2014 -0800 + + removed extra barriers in malloc hooks mutation methods + + Because those are already done under spinlock and read-only and + lockless Traverse is already tolerant to slight inconsistencies. + +M src/malloc_hook.cc + +commit 890f34c77ef79dfe1e00ce36a3f91aee7fe759b7 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Nov 9 16:08:59 2014 -0800 + + introduced support for deprecated singular hooks into HookList + + So that we can later drop separate singular hooks. + +M src/malloc_hook-inl.h +M src/malloc_hook.cc + +commit 81ed7dff11de915b12c4111d403e52c81c786f82 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Dec 7 13:33:40 2014 -0800 + + returned date of 2.3rc in NEWS back + +M NEWS + +commit 463a619408219fff8955d47a71de1aab31ebd129 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Dec 7 12:53:35 2014 -0800 + + bumped version to 2.3 + +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 76e8138e12567e2ee1f638e8fcffadc2ca1de83d +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Dec 7 12:46:49 2014 -0800 + + updated NEWS for gperftools 2.3 + +M NEWS + +commit 8eb4ed785ae883acb6425fd980e9d3e6bdcab89d +Author: Raphael Moreira Zinsly <rzinsly@linux.vnet.ibm.com> +Date: Thu Nov 27 14:11:09 2014 -0200 + + Added option to disable libunwind linking + + This patch adds a configure option to enable or disable libunwind + linking. + The patch also disables libunwind on ppc by default. 
+ +M configure.ac + +commit 3b94031d21fac39ce5be820f4f8bc37626ed4c08 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Thu Nov 27 11:48:08 2014 -0800 + + compile libunwind unwinder only of __thread is supported + + This fixed build on certain OSX that I have access to. + +M src/stacktrace.cc + +commit 3ace468202a8647a16134d0bb42fa497fa3ec0d4 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Thu Nov 27 10:43:11 2014 -0800 + + issue-658: correctly close socketpair fds when socketpair fails + + This applies patch by glider. + +M src/symbolize.cc + +commit e7d5e512b068fb76f5eeed7985bebd9c0d14e226 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Nov 2 20:02:12 2014 -0800 + + bumped version to 2.3rc + +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 1d44d378513a8782c058833118b664f38214810b +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Nov 2 19:38:29 2014 -0800 + + updated NEWS for gperftools 2.3rc + +M NEWS + +commit 1108d83cf4a1692fce3b736e16d3e98c33329177 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Sep 7 13:09:14 2014 -0700 + + implemented cpu-profiling mode that profiles threads separately + + Default mode of operation of cpu profiler uses itimer and + SIGPROF. This timer is by definition per-process and no spec defines + which thread is going to receive SIGPROF. And it provides correct + profiles only if we assume that probability of picking threads will be + proportional to cpu time spent by threads. + + It is easy to see, that recent Linux (at least on common SMP hardware) + doesn't satisfy that assumption. Quite big skews of SIGPROF ticks + between threads is visible. I.e. I could see as big as 70%/20% + division instead of 50%/50% for pair of cpu-hog threads. (And I do see + it become 50/50 with new mode) + + Fortunately POSIX provides mechanism to track per-thread cpu time via + posix timers facility. 
And even more fortunately, Linux also provides + mechanism to deliver timer ticks to specific threads. + + Interestingly, it looks like FreeBSD also has very similar facility + and seems to suffer from same skew. But due to difference in a way + how threads are identified, I haven't bothered to try to support this + mode on FreeBSD. + + This commit implements new profiling mode where every thread creates + posix timer which tracks thread's cpu time. Threads also also set up + signal delivery to itself on overflows of that timer. + + This new mode requires every thread to be registered in cpu + profiler. Existing ProfilerRegisterThread function is used for that. + + Because registering threads requires application support (or suitable + LD_PRELOAD-able wrapper for thread creation API), new mode is off by + default. And it has to be manually activated by setting environment + variable CPUPROFILE_PER_THREAD_TIMERS. + + New mode also requires librt symbols to be available. Which we do not + link to due to librt's dependency on libpthread. Which we avoid due + to perf impact of bringing in libpthread to otherwise single-threaded + programs. So it has to be either already loaded by profiling program + or LD_PRELOAD-ed. + +M Makefile.am +M configure.ac +M src/maybe_threads.cc +M src/maybe_threads.h +M src/profile-handler.cc +M src/tests/profile-handler_unittest.cc + +commit 714bd93e42535e759716324a90fbb395506499d2 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Aug 10 20:28:11 2014 -0700 + + drop workaround for too old redhat 7 + + Note that this is _not_ RHEL7 but original redhat 7 from early 2000s. 
+ +M configure.ac + +commit 8de46e66fcd2577758ab297b553bb0f468d8a97a +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Aug 10 19:38:07 2014 -0700 + + don't add leaf function twice to profile under libunwind + +M src/profiler.cc + +commit 2e5ee0488996437aeef2028ad95d969b56abcad1 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Aug 10 12:19:45 2014 -0700 + + pprof: indicate if using remote profile + + Missing profile file is common source of confusion. So a bit more + clarify is useful. + +M src/pprof + +commit 6efe96b41c9531f68f806faa0464445f884178ce +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Oct 11 15:09:18 2014 -0700 + + issue-493: correctly detect __ARM_ARCH_6ZK__ for MemoryBarrier + + Which should fix issue reported by user pedronavf + +M src/base/atomicops-internals-arm-v6plus.h + +commit 8e97626378d5c4151a480dea1964e25969c9311d +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Nov 2 11:28:30 2014 -0800 + + issue-655: use safe getenv for aggressive decommit mode flag + + Because otherwise we risk deadlock due to too early use of getenv on + windows. + +M src/base/commandlineflags.h +M src/static_vars.cc + +commit 8c3dc52fcfe02412a529769a22cbc75388a5d368 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Oct 18 16:35:57 2014 -0700 + + issue-654: [pprof] handle split text segments + + This applies patch by user simonb. + + Quoting: + + Relocation packing splits a single executable load segment into two. + Before: + + LOAD 0x000000 0x00000000 0x00000000 0x2034d28 0x2034d28 + R E 0x1000 + LOAD 0x2035888 0x02036888 0x02036888 0x182d38 0x1a67d0 + RW 0x1000 + + After: + LOAD 0x000000 0x00000000 0x00000000 0x14648 0x14648 R + E 0x1000 + LOAD 0x014648 0x0020c648 0x0020c648 0x1e286e0 0x1e286e0 + R E 0x1000 + ... + LOAD 0x1e3d888 0x02036888 0x02036888 0x182d38 0x1a67d0 + RW 0x1000 + + The .text section is in the second LOAD, and this is not at + offset/address zero. 
The result is that this library shows up in + /proc/self/maps as multiple executable entries, for example (note: + this trace is not from the library dissected above, but rather from an + earlier version of it): + + 73b0c000-73b21000 r-xp 00000000 b3:19 786460 + /data/.../libchrome.2160.0.so + 73b21000-73d12000 ---p 00000000 00:00 0 + 73d12000-75a90000 r-xp 00014000 b3:19 786460 + /data/.../libchrome.2160.0.so + 75a90000-75c0d000 rw-p 01d91000 b3:19 786460 + /data/.../libchrome.2160.0.so + + When parsing this, pprof needs to merge the two r-xp entries above + into a single entry, otherwise the addresses it prints are incorrect. + + The following fix against 2.2.1 was sufficient to make pprof --text + print the correct output. Untested with other pprof options. + +M src/pprof + +commit 44c61ce6c4c713b194330641f400bbf64fd2abec +Author: Ricardo M. Correia <rcorreia@wizy.org> +Date: Wed Oct 8 04:39:14 2014 +0200 + + Fix parsing /proc/pid/maps dump in CPU profile data file + + When trying to use pprof on my machine, the symbols of my program were + not being recognized. + + It turned out that pprof, when calculating the offset of the text list + of mapped objects (the last section of the CPU profile data file), was + assuming that the slot size was always 4 bytes, even on 64-bit + machines. + + This led to ParseLibraries() reading a lot of garbage data at the + beginning of the map, and consequently the regex was failing to + match on + the first line of the real (non-garbage) map. + +M src/pprof + +commit 2a28ef24ddf8013bff59914b10902f1fb07bf9b2 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Sep 6 16:49:24 2014 -0700 + + Added remaining memory allocated info to 'Exiting' dump message + + This applies patch by user yurivict. + +M src/heap-profiler.cc + +commit bbf346a856d4a7c5c2ab0e65d7cccf3dc1f23f13 +Author: Adam McNeeney <adam@meliorist.co.uk> +Date: Fri Aug 22 10:01:24 2014 +0100 + + Cope with new addr2line outputs for DWARF4 + + Copes with ? 
for line number (converts to 0). + Copes with (discriminator <num>) suffixes to file/linenum (removes). + + Change-Id: I96207165e4852c71d3512157864f12d101cdf44a + +M src/pprof + +commit b08d760958dbacc8da822f7f3db76d58c95a114d +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Aug 23 14:45:08 2014 -0700 + + issue-641: Added --show_addresses option + + This applies patch by user yurivict. + +M src/pprof + +commit 3c326d9f200a527bba45f1d222aa6aff20d42bc3 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Tue Aug 19 08:14:08 2014 -0700 + + issue-644: fix possible out-of-bounds access in GetenvBeforeMain + + As suggested by user Ivan L. + +M src/base/sysinfo.cc + +commit f1ae3c446f34f6e8ac901caba281051ec269e4df +Author: jiakai <jia.kai66@gmail.com> +Date: Mon Jul 28 11:28:03 2014 -0700 + + Add an option to allow disabling stripping template argument in pprof + +M src/pprof + +commit a12890df2519d254d1c497b8e0a65bb8fc9e1ab2 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jul 26 14:12:42 2014 -0700 + + issue-635: allow whitespace in libraries paths + + This applies change suggested by user mich...@sebesbefut.com + +M src/pprof + +commit d5e36788d8bc626403dc020a86213cfc740ee73d +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jul 26 13:59:22 2014 -0700 + + issue-636: fix prof/web command on Windows/MinGW + + This applies patch sent by user chaishushan. + +M src/pprof + +commit 4b788656bb9c480640d917d27d8a94a5eae436f5 +Author: Michael Pasieka <michael.pasieka@highwinds.com> +Date: Sun Jul 13 18:09:35 2014 -0700 + + added option to display stack traces in output for heap checker + + Quoting from email: + + I had the same question as William posted to stack overflow back on + Dec 9,2013: How to display symbols in stack trace of google-perftools + heap profiler (*). I dug into the source and realized the + functionality was not there but could be added. I am hoping that + someone else will find this useful/helpful. 
+ + The patch I created will not attach so I am adding below. + + Enjoy! + + -- Michael + + * + http://stackoverflow.com/questions/20476918/how-to-display-symbols-in-stack-trace-of-google-perftools-heap-profiler + +M src/pprof + +commit 3abb5cb819bafe7004363f041c194afd827cb053 +Author: WenSheng He <zhsBernie@gmail.com> +Date: Mon Jun 30 14:39:13 2014 +0800 + + issue-630: The env var should be "CPUPROFILE" + + To enable cpu profile, the env var should be "CPUPROFILE", not + "PROFILE" + actually. + + Signed-off-by: Aliaksey Kandratsenka <alk@tut.by> + +M src/gperftools/profiler.h + +commit fd81ec257883c6d5486e4568c955dff86dbed5c8 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jun 28 13:05:12 2014 -0700 + + issue-631: fixed miscompilation of debugallocation without mmap + + This applies patch sent by user iamxujian. + + Clearly, when I updated debugallocation to fix issue-464 I've broken + no-mmap path by forgetting closing brace. + +M src/debugallocation.cc + +commit 2e90b6fd72fec33aedf547d1977bdee6b77645b9 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jun 21 15:45:44 2014 -0700 + + bumped version to 2.2.1 + +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 577b940cc0a0ef207115d071a81127f93f02c083 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jun 21 15:39:46 2014 -0700 + + updated NEWS for 2.2.1 + +M NEWS + +commit 2fe4b329ad4d3f242b0bd73295375e70be79187b +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun May 18 10:59:06 2014 -0700 + + applied chromium patch fixing some build issue on android + + This applies patch from: https://codereview.chromium.org/284843002/ by + jungjik.lee@samsung.com + +M src/libc_override_gcc_and_weak.h + +commit c009398e3239be8ae6185ad83685caf7458c49bb +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Jun 15 12:53:14 2014 -0700 + + issue-628:package missing stacktrace_powerpc-{linux,darwin}-inl.h + + This headers were missing in .tar.gz because they were not mentioned + 
anywhere in Makefile.am. + +M Makefile.am + +commit 81d99f21ede78ab8d5fec15d0055416ac1b581f3 +Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com> +Date: Tue Jun 3 07:50:56 2014 -0500 + + issue-626: Fix SetupAggressiveDecommit initialization + + This patch fixes the SetupAggressiveDecommit initialization to + run after + pageheap_ creation. Current code it not enforcing it, since + InitStaticVars is being called outside the static_vars module. + +M src/static_vars.cc + +commit 846b775dfadb77901202ae7ddbac30ad1de7df01 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat May 3 17:50:11 2014 -0700 + + bumped version to 2.2 + +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit cdf8e1e932016bd5f1737e1f10bce07226228d15 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat May 3 17:44:38 2014 -0700 + + updated NEWS for 2.2 + +M NEWS + +commit 0807476f56b9dc024375467be0f0b07fae3ee7fb +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat May 3 17:38:14 2014 -0700 + + issue-620: windows dll patching: fixed delete of old stub code + + After code for issue 359 was applied PreamblePatcher started using + it's own code to manage memory of stub code fragments. It's not using + new[] anymore. And it automatically frees stub code memory on + Unpatch. + + Clearly, author of that code forgot to remote that no more needed + delete call. With that delete call we end up trying to free memory + that was never allocated with any of known allocators and crash. 
+ +M src/windows/patch_functions.cc + +commit facd7e83b341e069875b7c1cd7774cf671d932b5 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Apr 19 11:08:51 2014 -0700 + + bumped version to 2.1.90 + +M configure.ac +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit e8e082af25952e9bd10c37c3f028c91fcbbd1cbd +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Apr 19 13:16:02 2014 -0700 + + updated NEWS for 2.2rc + +M NEWS + +commit 802fdb739e7aabcb15443030842a2137a5559338 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Apr 12 18:07:11 2014 -0700 + + issue-610: use TCMallocGetenvSafe from inside malloc + + Instead of plain getenv. So that windows getenv implementation that + may call malloc does not deadlock. + +M src/common.cc +M src/thread_cache.cc + +commit 6b83516adefcf0806825f6dba2eb2232615d744b +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Apr 12 18:06:32 2014 -0700 + + issue-610: made dynamic_annotations.c use TCMallocGetenvSafe + +M src/base/dynamic_annotations.c + +commit aeef3b44201ba663c19ca1c97792b724346e84a9 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Apr 12 18:05:59 2014 -0700 + + issue-610: introduced TCMallocGetenvSafe + + This is version of GetenvBeforeMain that's available to C code. + +M Makefile.am +M src/base/sysinfo.cc +A src/getenv_safe.h + +commit 125e5ed58b72550e6b74e0139c2c3dd5e262a6ec +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Apr 12 12:38:19 2014 -0700 + + don't enable backtrace() for stacktrace capturing by default + + Because we don't yet have a treatment for deadlocks that are caused by + (recursive) use of malloc from within that facility. + +M configure.ac + +commit 75b65f96b0bb44d70e0a461a03a3f8c928390283 +Author: Raphael Moreira Zinsly <rzinsly@linux.vnet.ibm.com> +Date: Tue Apr 8 17:45:13 2014 -0300 + + PowerPC: stacktrace function refactor and fixes + + This patch fixes the stacktrace creating when the function is + interrupted by a signal. 
For Linux, the vDSO signal trampoline
+ symbol is
+ compared against LR from stack backchain and handled different in that
+ case (since the signal trampoline layout a different stack frame).
+
+ Because of this extensive change the PowerPC stacktrace code has now
+ been refactored to split in Linux and Darwin specific codes.
+
+M src/stacktrace.cc
+A src/stacktrace_powerpc-darwin-inl.h
+A src/stacktrace_powerpc-linux-inl.h
+
+commit 8deea9ff2a3e6eef8af64ea0727d6cb840c13769
+Author: Raphael Moreira Zinsly <rzinsly@linux.vnet.ibm.com>
+Date: Tue Apr 8 17:44:08 2014 -0300
+
+ VDSOsupport cleanup
+
+ This patch cleans up unused VDSO getcpu tracking from VDSOsupport
+ class,
+ since the code is not used anywhere in gperftools and symbol name
+ is not
+ architecture independent.
+
+M src/base/vdso_support.cc
+M src/base/vdso_support.h
+
+commit 9d5e1a0aa5a6ad1c7af18b65016fa5c9b7fccb47
+Author: Raphael Moreira Zinsly <rzinsly@linux.vnet.ibm.com>
+Date: Tue Apr 8 17:43:11 2014 -0300
+
+ Fixed issues with heap checker on PPC64 LE.
+
+ Fixed the wrapper for the syscall sys_clone and the test for heap
+ checker on PPC64 LE. Both use the ODP structure, which is only
+ used on BE architectures.
+
+M src/base/linux_syscall_support.h
+M src/tests/heap-checker_unittest.cc
+
+commit 49237462c8ae7920332c1034c623e57b50a6109c
+Author: Raphael Moreira Zinsly <rzinsly@linux.vnet.ibm.com>
+Date: Tue Apr 8 17:42:17 2014 -0300
+
+ Fixed the way that pprof packed profile data in BE.
+
+ pprof was writing profile data in a way that only works for
+ little-endian
+ files, this patch verifies if the system is big-endian and writes
+ packed
+ data correctly.
+
+M src/pprof
+
+commit a1ae66ef110bd87ff97903e86fd84c745db24646
+Author: Raphael Moreira Zinsly <rzinsly@linux.vnet.ibm.com>
+Date: Tue Apr 8 17:37:29 2014 -0300
+
+ Fixed the use of addr2line to discover the separator symbol. 
+ + In systems where addr2line has a version greater than 2.22 pprof fails + in discover the separator symbol (_fini). This patch identifies if + addr2line can find the symbol, otherwise pprof uses objdump to recover + a address that addr2line's newer versions can recognize as the + separator + function. + +M src/pprof + +commit 8b2e5ee831760a94bc407dc255a522eda242b04f +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Tue Apr 1 22:29:11 2014 -0700 + + issue-614: use tc_memalign in ReallocAfterMemalloc test + + Because some OSes lack plain memalign. And we really need to test our + implementation which is always available via tc_malloc. + +M configure.ac +M src/tests/debugallocation_test.cc + +commit 0399af1019240e2d9127a588ddc8e31ff4656df0 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Tue Apr 1 21:30:27 2014 -0700 + + added tc_malloc_skip_new_handler + + This is port of corresponding chromium change at: + https://codereview.chromium.org/55333002/ + + Basic idea is that sometimes apps that use tc_set_new_mode in order to + have C++ out-of-memory handler catch OOMs in malloc, need to invoke + usual malloc that returns 0 on OOM. + + That new API is exactly for that. It'll always return NULL on OOM even + if tc_new_mode is set to true. 
+ +M src/debugallocation.cc +M src/gperftools/tcmalloc.h.in +M src/tcmalloc.cc +M src/tests/tcmalloc_unittest.cc +M src/windows/gperftools/tcmalloc.h +M src/windows/gperftools/tcmalloc.h.in + +commit d77317247e1c8de1ea4e0419318b26f636e64431 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Mar 1 12:13:02 2014 -0800 + + issue deprecation warning on use of google/ headers + +M src/google/heap-checker.h +M src/google/heap-profiler.h +M src/google/malloc_extension.h +M src/google/malloc_extension_c.h +M src/google/malloc_hook.h +M src/google/malloc_hook_c.h +M src/google/profiler.h +M src/google/stacktrace.h +M src/google/tcmalloc.h + +commit e7297f0c14b0c09ec52707ed7477c7aee43a1882 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Mar 15 12:44:11 2014 -0700 + + speed up MallocExtension::instance() + + It was reported that pthread_once is expensive, especially on ppc. + + In new implementation in hot path instead of doing potentially + expensive atomic read with barrier, we do just plain read. + + It's slightly less robust than older implementation, but it should be + faster. + + New code is making assumption that programs do not spawn threads + before main() is called. And therefore all variables & modules are + initialized before threads are created. Which looks like pretty safe + assumption. With that assumption, doing plain read is safe, because + current_instance is initialized as part of module init and therefore + before threads are spawned. + + This patch is based on feedback of Adhemerval Zanella. + +M src/malloc_extension.cc + +commit df227794361f9725eca7420d95e65ab997c79716 +Author: Adhemerval Zanella <azanella@linux.vnet.ibm.com> +Date: Sun Feb 2 19:32:37 2014 -0200 + + Fix getpc_test for PPC64v2 LE + + This patch fix the PPC64 guard to get the function address for + PPC64v2. + It removes the use of an indirection (to get the ODP text address), + since the PPCv2 does not have function descriptors. 
+ +M src/tests/getpc_test.cc + +commit e3deb42d5dcdeeb8a340d03f207f2488ef3fb2eb +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Mar 29 13:28:01 2014 -0700 + + issue-613: remove friend declaration from HeapLeakChecker + + This applies patch by davide.italiano@10gen.com: + + heap-checker.h contains the following friend declaration of main: + friend int main(int, char**). + + C99 allows another declaration of main, i.e. int main(int, char**, + char**), and if code uses it and includes the heap-checker header, + this might result in a conflict, e.g. + + error: declaration of C function 'int main(int, char**, char**)' + conflicts with + int main(int argc, char* argv[], char** envp) + + Actually the comment above the friend declaration of main() mentions + that this is required to get the unittest working and for other + internal usage, but I'm not completely sure if this is true as long as + I'm able to build and run the unittest removing the declaration. + +M src/gperftools/heap-checker.h + +commit 1a28754656bd6dfc1297d62a4c5811b0a5f03180 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Mar 29 13:23:00 2014 -0700 + + issue-612: added missing include for std::min + + Otherwise Visual Studio 2013 rightfully complains + +M src/windows/system-alloc.cc + +commit fe566314267fc3f67c5068d0afe004b1733ff19a +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Mar 1 12:38:08 2014 -0800 + + unbreak building with libunwind + + Caused by premature merging of previous patch. + + When we're searching for backtrace in libexecinfo and don't find it, + we should not reset UNWIND_LIBS to empty value. + + Correct fix is to first search for backtrace in libunwind and then to + search for it in libexecinfo. + +M configure.ac + +commit 91179961490a601f2c611889b3075e995fa5437e +Author: Thomas Klausner <wiz@NetBSD.org> +Date: Tue Feb 25 21:41:28 2014 +0100 + + Look for backtrace() in libexecinfo as well. 
+ + Signed-off-by: Aliaksey Kandratsenka <alk@tut.by> + +M configure.ac + +commit fd3379a213b2a99a72a7544b6208056daf15d0d2 +Author: Thomas Klausner <wiz@NetBSD.org> +Date: Tue Feb 25 21:41:07 2014 +0100 + + Fix unportable test(1) construct. + + Signed-off-by: Aliaksey Kandratsenka <alk@tut.by> + +M configure.ac + +commit a7223c2a14431ffd783117c55f84a2c84423d313 +Author: Thomas Klausner <wiz@NetBSD.org> +Date: Tue Feb 25 21:40:11 2014 +0100 + + Test for memalign in configure.ac. Disable a test that uses memalign + if it is not found. + + Signed-off-by: Aliaksey Kandratsenka <alk@tut.by> + +M configure.ac +M src/tests/debugallocation_test.cc + +commit bd9665ebbe1aca5e1ba993de4214e64ee8bbe43a +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Feb 22 13:46:42 2014 -0800 + + issue-489: added tcmalloc test pass with chromium-style decommit + +M src/tests/tcmalloc_unittest.sh + +commit 6a000d6dd5968ac29f8fc43f7dfc736338e11781 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Feb 22 13:46:11 2014 -0800 + + issue-489: added unit test for chromium-style decommitting + +M src/tests/tcmalloc_unittest.cc + +commit eb2d69014cb3e163f2ab3ed676fbedf5c3a97caa +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Feb 22 13:10:08 2014 -0800 + + issue-489: made tests pass on enabled chromium-style decommitting + +M src/tests/tcmalloc_unittest.cc + +commit a92fc76f72318f7a46e91d9ef6dd24f2bcf44802 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Feb 22 13:09:05 2014 -0800 + + issue-489: enable chromium-style decommitting on env variable + + TCMALLOC_AGGRESSIVE_DECOMMIT=t now enables aggressive decommitting by + default. + +M src/static_vars.cc + +commit c7ce50cd04ea08bd20d4ea4b2924e6a4451d2565 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 9 17:35:24 2013 -0800 + + issue-489: implemented API to set chromium-style de-committing + + Chrome has code to decommit (release back to OS) every span that's + released. 
I don't want to make it default, but indeed some + applications may want to enable this mode. + + The code itself is taken from 2-way-merging of code from Chromium + fork. + +M src/page_heap.cc +M src/page_heap.h +M src/tcmalloc.cc + +commit 1d707cd4a3dfe6f238a530f945291acfd5995042 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 9 17:19:34 2013 -0800 + + issue-489: fixed warning + + Computing certain values just for ASSERT raises just warning from + compiler because if NDEBUG is set those are dead code. + +M src/page_heap.cc + +commit 91bffcbad60d84beebe8b69a1db6c85c10fc04bf +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 9 16:22:35 2013 -0800 + + issue-489: ported chromium windows decommitting code + + I tried to do it cleanly with merges but chromium code has so many + relevant commits (with frequent reverts) that makes it near + impossible. + + Simpler 2-way emerge-files worked in the end. I've removed chromium's + aggressive 'always decommit' behavior which I want to make optional + later. + + Majority of this work is the following commits (but there are more, + particularly against port.cc): + + commit 9c92338c5f8770c440799d24387c3733fd6d826b + Author: jamesr@chromium.org + <jamesr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> + Date: Tue Oct 6 18:33:31 2009 +0000 + + Tracks the amount of committed vs uncommitted memory in tcmalloc's + page heap's freelists + + Keeps track of the number of reserved but not committed pages in + the freelist and uses that to calculate a waste metric, which is + the ratio of committed pages vs pages used by the application. 
+ This is exposed in the GetStats() call (which is used for + about:tcmalloc) and through GetNumericalProperty() in Malloc + + BUG=none + TEST=open about:tcmalloc and monitor 'WASTE' columns while using + the browser + + Review URL: http://codereview.chromium.org/251065 + + git-svn-id: svn://svn.chromium.org/chrome/trunk/src@28133 + 0039d316-1c4b-4281-b951-d872f2087c98 + + commit aef4f1be3eec2059a7c6e2c106050a5f3d6ccf12 + Author: jar@chromium.org + <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> + Date: Mon Oct 5 17:58:51 2009 +0000 + + Revert further back to MBelshe's baseline forking TCMalloc + + This changes to decommitting in all paths through the + page_heap delete method (which adds spans to the free lists). + + r=mbelshe,jamesr + Review URL: http://codereview.chromium.org/255067 + + git-svn-id: svn://svn.chromium.org/chrome/trunk/src@28006 + 0039d316-1c4b-4281-b951-d872f2087c98 + + commit e94afbb913b95f512cb8745a2729c73f82b15ae7 + Author: jar@chromium.org + <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> + Date: Thu Oct 1 00:25:41 2009 +0000 + + Rollback Scavenge implemetation and rely on existing functionality + to free + + This is a landing of a patch provided by antonm. See: + http://codereview.chromium.org/235022 + + Also included change to browser_about_handler.cc to fix build, + and I set + TCMALLOC_RELEASE_RATE to 1.0 on line 40 of page_heap.cc (I + think this + was an inadvertent rollback element). 
+ + r=antonm + Review URL: http://codereview.chromium.org/257009 + + git-svn-id: svn://svn.chromium.org/chrome/trunk/src@27692 + 0039d316-1c4b-4281-b951-d872f2087c98 + + commit c585892d2c42a47c95d06a684a6685156c545403 + Author: mbelshe@google.com + <mbelshe@google.com@0039d316-1c4b-4281-b951-d872f2087c98> + Date: Wed Sep 2 17:33:23 2009 +0000 + + Landing for Anton Muhin's tcmalloc patch: + http://codereview.chromium.org/180021/show + + Restore decommitting in IncrementalScavenge and draft Scavenge + method to + be invoked periodically + to reduce amount of committed pages. + + BUG=none + TEST=none + + Review URL: http://codereview.chromium.org/187008 + + git-svn-id: svn://svn.chromium.org/chrome/trunk/src@25188 + 0039d316-1c4b-4281-b951-d872f2087c98 + + commit 14239acc00731e94736ac62e80fc6b17c31ea131 + Author: mbelshe@google.com + <mbelshe@google.com@0039d316-1c4b-4281-b951-d872f2087c98> + Date: Wed Aug 12 02:17:14 2009 +0000 + + Major changes to the Chrome allocator. + + Changes include: + * Fix tcmalloc to release memory. Implements the + TCMalloc_SystemCommit() + mechanism so that tcmalloc can implement SystemRelease() + and later + reuse that memory. + * Enable dynamic switching of allocators based on an environment + variable. + Users can now switch between tcmalloc, jemalloc, the default + windows + heap, and the windows low-fragmentation heap. + * Implements set_new_mode() across all allocators so that we + can be sure + that out-of-memory conditions are handled safely. + + BUG=18345 + TEST=none; plan to get all unit tests running through these + allocators. 
+ + Review URL: http://codereview.chromium.org/165275 + + git-svn-id: svn://svn.chromium.org/chrome/trunk/src@23140 + 0039d316-1c4b-4281-b951-d872f2087c98 + +M src/page_heap.cc +M src/page_heap.h +M src/system-alloc.cc +M src/system-alloc.h +M src/windows/system-alloc.cc + +commit 7be2edfe7f09e7e8c123e958742815784a718880 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Feb 22 12:14:11 2014 -0800 + + issue-525: only warn if --enable-frame-pointers are not given + + This fixes issue when frame pointers warning is given even if frame + pointers are enabled + +M configure.ac + +commit 7e24b6ca2aa2c15a24504c02fad09f47e1ece8b5 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Feb 16 17:04:26 2014 -0800 + + added debugallocation check for offset_ corruption + + It was previously possible (although unlikely) for damaged offset_ + field + to lead FromRawPointer implementation into different MallocBlock. + + As is usual with any damage, it's best to catch errors at earliest + possible time. + +M src/debugallocation.cc + +commit 6dcd73f1eb5ce2cc44ab918c53cd42c472f44c52 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Feb 16 16:59:43 2014 -0800 + + avoid crash in DebugMallocImplementation::GetOwnership + + It was possible that if GetOwnership is passed pointer to memory not + owned by tcmalloc, it would crash. Or incorrectly return + owned. I.e. due to indirection in FromRawPointer. + + New implementation prevents that, but introduces different bug + instead. New implementation incorrectly returns "not owned" for + memalign chunks with big alignment. But in can be argued that passing + pointer returned from different memalign implementation did not work + previously too. 
+ +M src/debugallocation.cc + +commit 33280ffb71fc0e4eb75e455d53824c344d011e35 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Feb 16 16:25:14 2014 -0800 + + removed unused "using" in malloc_extension_test.cc + +M src/tests/malloc_extension_test.cc + +commit 066e524d6e33e9e3364bb6819f98a02b347c14ef +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Feb 16 14:58:35 2014 -0800 + + eliminated useless BASE_XXX defines in debugallocation.cc + + And closed TODO entry for that. + +M src/debugallocation.cc + +commit a2375a1f360c0451ec2a2b852ea26a71fb731727 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Feb 16 14:49:57 2014 -0800 + + issue-464: correctly handle realloc after memalign in debugalloc + + debug memalign is creating special header block to allow us to find + real allocated block. And previous implementation of data copying + wasn't taking that into account and was copying that "alignment + header" into newly allocated block. + +M src/debugallocation.cc +M src/tests/debugallocation_test.cc + +commit d31f522f0e1b0e87ee3d5941e331ba745a0f5c23 +Author: Riku Voipio <riku.voipio@linaro.org> +Date: Mon Feb 3 16:31:32 2014 +0200 + + Add aarch64 defines + + With atomic operations and system call support in place, enable + with __aarch64__ defines Aarch64 support in other files around + the google-perftools header files. After these, google-perftools + testsuite (make check) results: + + 8 of 46 tests failed. + + FAIL: sampling_test.sh + FAIL: heap-profiler_unittest.sh + FAIL: heap-checker_unittest.sh + FAIL: heap-checker-death_unittest.sh + FAIL: sampling_debug_test.sh + FAIL: heap-profiler_debug_unittest.sh + FAIL: heap-checker_debug_unittest.sh + FAIL: profiler_unittest.sh + + While it indicates that there is still work to do, This is still + better than the result I get on ARMv7: + + 12 of 46 tests failed. 
+ +M src/base/basictypes.h +M src/base/cycleclock.h +M src/base/linuxthreads.h +M src/malloc_hook_mmap_linux.h + +commit 15b5e7a35c83ce5d38fa523f2c291a2ac30bb8ed +Author: Riku Voipio <riku.voipio@linaro.org> +Date: Tue Feb 4 11:09:47 2014 +0200 + + linux_syscall_support.h: add aarch64 support + + Aarch64 support for linux_syscall_support.h. Since Aarch64 is a brand + new architecture, none of the legacy system calls are neccesarily + available. Thus some changes were neccesary affect other architectures + as well: + + 1) use getdents64 where available and else getdents (for ppc64) + + 2) other legacy system calls, pipe, waitpid and open replaced + by pipe2, + wait4 and openat where available. + + 3) use fstatat if stat is not available. + + The aarch64 system call interface follows the Aarch64 calling + convention + (regs x0-x5 for arguments and x8 system call number - return in x0). + Clone implementation is adapted from glibc. + + v2: step back in getdents removal due to ppc64 + +M src/base/linux_syscall_support.h +M src/base/linuxthreads.cc + +commit b5e584dfdc22328f713488311707f502aa77ff5f +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Feb 16 19:41:37 2014 -0800 + + issue-525: warn user on lack of libunwind but don't fail + + Because we now have access to other backtrace capturing method(s) at + runtime. + +M configure.ac + +commit 90ba15d1f2f6704af96f62ce1e8c5f214697bab1 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Feb 8 15:30:36 2014 -0800 + + issue-604: implement runtime-selectable stacktrace capturing + + We're now building all supported stacktrace capturing methods. And + there's now a way to select at runtime which method is used. 
+ +M Makefile.am +M configure.ac +M src/stacktrace.cc +M src/stacktrace_arm-inl.h +D src/stacktrace_config.h +M src/stacktrace_generic-inl.h +C055 src/stacktrace.cc src/stacktrace_impl_setup-inl.h +M src/stacktrace_instrument-inl.h +M src/stacktrace_libunwind-inl.h +M src/stacktrace_powerpc-inl.h +M src/stacktrace_win32-inl.h +M src/stacktrace_x86-inl.h + +commit 33f6781d64af88ea23698a084188d8c2ab94ecb1 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Feb 16 18:47:04 2014 -0800 + + issue-605: avoid compilation errors if pthread_key_t is pointer + + Which seems to be the case on later cygwin + +M src/maybe_threads.cc + +commit 100f310088aa30d347cd5d07bbe9d9f80cf2c2d0 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Feb 16 18:27:14 2014 -0800 + + unbreak make dist + +M Makefile.am + +commit a0ed9ace534372fbeee9bfefd6e26b922f0a1835 +Author: Wang YanQing <udknight@gmail.com> +Date: Tue Feb 11 23:20:31 2014 +0800 + + debugallocation: fix bus error on mipsel-linux platform when enable + use_malloc_page_fence + + Fix below "BUS ERROR" issue: + + a0 hold start address of memory block allocated by DebugAllocate + in debugallocation.cc + + gdb) info registers + zero at v0 v1 a0 a1 + a2 a3 + R0 00000000 10008700 772f62a0 00084d40 766dcfef 7fb5f420 00000000 + 004b4dd8 + t0 t1 t2 t3 t4 t5 + t6 t7 + R8 7713c1a0 7712dbc0 ffffffff 777bc000 f0000000 00000001 00000000 + 00403d10 + s0 s1 s2 s3 s4 s5 + s6 s7 + R16 7fb5ff1c 00401b9c 77050020 7fb5fb18 00000000 004cb008 004ca748 + ffffffff + t8 t9 k0 k1 gp sp + s8 ra + R24 0000002f 771adcd4 00000000 00000000 771f4140 7fb5f408 7fb5f430 + 771add6c + sr lo hi bad cause pc + 00008713 0000e9fe 00000334 766dcff7 00800010 771adcfc + fsr fir + 00000004 00000000 + + (gdb) disassemble + Dump of assembler code for function _ZNSs4_Rep10_M_disposeERKSaIcE: + 0x771adcd4 <+0>: lui gp,0x4 + 0x771adcd8 <+4>: addiu gp,gp,25708 + 0x771adcdc <+8>: addu gp,gp,t9 + 0x771adce0 <+12>: lw v0,-28696(gp) + 0x771adce4 <+16>: beq a0,v0,0x771add38 + 
<_ZNSs4_Rep10_M_disposeERKSaIcE+100> + 0x771adce8 <+20>: nop + 0x771adcec <+24>: lw v0,-30356(gp) + 0x771adcf0 <+28>: beqzl v0,0x771add1c + <_ZNSs4_Rep10_M_disposeERKSaIcE+72> + 0x771adcf4 <+32>: lw v0,8(a0) + 0x771adcf8 <+36>: sync + => 0x771adcfc <+40>: ll v0,8(a0) + 0x771add00 <+44>: addiu at,v0,-1 + 0x771add04 <+48>: sc at,8(a0) + 0x771add08 <+52>: beqz at,0x771adcfc + <_ZNSs4_Rep10_M_disposeERKSaIcE+40> + 0x771add0c <+56>: nop + 0x771add10 <+60>: sync + 0x771add14 <+64>: b 0x771add24 + <_ZNSs4_Rep10_M_disposeERKSaIcE+80> + 0x771add18 <+68>: nop + 0x771add1c <+72>: addiu v1,v0,-1 + 0x771add20 <+76>: sw v1,8(a0) + 0x771add24 <+80>: bgtz v0,0x771add38 + <_ZNSs4_Rep10_M_disposeERKSaIcE+100> + 0x771add28 <+84>: nop + 0x771add2c <+88>: lw t9,-27072(gp) + 0x771add30 <+92>: jr t9 + 0x771add34 <+96>: nop + 0x771add38 <+100>: jr ra + 0x771add3c <+104>: nop + End of assembler dump. + + ll instruction manual: + Load Linked: + Loads the destination register with the contents of the word + that is at the memory location. This instruction implicity performs + a SYNC operation; all loads and stores to shared memory fetched prior + to the ll must access memory before the ll, and loads and stores to + shared memory fetched subsequent to the ll must access memory + after ll. + Load Linked and Store Conditional can be use to automatically update + memory locations. *This instruction is not valid in the mips1 + architectures. + The machine signals an address exception when the effective address + is not + divisible by four. 
+ + Signed-off-by: Wang YanQing <udknight@gmail.com> + Signed-off-by: Aliaksey Kandratsenka <alk@tut.by> + [alk@tut.by: removed addition of unused #include] + +M src/debugallocation.cc + +commit 38bfc7a1c2f9ba718bcaa9b82fdcd9c429c8f85f +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Feb 8 14:10:11 2014 -0800 + + removed irrelevant comment + +M src/base/atomicops.h + +commit d03c467a3446088b229f0106d9f47fab6d7c52a4 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Feb 8 13:35:16 2014 -0800 + + allow asking for gcc atomics on all platforms + + I.e. by doing ./configure CPPFLAGS=-DTCMALLOC_PREFER_GCC_ATOMICS + +M src/base/atomicops.h + +commit 6de1f38b687e3974d67adad45217f35a2c376049 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Feb 8 13:43:04 2014 -0800 + + chmod -x configure.ac + + Because configure.ac is not really executable. And because it + interferes with tab completion of configure. + +M configure.ac + +commit e8fe990fa06e337a07059c55adc6ed2a0888de95 +Author: Riku Voipio <riku.voipio@linaro.org> +Date: Wed Jan 29 10:54:29 2014 +0200 + + implement atomics with gcc intrinsics + + Gcc after 4.7 provides atomic builtins[1]. Use these instead of adding + yet-another-assembly port for Aarch64 (64-bit ARM). This patch enables + succesfully building and running atomicops unittest on Aarch64. + + This patch enables using gcc builtins only when no assembly + implementation is provided. But as a quick check, atomicops_unittest + and rest of testsuite passes with atomicops-internals-gcc also + ARMv7 and X86_64 if the ifdef in atomicops is adjusted to prefer + the generic implementation. + + [1] http://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html + +A src/base/atomicops-internals-gcc.h +M src/base/atomicops.h + +commit fa4b1c401da1ac381d4d72172825231b3d5518d9 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Jan 19 22:37:44 2014 -0800 + + issue-599: fixing FreeBSD issue with sbrk + + Applied patch by yurivict. 
+ + It was wrong assembly specifically for FreeBSD in sbrk overriding + code. + +M src/malloc_hook_mmap_freebsd.h + +commit 71a239e559c9ea2300ad0511f8de7077db5369c3 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Jan 19 12:30:53 2014 -0800 + + check debug_malloc_implementation_space via COMPILE_ASSERT + + Because we can and because compile-time is always better. + +M src/debugallocation.cc + +commit 54568e32fc2321e0adef15fb1eab4e3a7f8ce5b0 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Sep 21 18:14:00 2013 -0700 + + issue-565: don't pollute global namespace with thread lister API + + Instead those functions that are original taken from google's "base" + code now have prefix TCMalloc_. So that they don't conflict with other + google's libraries having same functions. + +M src/base/linuxthreads.cc +M src/base/thread_lister.c +M src/base/thread_lister.h +M src/gperftools/heap-checker.h +M src/heap-checker.cc +M src/memory_region_map.h + +commit 64bc1baa1f4723d73ba40cd730b72896bd45a810 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat May 18 17:11:58 2013 -0700 + + issue-{66,547}: use signal's ucontext when unwinding backtrace + + In issue-66 (and readme) it is pointed out that sometimes there are + some issues grabbing backtrace across signal handler boundary. + + This code attempts to fix it by grabbing backtrace from signal's + ucontext which clearly does not include signal handler boundary. + + We're using "feature" of libunwind that for some important platforms + libunwind's context is same as libc's ucontext_t which is given to us + as part of calling signal handler. + +M src/stacktrace_libunwind-inl.h + +commit 185bf3fcc36f8cb3839abdfe652f615bfb5306d1 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jan 11 12:46:02 2014 -0800 + + issue-581: avoid destructing DebugMallocImplementation + + Because otherwise destructor might be invoked well before other places + that might touch malloc extension instance. 
+ + We're using placement new to initialize it and pass pointer to + MallocExtension::Register. Which ensures that destructor for it is + never run. + + Based on idea suggested by Andrew C. Morrow. + +M src/debugallocation.cc + +commit 48a0d131c1aa088c6075e9c4676ee430f81d8600 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jan 18 13:27:41 2014 -0800 + + issue-548: pass -fno-builtin to compiler for unittests + + Because clang doesn't understand -fno-builtin-malloc and friends. And + otherwise new/delete pairs get optimized away causing our tests that + expect hooks to be called to fail. + +M Makefile.am + +commit e98371540d63dde53ce4b7c772d78c1da6b59ea8 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jan 11 16:28:15 2014 -0800 + + eliminated gcc warning on __thread configure snippet + + gcc complained about lack of matching ' in code that force-fails + __thread detection on mingw + +M configure.ac + +commit 60b12171bc73117c0108b847bb310af095cd2778 +Author: xiaoyur347 <xiaoyur347@gmail.com> +Date: Sat Jan 11 11:39:53 2014 +0800 + + fix GCC version detect for platforms other than X86/X64 + + [alk@tut.by: commented why we're disabling __thread not just for x86] + + Signed-off-by: Aliaksey Kandratsenka <alk@tut.by> + +M configure.ac + +commit 764d304222f0c3057ab99babd06246016cbfa505 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Jan 5 12:49:23 2014 -0800 + + don't re-define strtoq for VS2013 + + Which is part of previous change that wasn't correctly applied. 
+ +M src/windows/port.h + +commit 1fc768864d506466b45f3f38474912bcb69bb772 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jan 4 18:28:36 2014 -0800 + + fix compilation under VS 2013 + + This is essentially a copy of corresponding chromium change from: + https://codereview.chromium.org/27017003 + +M src/windows/port.cc +M src/windows/port.h + +commit 4c274b9e20132230e62117ff583ebadd83081d90 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jan 4 18:28:36 2014 -0800 + + issue-592: handle recent mingw with C++11 threads + + Somehow it's c++ headers (like string) define pthread symbols without + even us asking for. That breaks old assumption that pthread symbols + are not available on windows. + + In order to fix that we detect this condition in configure.ac and + avoid defining windows versions of pthread symbols. + +M Makefile.am +M configure.ac +M src/windows/mingw.h +M src/windows/port.h + +commit 1458ee2239e0791567e69112931dc17eb0456cf8 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jan 4 13:54:24 2014 -0800 + + issue-596: removed unused AtomicIncrement operation + + There's no need for us to attempt to maintain Google's atomic ops code + in era of C++11. + +M src/base/atomicops-internals-arm-generic.h +M src/base/atomicops-internals-arm-v6plus.h +M src/base/atomicops-internals-linuxppc.h +M src/base/atomicops-internals-macosx.h +M src/base/atomicops-internals-mips.h +M src/base/atomicops-internals-windows.h +M src/base/atomicops-internals-x86.h +M src/base/atomicops.h +M src/tests/atomicops_unittest.cc + +commit 6630b24e27c6a62727fe73aaae21dcc7364b8fee +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jan 4 13:07:35 2014 -0800 + + Removed unused AtomicPtr::CompareAndSwap + +M src/malloc_hook-inl.h +M src/malloc_hook.cc + +commit a15115271cc475509b17bf7fecbe1ac4966baf2e +Author: xiaoyur347 <xiaoyur347@gmail.com> +Date: Fri Dec 20 09:41:08 2013 +0800 + + add "-finstrument-functions" support for MIPS uclibc. 
+ should configure with CXXFLAGS="-finstrument-functions" + +M src/stacktrace_config.h +A src/stacktrace_instrument-inl.h + +commit 7c4888515ed93347d4793fc066cd6048e519a197 +Author: xiaoyur347 <xiaoyur347@gmail.com> +Date: Fri Dec 20 09:02:49 2013 +0800 + + add uclibc support + * some variables defined with "char *" should be modified to + "const char*" + * For uclibc, glibc's "void malloc_stats(void)" should be "void + malloc_stats(FILE *)", is commented now. + * For uclibc, __sbrk is with attribute "hidden", so we use mmap + allocator for uclibc. + +M Makefile.am +M src/heap-checker.cc +M src/heap-profiler.cc +M src/libc_override_gcc_and_weak.h +M src/malloc_hook_mmap_linux.h +M src/memory_region_map.cc +M src/symbolize.cc +M src/system-alloc.cc + +commit 7bd193bca97d93b43ff6c824bc9f39227329312f +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Dec 14 12:03:02 2013 -0800 + + issue-586: detect main executable even if PIE is active + + Previous logic of detecting main program addresses is to assume that + main executable is at least addressess. With PIE (active by default on + Ubuntus) it doesn't work. + + In order to deal with that, we're attempting to find main executable + mapping in /proc/[pid]/maps. And old logic is preserved too just in + case. + +M src/pprof + +commit f8a2163b5131050765ea877e2573f4930e41f630 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Fri Dec 6 12:23:48 2013 -0800 + + Added AM_MAINTAINER_MODE to disable Makefile rebuild rules + + Some people might want to check-in unpacked result on make dist into + git. But because git doesn't preserve timestamps it would cause those + automatic "auto-retool" rules to trigger. Sometimes even causing build + breakage if system's autotools version don't match autotools version + used for make dist. + + Easiest way around this problem is to simply disable those unnecessary + "maintainer" rebuild rules. 
Especially given that source is always + freely available via git and therefore there should be no reason to + regenerate any of autotools products in 'make dist'-produced sources. + +M configure.ac + +commit 925bbaea76b91bd307634908cfd6902f99804544 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 16 14:01:38 2013 -0800 + + actually check result of CheckAddressBits + + Previously call to CheckAddressBits was made but nothing was done to + it's result. + + I've also make sure that actual size is used in checks and in bumping + up of TCMalloc_SystemTaken. + +M src/system-alloc.cc + +commit f216317a879e972ceafe77e61b2d66fd5f29922e +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 16 15:05:45 2013 -0800 + + use AC_PROG_LIBTOOL to summon libtool + + So that older autotools of rhel 5 can be used + +M configure.ac + +commit d4f4c5a3104e30b14b1090241fb4d0fac6a0e357 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 16 14:00:19 2013 -0800 + + assert that ClassSize(0) is 0 instead >=0 + + Because it's return value being size_t cannot be negative + anyways. This fixes clang warning + +M src/common.cc + +commit 946203d60e50488a0b9e0fe003c13662873fa17d +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 16 13:35:59 2013 -0800 + + assert key size in way that is clearer to gcc + + Both new and old asserts are checking same condition, however new + assert helps gcc see that out of bounds access is not possible in + root_ array. + +M src/pagemap.h + +commit bf2d7bd3f8c1fb6a0843e55e652e37f4ce0fae3d +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 16 13:31:34 2013 -0800 + + fixed gcc warning + + We've recently changed old_signal_handler to by integer, so comparing + it with NULL is not good idea. 
+ +M src/heap-profiler.cc + +commit dd5f979c5e6e7e3127835a659a5af89ac21597d5 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 16 13:31:08 2013 -0800 + + fixed -Wreorder warning in HeapProfileTable constructor + +M src/heap-profile-table.cc + +commit e4ea98f147a7602979f70be022de068b04e68060 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 9 14:19:16 2013 -0800 + + issue-585: fixed use of TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES + + In order to apply that, we're now doing explicit EnvToInt64 call as + part of initializing thread cache module. + +M src/thread_cache.cc + +commit e0102230ec7e8304155798bf7f03d6abcd5991ee +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 16 12:03:35 2013 -0800 + + issue-588: Fix profiler_unittest.cc fork() + + As suggested by Hannes Weisbach. + + Call heap-profiler_unittest with the arguments 1 -2 (one iteration, 2 + fork()ed children). + + Instead of running the test, the program crashes with a std::bad_alloc + exception. This is caused by unconditionally passing the + number-of-threads-argument (0 or positive for threads, negative for + fork()s) in RunManyThreads(), thus allocating an array of pthread_t of + size -2. Depending on the sign of the thread number argument either + RunManyThreads or fork() should be called. + +M src/tests/profiler_unittest.cc + +commit 2bf83af65664a2badbaebdb722ad498e8b38548c +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 16 11:35:32 2013 -0800 + + issue-587: fix typos in unit test scripts + + As proposed by Hannes Weisbach. + + The argument will be garbled because of a misplaced brace, for example + (heap-checker_unittest.sh): + + HEAP_CHECKER="${1:-$BINDIR}/heap-checker_unittest" + which should be: + HEAP_CHECKER="${1:-$BINDIR/heap-checker_unittest}" + + This unit test is used to check the binaries heap-checker_unittest and + heap-checker_debug_unittest. With the typo, the executable + heap-checker_debug_unittest is never actually run. 
+ +M src/tests/heap-checker-death_unittest.sh +M src/tests/heap-checker_unittest.sh +M src/tests/heap-profiler_unittest.sh +M src/tests/tcmalloc_unittest.sh + +commit b3b19269783cf1ed163bdb447cef9ca11b10851c +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Nov 9 12:28:55 2013 -0800 + + issue-584: added license note to files without explicit license + + As suggested at corresponding chromium issue discussion it's seemingly + sufficient to simply refer to project-wide LICENSE file. + +M src/tests/page_heap_test.cc +M src/tests/profile-handler_unittest.cc +M src/tests/raw_printer_test.cc +M src/tests/stack_trace_table_test.cc +M src/windows/config.h + +commit 7be35fb0d844c73f5c5301f62074294df5acbc5c +Author: Joonsoo Kim <iamjoonsoo.kim@lge.com> +Date: Thu Oct 10 14:26:47 2013 +0900 + + central_freelist: change fetch ordering + + When we fetch objects from the span for thread cache, we make + reverse-ordered list against original list on the span and suppy + this list + to thread cache. This algorithm has trouble with newly created span. + Newly created span has ascending ordered objects list. Since thread + cache + will get reverse-ordered list against it, user gets objects as + descending order. + + Following example shows what occurs in this algorithm. + + new span: object list: 1 -> 2 -> 3 -> 4 -> 5 -> ... + fetch N items: N -> N-1 -> N-2 -> ... -> 2 -> 1 -> NULL + thread cache: N -> N-1 -> N-2 -> ... -> 2 -> 1 -> NULL + + user's 1st malloc: N + user's 2nd malloc: N-1 + ... + user's Nth malloc: 1 + + In general, access memory with ascending order is better than + descending + order in terms of the performance. So this patch fix this situation. + + I run below program to measure performance effect. 
+ + #define MALLOC_SIZE (512) + #define CACHE_SIZE (64) + #define TOUCH_SIZE (512 / CACHE_SIZE) + + array = malloc(sizeof(void *) * count); + + for (i = 0; i < 1; i++) { + for (j = 0; j < count; j++) { + x = malloc(MALLOC_SIZE); + array[j] = x; + } + } + + repeat = 10; + for (i = 0; i < repeat; i++) { + for (j = 0; j < count; j++) { + x = array[j]; + for (k = 0; k < TOUCH_SIZE; k++) { + *(x + (k * CACHE_SIZE)) = '1'; + } + } + } + + LD_PRELOAD=libtcmalloc_minimal.so perf stat -r 10 ./a.out 1000000 + + **** Before **** + Performance counter stats for './a.out 1000000' (10 runs): + + 2.715161299 seconds time elapsed + ( +- 0.07% ) + + **** After **** + Performance counter stats for './a.out 1000000' (10 runs): + + 2.259366428 seconds time elapsed + ( +- 0.08% ) + +M src/central_freelist.cc + +commit 7315b45c28564afdc2699beff934a3b45457edc6 +Author: Joonsoo Kim <iamjoonsoo.kim@lge.com> +Date: Thu Oct 10 11:46:12 2013 +0900 + + central_freelist: fetch objects as much as possible during each trial + + It is better to reduce function call if possible. If we try to fetch + objects from one span as much as possible during each function call, + number of function call would be reduced and this would help + performance. + +M src/central_freelist.cc +M src/central_freelist.h + +commit cc002ea19363e1ebbd7f3e809d116ab81a6862cd +Author: Joonsoo Kim <iamjoonsoo.kim@lge.com> +Date: Thu Oct 10 10:06:33 2013 +0900 + + skip unnecessary check during double-check SizeClass intergrity + + On initialization step, tcmalloc double-checks SizeClass integrity + with + all possible size values, 0 to kMaxSize. This causes tremendous + overhead + for short-lived applications. + + For example, consider following command. + 'find -exec grep something {} \;' + + Actual work of each grep is really small, but double-check requires + more work. To reduce this overhead, it is best to remove double-check + entirely. 
But we cannot be sure the integrity without double-checking, + so alternative is needed. + + This patch doesn't remove double-check, instead, try to skip + unnecessary + check based on ClassIndex() implementation. This reduce much + overhead and + the code has same coverage as previous double-check. Following is + the result of this patch. + + time LD_PRELOAD=libtcmalloc_minimal.so find ./ -exec grep "SOMETHING" + {} \; + + * Before + real 0m3.675s + user 0m1.000s + sys 0m0.640s + + * This patch + real 0m2.833s + user 0m0.056s + sys 0m0.220s + + * Remove double-check entirely + real 0m2.675s + user 0m0.072s + sys 0m0.184s + +M src/common.cc + +commit 3e9a33e8c708ccf3ec91e3a3b14e924f5f79e4a6 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Oct 26 16:54:03 2013 -0700 + + issue-583: include pthread.h into static_var.cc + + Because we're doing pthread_atfork. + + Fix suggested by user named drussel. + +M src/static_vars.cc + +commit db0d5730ee059d72b895fbead5237f9cb5bbf98a +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Jun 22 13:48:11 2013 -0700 + + issue-579: ensure order between memory region and libunwind locks + + I.e. to prevent possible deadlock when this locks are taked by + different threads in different order. + + This particular problem was also reported as part of issue 66. 
+ +M src/memory_region_map.cc + +commit 42ddc8d42c82ba6f5137c26b4e7f752b1a022831 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Sep 21 19:03:52 2013 -0700 + + added emacs -*- mode lines for google coding style + +M src/addressmap-inl.h +M src/base/atomicops-internals-arm-generic.h +M src/base/atomicops-internals-arm-v6plus.h +M src/base/atomicops-internals-linuxppc.h +M src/base/atomicops-internals-macosx.h +M src/base/atomicops-internals-mips.h +M src/base/atomicops-internals-windows.h +M src/base/atomicops-internals-x86.cc +M src/base/atomicops-internals-x86.h +M src/base/atomicops.h +M src/base/basictypes.h +M src/base/commandlineflags.h +M src/base/cycleclock.h +M src/base/elf_mem_image.cc +M src/base/elf_mem_image.h +M src/base/elfcore.h +M src/base/googleinit.h +M src/base/linux_syscall_support.h +M src/base/linuxthreads.cc +M src/base/logging.cc +M src/base/logging.h +M src/base/low_level_alloc.cc +M src/base/low_level_alloc.h +M src/base/simple_mutex.h +M src/base/spinlock.cc +M src/base/spinlock.h +M src/base/spinlock_internal.cc +M src/base/spinlock_internal.h +M src/base/spinlock_linux-inl.h +M src/base/spinlock_posix-inl.h +M src/base/spinlock_win32-inl.h +M src/base/stl_allocator.h +M src/base/synchronization_profiling.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/base/thread_lister.h +M src/central_freelist.cc +M src/central_freelist.h +M src/common.cc +M src/common.h +M src/config_for_unittests.h +M src/debugallocation.cc +M src/getpc.h +M src/gperftools/heap-checker.h +M src/gperftools/heap-profiler.h +M src/gperftools/malloc_extension.h +M src/gperftools/malloc_hook.h +M src/gperftools/profiler.h +M src/gperftools/stacktrace.h +M src/gperftools/tcmalloc.h.in +M src/heap-checker-bcad.cc +M src/heap-checker.cc +M src/heap-profile-stats.h +M src/heap-profile-table.cc +M src/heap-profile-table.h +M src/heap-profiler.cc +M src/internal_logging.cc +M src/internal_logging.h +M src/libc_override.h +M src/libc_override_gcc_and_weak.h +M 
src/libc_override_glibc.h +M src/libc_override_osx.h +M src/libc_override_redefine.h +M src/linked_list.h +M src/malloc_extension.cc +M src/malloc_hook-inl.h +M src/malloc_hook.cc +M src/malloc_hook_mmap_freebsd.h +M src/malloc_hook_mmap_linux.h +M src/maybe_threads.cc +M src/maybe_threads.h +M src/memfs_malloc.cc +M src/memory_region_map.cc +M src/memory_region_map.h +M src/packed-cache-inl.h +M src/page_heap.cc +M src/page_heap.h +M src/page_heap_allocator.h +M src/pagemap.h +M src/profile-handler.cc +M src/profile-handler.h +M src/profiledata.cc +M src/profiledata.h +M src/profiler.cc +M src/raw_printer.cc +M src/raw_printer.h +M src/sampler.cc +M src/sampler.h +M src/span.cc +M src/span.h +M src/stack_trace_table.cc +M src/stack_trace_table.h +M src/stacktrace.cc +M src/stacktrace_arm-inl.h +M src/stacktrace_config.h +M src/stacktrace_generic-inl.h +M src/stacktrace_libunwind-inl.h +M src/stacktrace_powerpc-inl.h +M src/stacktrace_win32-inl.h +M src/stacktrace_x86-inl.h +M src/static_vars.cc +M src/static_vars.h +M src/symbolize.cc +M src/symbolize.h +M src/system-alloc.cc +M src/system-alloc.h +M src/tcmalloc.cc +M src/tcmalloc.h +M src/tcmalloc_guard.h +M src/tests/addressmap_unittest.cc +M src/tests/atomicops_unittest.cc +M src/tests/current_allocated_bytes_test.cc +M src/tests/debugallocation_test.cc +M src/tests/frag_unittest.cc +M src/tests/getpc_test.cc +M src/tests/heap-checker_unittest.cc +M src/tests/heap-profiler_unittest.cc +M src/tests/low_level_alloc_unittest.cc +M src/tests/malloc_extension_c_test.c +M src/tests/malloc_extension_test.cc +M src/tests/malloc_hook_test.cc +M src/tests/markidle_unittest.cc +M src/tests/memalign_unittest.cc +M src/tests/packed-cache_test.cc +M src/tests/page_heap_test.cc +M src/tests/pagemap_unittest.cc +M src/tests/profile-handler_unittest.cc +M src/tests/profiledata_unittest.cc +M src/tests/profiler_unittest.cc +M src/tests/raw_printer_test.cc +M src/tests/realloc_unittest.cc +M src/tests/sampler_test.cc +M 
src/tests/sampling_test.cc +M src/tests/simple_compat_test.cc +M src/tests/stack_trace_table_test.cc +M src/tests/system-alloc_unittest.cc +M src/tests/tcmalloc_large_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/tests/testutil.cc +M src/tests/testutil.h +M src/tests/thread_dealloc_unittest.cc +M src/thread_cache.cc +M src/thread_cache.h +M src/windows/auto_testing_hook.h +M src/windows/get_mangled_names.cc +M src/windows/gperftools/tcmalloc.h +M src/windows/gperftools/tcmalloc.h.in +M src/windows/mingw.h +M src/windows/mini_disassembler.cc +M src/windows/mini_disassembler.h +M src/windows/mini_disassembler_types.h +M src/windows/override_functions.cc +M src/windows/port.cc +M src/windows/port.h +M src/windows/preamble_patcher.cc +M src/windows/preamble_patcher.h +M src/windows/preamble_patcher_test.cc +M src/windows/preamble_patcher_with_stub.cc + +commit 799a22624c85e8749f8bed0bfa63282b03e75bdd +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Sep 28 19:32:20 2013 -0700 + + issue-575: do not use cycle count register on arm6 + + Apparently not all arm6 implementations implement it in this + particular way. + + This applies patch by Ben Avison. 
+ +M src/base/cycleclock.h + +commit 2a2d6596f8c3d62b7ec444761f4edf0c85d10d92 +Author: Petr Hosek <phosek@chromium.org> +Date: Fri Aug 23 23:58:47 2013 -0700 + + Adds system-alloc_unittest Visual Studio project + +M gperftools.sln +A vsprojects/system-alloc_unittest/system-alloc_unittest.vcproj + +commit 83aed118e009b92ea88645ef1f7b842a921612c5 +Author: Petr Hosek <phosek@chromium.org> +Date: Fri Aug 23 13:53:35 2013 -0700 + + issue-567: Allows for overriding system allocator on Windows + + [alk@tut.by: minor changes to make mingw build work] + Signed-off-by: Aliaksey Kandratsenka <alk@tut.by> + +M Makefile.am +M src/windows/port.cc +A src/windows/system-alloc.cc +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M vsprojects/tmu-static/tmu-static.vcproj + +commit 4ad16873a0a2d8861a0bfe8234d45e31cc70ee90 +Author: Petr Hosek <phosek@chromium.org> +Date: Sat Aug 24 14:24:47 2013 -0700 + + Exports SysAllocator class to avoid .dll build errors + +M src/gperftools/malloc_extension.h + +commit 326990b5c30d249c3cf4688a88fc415b05494aca +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Sep 14 14:38:53 2013 -0700 + + issue-557: added support for dumping heap profile via signal + + This applies patch from Jean Lee. + + I've reformatted it to match surronding code style and changed + validation logic a bit. I.e. we're not checking signal for range + anymore given we're not sure what different platforms support, but + we're checking return value of signal() for SIG_ERR instead. + +M src/heap-profiler.cc + +commit cb65e49b83c84bc205203c12793f2dd00c4a7721 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Sep 14 16:45:42 2013 -0700 + + issue-536: do not PrintStats if running under valgrind + + When we detect running under valgrind we do not initialize our own + malloc. So trying to print malloc stats when asked via MALLOCSTATS + cannot work. + + This does fix proposed by Philippe Waroquiers. 
In which we detect + running under valgrind prior to checking MALLOCSTATS environment + variable and refuse printing stats if we detect valgrind. + +M src/tcmalloc.cc + +commit 6979583592df555a369a2c975f5117a1f61911af +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Mon Sep 9 07:59:25 2013 -0700 + + issue-564: added atomic ops support for mips{,64} + + This merges patch contributed by Jovan Zelincevic. + + And with that patch tcmalloc build with --enable-minimal (just malloc + replacement) appears to work (passes unit tests). + +M Makefile.am +A src/base/atomicops-internals-mips.h +M src/base/atomicops.h +M src/base/basictypes.h +M src/base/linux_syscall_support.h +M src/stacktrace_config.h + +commit 28dd85e2825af71138621a4417e6ab004631924d +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Fri Aug 30 16:57:14 2013 +0300 + + implement pc from ucontext access for mips + +M m4/pc_from_ucontext.m4 + +commit 819a2b051f1dba9526f2338098fff6dd1700bdb6 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Thu Aug 29 19:00:31 2013 +0300 + + issue-413: disable __thread usage on OSX + + Because it was found that __thread variables access is compiled into + calls to tlv_get_addr which was found to call malloc. Because we + actually use thread-local storage from inside malloc it leads to stack + overflow. So we'll continue using pthreads API for that which is known + to work on OSX. + +M configure.ac + +commit 43809080931127037ce6e748f37a28ce7489387d +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Thu Aug 29 18:51:48 2013 +0300 + + lowered autoconf requirement + + Autoconf 2.59 works. And most notably it will not affect our releases + which are all prepared with newer autoconf. + +M configure.ac + +commit 7a178d472737d988583bade3f0345201651cd43f +Author: Joon-Sung Um <joonsung.um@gmail.com> +Date: Tue Aug 27 22:16:18 2013 +0900 + + Update document for tcmalloc + + Update tcmalloc.html for new parameters. 
+ + * kMaxSize = 256k + * kNumClasses = 88 + * kPageShift = 13 + + Signed-off-by: Aliaksey Kandratsenka <alk@tut.by> + +M doc/tcmalloc.html + +commit 313e08b5a1a951f710b0323b3ca4e1e8d869520e +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Aug 4 20:44:06 2013 +0300 + + issue-560: Revert "issue-481: ... + + ...Replaced test mechanism for distinct address spaces with a more + reliable mechanism" + + This reverts commit 5dd53ab6cbf9d98f2d60546835e84785a104da46 (svn + revision 167) + + With this commit rhel 6.2 fails heap-checker-death_unittest and + without it passes. + + Ticket refers to 2 things and both are invalid: + + * that ptrace PEEKDATA ignores data argument. I've checked kernel + source and found it to be wrong + + * something about distinct address spaces + + And in addition to all that original ticket admits that it doesn't fix + anything. + + It looks like, compared to original code that "fix" is not succesfully + wait-ing on parent's ptrace request. I.e. by adding some additional + diagnostics I'm seeing this sys_waitpid returning ECHILD. + +M src/base/linuxthreads.cc + +commit 6d00cbce92c5576b4bcf07a2b5634b4dcfa14f8a +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Aug 17 15:52:53 2013 +0300 + + issue-561: don't cast function pointer to void * + + Which gcc-3.4 (as shipped in rhel 4) doesn't like. + + Cast to void * was originally added to avoid issue on OSX which + doesn't have sighandler_t. + + In that place we only need to know if it's null or not. So casting to + intptr_t looks like simplest possible way to achieve that. 
+ +M src/profiler.cc + +commit 7d8d522d737f6c55169264b35ebe0be1f9b23efd +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sat Aug 17 15:52:22 2013 +0300 + + add heap-profile-stats.h to dist .tar.gz + +M Makefile.am + +commit d76cfa6d6ca5944d6300765fcb9160e889423750 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Aug 4 18:35:55 2013 +0300 + + issue-502: Count m(un)map for each stacktrace in MemoryRegionMap + + ..instead of HeapProfileTable + + This upstreams chromium commit reviewed at: + https://codereview.chromium.org/12388070 + + Original and upstreaming author is: Dai MIKURUBE + + This patch fixes a bug that gperftools(TCMalloc)'s mmap profiler + (HEAP_PROFILE_MMAP) doesn't hook some memory pages used by the + profiler itself. + + This problem has been lived in gperftools for a long time. + It is discussed in gperftools' issue 502. + https://code.google.com/p/gperftools/issues/detail?id=502 + + Some bugs in the mmap profiler were fixed by + https://code.google.com/p/gperftools/issues/detail?id=383, + but the patch in the issue 383 didn't fix the bug mentioned in + the issue 502. + + This change reverts the previous patch and http://crrev.com/132771 + at first. Then, it modifies MemoryRegionMap to count m(un)map + calls for each stacktrace in itself instead of merging the counts + for each stacktrace in HeapProfileTable. + + This change also cleans up heap-profiler, heap-profile-table and + deep-heap-profile. 
+ + Chromium-BUG=https://code.google.com/p/chromium/issues/detail?id=181517 + Chromium-Committed: + https://src.chromium.org/viewvc/chrome?view=rev&revision=188176 + +M src/heap-checker.cc +A src/heap-profile-stats.h +M src/heap-profile-table.cc +M src/heap-profile-table.h +M src/heap-profiler.cc +M src/memory_region_map.cc +M src/memory_region_map.h + +commit 89b163a0883d40a612331ed8565b8ab1219fdbd0 +Author: Aliaksey Kandratsenka <alk@tut.by> +Date: Sun Aug 4 18:27:07 2013 +0300 + + added .gitignore + +A .gitignore + +commit 18fbc316eebea9db2d7ec41e161c0a3fbb09fa42 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Jul 31 04:02:19 2013 +0000 + + Fix a C++11 error. + + This merges patch by Nico Weber. + + New clang versions complain in C++11 mode that: + + error: cannot initialize a variable of type 'void *' with an rvalue + of type 'uintptr_t' (aka 'unsigned long') + + This same change was done for the google-internal version of + tcmalloc too. + + Reviewed-at: https://codereview.appspot.com/12132043 + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@238 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/stack_trace_table.cc + +commit 674fcd94a8a0a3595f64e13762ba3a6529e09926 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Jul 30 09:14:20 2013 +0000 + + bump version to 2.1 + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@236 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M NEWS +M configure.ac +M packages/deb/changelog +M src/windows/config.h +M src/windows/gperftools/tcmalloc.h + +commit 805a6601939edd9bca60a8911e56b217e477c75e +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Jul 30 08:30:45 2013 +0000 + + issue-559: don't setup fork handler in InitStaticVars + + Because on OSX it calls malloc which leads to deadlock. 
+ + Given that we don't really need that fork handler _that_ early, it's + fine to change it to normal static initializer + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@235 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/static_vars.cc + +commit c583a5babb54aedf6ab6e7746fd976ef2469839d +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Jul 30 08:30:19 2013 +0000 + + issue-559: don't depend on sighandler_t + + Which is not available on OSX. + + I've also fixed style around this place. + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@234 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/profiler.cc + +commit 1af20041bf5821167e69bf905596b2d45d1d94da +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Jul 30 08:29:46 2013 +0000 + + issue-559: AtomicWord xxx_AtomicExchange are all inline + + Otherwise OSX correctly complains about duplicate definitions + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@233 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/atomicops.h + +commit 7a9c4e075fe2572307cddd11bc2f5c7d6b3d29ee +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Jul 26 19:36:08 2013 +0000 + + issue-556: update windows PACKAGE_{VERSION,STRING} + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@232 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/windows/config.h + +commit 11d46f0aac5ef941b74114412bda4747a349c60d +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Jul 26 19:35:38 2013 +0000 + + issue-556: dll-export getpagesize + + Because page_heap_test needs this. 
+ + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@231 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/windows/port.cc +M src/windows/port.h + +commit 6d4ad4428bb8f587d890fa5b7a76ba65dcf48dcb +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Jul 26 19:35:20 2013 +0000 + + issue-556: drop malloc limit at the end of page_heap_test + + Which otherwise causes somewhat weird stack overflow on release + windows builds. + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@230 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/tests/page_heap_test.cc + +commit be54c3f3ed192eabab9b075ec924d2479a956b9d +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Jul 26 19:34:54 2013 +0000 + + issue-556: dll-export TCMalloc_System{Alloc,Release} + + because page_heap_test is using this stuff + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@229 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/system-alloc.h +M src/windows/port.cc + +commit e3716145cbfa1247fa37dd627b2136a061944255 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Jul 26 18:19:05 2013 +0000 + + issue-552: Fix page_heap_test for system with different page size + + This is patch by Adhemerval Zanella. + + PowerPC uses 64K page size instead of 4k for x86 and x86_64. It + makes the + page_heap_test fails because the following test: + + static bool HaveSystemRelease = + TCMalloc_SystemRelease(TCMalloc_SystemAlloc(kPageSize, NULL, + 0), kPageSize); + + will always fail if kPageSize is less than getpagesize() (the default + configuration). + + The following patch fixes it by trying to allocate/deallocate + an entire + page instead. 
+ + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@228 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/tests/page_heap_test.cc + +commit f45133e75c09ca7d5e86bda2db16e30c6fa348c0 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Jul 26 18:04:03 2013 +0000 + + issue-553: Fix syscall wrapper for PowerPC + + This is patch by Adhemerval Zanella. + + * src/stacktrace_powerpc-inl.h: It is just a cleanup for the + stacktrace + functions for PowerPC. The idea is to simplify the code. + * src/tests/heap-checker_unittest.cc: Handles the PPC64 function + descriptor + correctly in malloc tracers. Different from other architecture, + for PPC64 + the address returned in function pointers are the ODP entry, not the + symbol address in .text segment. This leads the comparison bogus, + since + it will compare a ODP entry with a .text address. + * src/heap-checker.cc: Add support for PPC in ptrace. + * src/base/elfcore.h: Likewise. + * src/base/linuxthreads.cc: Fix the thread creation using the clone + wrapper. + * src/base/linux_syscall_support.h: Various fixes for PPC32 and PPC64: + fixes the kernel_stat[64] struct layout, and sys_clone and + sys_socket + implementation. 
+ + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@227 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/elfcore.h +M src/base/linux_syscall_support.h +M src/base/linuxthreads.cc +M src/heap-checker.cc +M src/stacktrace_powerpc-inl.h +M src/tests/heap-checker_unittest.cc + +commit ee2bf097133b115e3da249e43507e02645e46e59 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Jul 20 21:37:26 2013 +0000 + + updated NEWS and packages/ for 2.1rc + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@226 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M NEWS +M packages/deb/changelog +M packages/deb/control +M packages/deb/copyright +M packages/rpm/rpm.spec + +commit 60ab178aeb827378d78cae8f1c6f893b14deaaf0 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Jul 20 21:37:01 2013 +0000 + + bump version number for 2.1rc + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@225 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M configure.ac +M src/windows/gperftools/tcmalloc.h + +commit 7c2aa2f7c2cf901c17b9b9215224f725e798d1f2 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Jul 20 21:35:56 2013 +0000 + + issue-546: fixed return type of Release_AtomicExchange on arm6+ + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@224 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/atomicops-internals-arm-v6plus.h + +commit d8e12e94ea09eaf8b040c6887f75cdf832b5619b +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Jul 20 21:35:14 2013 +0000 + + issue-549: handle most recent mingw that has sleep and nanosleep + + I.e. 
we have to check their presence in configure and in case of their + presence we have to avoid re-defining then in window's port.h + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@223 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M configure.ac +M src/windows/port.h + +commit ac354636de8a4f11e4fde679b52e9f58fda0e079 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Jul 14 04:19:02 2013 +0000 + + issue-550: remove config.h.in + + Which is autoconf product and thus is not needed in source repository + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@222 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +D src/config.h.in + +commit e54971d58641853a9515d9f0313645729eab113a +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Jul 6 21:54:34 2013 +0000 + + issue-534: fixed a number of gcc warnings + + This applies patch from Adhemerval Zanella. + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@221 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/basictypes.h +M src/heap-profiler.cc +M src/profiler.cc +M src/tests/heap-checker_unittest.cc +M src/tests/page_heap_test.cc + +commit 7dd038d7c58c9de889e3fcc552161533ea1baab1 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Jul 6 20:48:18 2013 +0000 + + issue-545: corrected README for CPUPROFILE_FREQUENCY variable + + Applied patch by Mikhail Veltishchev + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@220 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M README + +commit a833a146b7de93774f0a9428edc5eda410d9a7dc +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Jul 6 20:46:50 2013 +0000 + + issue-528: fixed spelling + + This simply applies patch by Lajos Veres + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@219 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + 
+M doc/heapprofile.html +M doc/pprof_remote_servers.html +M src/base/spinlock.cc +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/gperftools/profiler.h +M src/pprof +M src/stacktrace_x86-inl.h +M src/windows/patch_functions.cc +M src/windows/preamble_patcher.cc + +commit 205abf1e7cb860224cbf391c7f69a6192ccc5076 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun May 12 19:15:13 2013 +0000 + + issue-518: add support for x32 ABI to linux_syscall_support.h + + As pointed out in the ticket this is taken from chromium review system + here: https://codereview.chromium.org/13648012 + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@218 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/linux_syscall_support.h + +commit 376a3107aa06063d72132318ac5ad266466e4325 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue May 7 22:28:05 2013 +0000 + + issue-368: allocate enough bytes for large metadata allocations + + During issue-368 review it was correctly pointed out then in place + where I compare metadata allocation size to threshold I should pass + that size down to TCMalloc_SystemAlloc instead of threshold. 
+ + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@217 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/common.cc + +commit 58d39a1b2190b9d0b719ee33f7bc5383a9b27462 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue May 7 22:23:37 2013 +0000 + + issue-368: added missing large_heap_fragmentation_unittest.cc + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@216 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +A src/tests/large_heap_fragmentation_unittest.cc + +commit c45bb7d6033952ea33bb181c1f96999317b535a1 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue May 7 19:22:45 2013 +0000 + + issue-511: fixed negative offset handling for conditional jumps + + While doing and testing issue-511 I've found one subtle bug which is + incorrect handling of short offsets. They are defined to be signed but + previous code used unsigned char for them which caused negative + offsets to look like larger positive offsets. Fix is trivial. + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@215 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/windows/preamble_patcher.cc + +commit 3567b1701aa55a6421aefa5f3de1ca5507cbdf79 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue May 7 19:15:35 2013 +0000 + + issue-511: recognise rex.w jmpq *<literal>(%rip) as iat jump + + Apparently Windows Server 2012 (and presumably windows 8) now has this + form of iat jump. Which is quite useless (rex.w is according to my + understanding is not needed at all) but because of rex.w our code to + recognize jumps like that didn't work. + + Fix is just skip this prefix. 
+ + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@214 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/windows/preamble_patcher.cc + +commit 7fcb5ac0696e7ef7f7e7e51c18745af25de7da96 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue May 7 19:13:29 2013 +0000 + + issue-511: implemented rel8 jump patching + + I've found that Visual Studio 2012 release 32-bit C runtime library + patching fails because array new has rel8 jmp which previous code + could not handle. + + Implementation is largely copied from conditional jumps handling code. + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@213 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/windows/preamble_patcher.cc +M src/windows/preamble_patcher.h +M src/windows/preamble_patcher_with_stub.cc + +commit 8cb4086a0aee539869bd087a85881788545b23d6 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon May 6 19:50:59 2013 +0000 + + issue-368: test that we don't fragment large spans too badly + + This adds unit test that does essentially same things as code to + reproduce bug in + https://code.google.com/p/gperftools/issues/detail?id=368 + + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@212 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am + +commit f25ac4421f9efb635b88105e9c0830293ce19f9d +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon May 6 19:50:23 2013 +0000 + + issue-{368,443}: allocate metadata in big batches + + It uses same approach as PageHeapAllocator. Namely allocates big chunk + which is then used to satisfy smaller allocations. + + In issue-443 gradual heap grows causes old code that allocates + metadata in smaller pieces and thus more frequently to fragment the + heap. It's also causing most of 368 heap fragmentation too. 
+ + New code allocates 8 megs of address space at once for metadata + allocations. Most OSes will allocate actual memory only when + corresponding pages are touched. Thus this change should not cause + increased memory usage. + + I've also made sure metadata is always properly aligned in case we + ever allocate something that breaks natural alignment. E.g. strings. + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@211 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/common.cc + +commit 4fd762cead660d4661359ad507083ac4f4967ee4 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon May 6 19:49:48 2013 +0000 + + issue-368: unmap free spans and retry before growing heap + + Because unmapped spans are not coalesced with normal spans it's + possible that we indeed have a large enough free span, but we fail to + see that because we always consider unmapped and normal spans + separately. That behavior is more likely for larger spans. + + In order to protect programs that grow heap frequently and by small + amounts from much more frequent minor page faults, there's limit of + running that force pages unmap path once per 128 megs of heap growth. 
+ + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@210 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/page_heap.cc + +commit 99fe9944de32046fd954399e60415fba7d03eeb0 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Apr 1 05:32:22 2013 +0000 + + issue-510: remove duplicate arm #elif + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@209 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/basictypes.h + +commit 6354e2c8cdaaaeffdfe7d5b347b125394e2d55fa +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 11 19:24:49 2013 +0000 + + issue-506: fixed bogus unit test failure + + Looks like my version of GCC is aware that free(malloc(X)) is a + no-op. So it optimizes that away completely ignoring simple fact that + we're observing malloc hooks invocations. By adding check that malloc + succeeded we force gcc to actually preserve that malloc call. + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@208 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/tests/malloc_extension_c_test.c + +commit 7896dcb9db2375fcd1d8d19052985a12ad113ba9 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 11 19:16:47 2013 +0000 + + issue-504: disable tls on mingw + + Which is known to fail. 
+ + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@207 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M configure.ac + +commit 8bf4522aa53a7b612a865bc10716192f0f05314b +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 11 19:15:46 2013 +0000 + + issue-504: do not define HAVE_MMAP on mingw + + Because, clearly, windows doesn't have one + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@206 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M configure.ac + +commit 9c24d255cdd8dab7482d968acc69d493680233ce +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 11 19:15:23 2013 +0000 + + issue-504: add AM_LDFLAGS to all *_la targets + + Because automake will not automatically add AM_LDFLAGS if there's + per-target LDFLAGS. See their good info manual. + + This fixes .dll compilation of tcmalloc + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@205 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am + +commit a2275fc6b48b98d5265388bb8586faa7c8cc581f +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 11 19:14:18 2013 +0000 + + issue-504: don't try to typedef pid_t on mingw + + Because recent mingws (more then few years ago seemingly) do that + already. 
+ + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@204 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/windows/mingw.h +M src/windows/port.h + +commit f00977533de6fd27a2f796b2e8e9adb7fcd965e8 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 11 19:13:53 2013 +0000 + + issue-504: use gcc inline assembly atomic ops on mingw + + Because those are well tested and can be trusted + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@203 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M src/base/atomicops.h + +commit 34cdd6821b637b124722687578aa119e8714266c +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 11 19:13:13 2013 +0000 + + issue-504: use lowercase windows includes for mingw x-compilation + + I.e. so that I can build tcmalloc.dll using comfortable environment of + my GNU/Linux box and without having to touch actual windows box or VM. + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@202 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M src/base/sysinfo.h +M src/windows/patch_functions.cc + +commit 0971d80d0ef0c881dcf46453e576a7e289d86975 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 11 18:35:16 2013 +0000 + + issue-503: updated svn:ignore to ignore generated files + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@201 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +commit 05b100d9d4ffeb1342eb5ecf50bd0d2339e626c5 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 11 18:32:05 2013 +0000 + + issue-503: removed checked in configure and other auto* products + + So that it's easier to contribute and review contributions. 
+ + People wishing to build gperftools from svn checkout should run + autogen.sh first which will create those files and then ./configure && + make as usual + + INSTALL file has updated instructions too. + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@200 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M INSTALL +D Makefile.in +D aclocal.m4 +M autogen.sh +D compile +D config.guess +D config.sub +D configure +M configure.ac +D depcomp +D install-sh +D ltmain.sh +D m4/libtool.m4 +D m4/ltoptions.m4 +D m4/ltsugar.m4 +D m4/ltversion.m4 +D m4/lt~obsolete.m4 +D missing +D mkinstalldirs + +commit dcf55b976f19a186ac04d60d95e03ed2795285c4 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 11 18:29:00 2013 +0000 + + issue-503: recover original INSTALL file + + That was accidently overwritten by autotools files update in r196 + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@199 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M INSTALL + +commit 5037f687d1de80722f634e84763572caf2725f98 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 11 01:35:37 2013 +0000 + + issue-425: Fixed tcmalloc unittest crashes for windows builds + + Missing use of volatile was causing vs2010 to perform unwanted + optimization resulting in the crash. See issue for more details. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@198 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/tests/tcmalloc_unittest.cc + +commit beb78cc05babf0a49d21aed0ec789f19fc0f2d28 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 11 00:23:03 2013 +0000 + + issue-493: Fix for building against ARM targets + + gperftools was failing to build for arm targets for the following + reasons: + 1. Some ARMv7 instructions used when the target is ARMv6 so those + fail to assemble + 2. 
The cache line length is undefined for ARM architectures + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@197 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/atomicops-internals-arm-v6plus.h +M src/base/basictypes.h + +commit bd3b3a7e9a67fac846cf574f5bfd241157bdfe3c +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Mar 10 20:17:21 2013 +0000 + + issue-496: Fixes an issue where tcmalloc might deadlock in a thread + calling fork + + tcmalloc contained a bug where some internal locks were left in a + undefined state + between fork, leaving the child process in a deadlock state. This + patch fixes the + issue by introducing stricter locking between the parent nd child + while forking. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@196 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M INSTALL +M Makefile.in +M aclocal.m4 +M config.guess +M config.sub +M configure +M configure.ac +M depcomp +M install-sh +M missing +M mkinstalldirs +M src/central_freelist.h +M src/config.h.in +M src/static_vars.cc + +commit 560ca8650c8a9d2971420970f0ed5e17848150eb +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Mar 10 20:02:46 2013 +0000 + + issue-491: Significant performance improvement for spin lock + contention + + This patch fixes issues where spinlocks under contention were + failing to + wakeup waiters, sometimes resulting in blow ups from 13ns to as high + as 256ms. + Under heavy contention, applications were observed sleeping for + minutes at a + time giving the appearance of a hang. 
+ + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@195 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/spinlock.h + +commit b591d53af951eac60683237204464ebfec2c3afa +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Mar 10 19:54:39 2013 +0000 + + issue-490: Added support for acquire/release atomic exchange + + The atomic ops API was missing support for all of the various + flavors of Acquire_AtomicExchange and Release_AtomicExchange + which have now been added. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@194 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/atomicops-internals-arm-generic.h +M src/base/atomicops-internals-arm-v6plus.h +M src/base/atomicops-internals-linuxppc.h +M src/base/atomicops-internals-macosx.h +M src/base/atomicops-internals-windows.h +M src/base/atomicops-internals-x86.h +M src/base/atomicops.h +M src/tests/atomicops_unittest.cc + +commit 687207b5fcfadc821a3d5b94f2ea112ca933e476 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Mar 10 19:44:43 2013 +0000 + + issue-443: Increase the number of objects transferred between thread + cache and central free list + + This fix is a result of a performance degradation observed in + multi-threaded programs where large + amounts of memory (30GB) are consumed, released by a pool of threads + in a cyclic manner. This was + mainly due to the amount of time we were spending in the slow path + consolidating memory between + the thread cache and central free list. The default has been bumped + up to 32768 and is now also + controllable through the TCMALLOC_TRANSFER_NUM_OBJ environment + setting. 
+ + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@193 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M src/common.cc +M src/common.h +A src/tests/tcmalloc_unittest.sh + +commit 19eff86f850e54683c8868316e362af8919df18a +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Mar 9 01:16:17 2013 +0000 + + actually made svn:ignore work + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@192 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +commit 177a896a8273ae73affc48a126fcf9a3ba91f502 +Author: alkondratenko@gmail.com +<alkondratenko@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Mar 9 01:06:58 2013 +0000 + + added proper svn:ignore + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@191 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +commit b96203b591f63d24cd2979241b18534d22358bca +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Dec 22 20:14:08 2012 +0000 + + issue-461: Fix to malloc_extension.h so that it builds with + -std=gnu++98 + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@190 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/gperftools/malloc_extension.h + +commit 84b983c8d43f43a3c7f71d45d51fc4adcc688cd9 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Dec 22 20:06:47 2012 +0000 + + issue-465: Adding automagic support for __builtin_expect + + Previously __builtin_ expect was based on a macro check against + gcc version. + Now we perform the check via AM which is a cleaner approach. There + are also + a number of code changes here to utilize LIKELY/UNLIKELY macros + based on + __builtin_expect to improve performance. 
+ + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@189 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M configure +M configure.ac +M src/base/basictypes.h +M src/common.h +M src/config.h.in +M src/tcmalloc.cc + +commit a5dacccd6ae4cbfedb5263bfe0f325f03c7f0db8 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Dec 22 19:06:35 2012 +0000 + + issue-466: Clarified stats output and comments for ExtractStats() + and GetThreadStats() + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@188 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/tcmalloc.cc +M src/thread_cache.h + +commit 09d97533b09e473c0cdd269e8cf4e9a9737e49fa +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Dec 22 19:02:52 2012 +0000 + + issue-467: Fixed issue with allocation size being narrowed to 32-bit + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@187 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/tcmalloc.cc + +commit 990889e6232ff3787f1d42d4091a0478ffb93988 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Dec 22 18:53:18 2012 +0000 + + issue-470: Allows the default system allocator to be overidden + + This is useful when you run into cases where you need an allocator + before tcmalloc has been fully initialized. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@186 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/system-alloc.cc + +commit 3bf5f55d78769d2f8525626dd662457d2f014151 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Dec 22 18:38:48 2012 +0000 + + issue-482: Adds support for CACHELINE_ALIGNED value for arm based + devices + + Hardcoded the cacheline size to 32 because this value is supported + by several + arm devices, however a better logic needs to be implemented at some + point to + handle more arm devices. 
+ + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@185 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/basictypes.h + +commit 6856d1d1b2a807efd84dcafa10b865b6de22bc28 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Dec 22 18:34:43 2012 +0000 + + issue-488: Performance improvement for initialization checks + + These came in from the Google-internal version of tcmalloc. They + saw some + decent speed improvements, both on microbenchmarks and big + programs. The + idea is to improve the speed of the "Is everything initialized?" type + of + code, that's at the start of all allocations and deallocations. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@184 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/static_vars.h +M src/tcmalloc.cc +M src/thread_cache.cc +M src/thread_cache.h + +commit ad5aa05838121d52ad1fde5463a796c3320fe067 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Dec 22 18:25:58 2012 +0000 + + issue-483: Speed up accesses to ClassIndex() + + Making its return type unsigned can save a conversion from + signed to unsigned, and getting rid of the ternary operators + seems to help a little bit as well. Various gcc versions weren't + generating conditional moves for them as one would expect. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@183 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/basictypes.h +M src/common.h + +commit 8de78fd85b69bc569ac8fc9e75144e02f5cae851 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Nov 5 04:45:01 2012 +0000 + + issue-452: Adds a control mechanism to the cpu profiler to be switched + on and off using a user defined signal. + + CPUPROFILESIGNAL - Takes a signal number between the value of 1 and + 64 inclusive which represents a signal + number as defined by signal.h. The signal must + not be in use by the program. 
Sending this + signal to the program turns profiling on and off + like a switch. By default the switch is + off when the program starts. Successive profiles + are suffixed with a monotonically increasing + number. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@182 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M doc/cpuprofile.html +M src/profiler.cc + +commit 46f06ca0db41d3f598750caefdaf3c6063969e64 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Nov 4 23:08:17 2012 +0000 + + issue-451: Fixed incorrect assembly for 64-bit barrier load and + store on windows platforms. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@181 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/atomicops-internals-windows.h + +commit 5fe91d5623c2351ba4675db71822fc6be5e2cbce +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Nov 4 22:53:01 2012 +0000 + + issue-450: Move includes for struct mallinfo from tcmalloc.cc to + tcmalloc.h to fix compiler warnings from clang. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@180 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/gperftools/tcmalloc.h.in +M src/tcmalloc.cc + +commit 86a55316baf2a7c0718670fd7342b648e0ef4a87 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Nov 4 19:12:42 2012 +0000 + + issue-449: Disables mmap check when cross compiling since it is + meaningless anyways + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@179 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M configure +M configure.ac + +commit 6f6c2bf68576e43491884364fc05c9ed752adb4f +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Nov 4 19:02:15 2012 +0000 + + issue-448: New environment setting that allows you to set an explicit + heap limit + + TCMALLOC_HEAP_LIMIT_MB - The maximum amount of heap memory that + tcmalloc can use. 
+ TCMALLOC_DISABLE_MEMORY_RELEASE - emulate platform with no + MADV_DONTNEED support (generally for testing purposes). + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@178 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/page_heap.cc +M src/page_heap.h +M src/system-alloc.cc +M src/system-alloc.h +M src/tests/page_heap_test.cc +M src/tests/tcmalloc_unittest.cc +M src/windows/port.cc + +commit 644b1c6e355a0f5dd948ca482a575f49a4bd2032 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Nov 4 18:30:05 2012 +0000 + + issue-445: Fix for wrong cmpx instruction in OSAtomicCompareAndSwap64 + for ppc64 + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@177 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/atomicops-internals-linuxppc.h + +commit e32bb2d9a76dc5ee42ee41a2ca2c6e5caece0150 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Nov 4 18:24:46 2012 +0000 + + issue-444: Fix for invalid conversion build error in signal handler + code + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@176 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/linux_syscall_support.h + +commit abeaf46028c8dfab7e7867ee7a3a49ebe21cf129 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Nov 4 18:15:11 2012 +0000 + + issue-430: Introduces 8-byte alignment support for tcmalloc + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@175 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/common.cc +M src/common.h +M src/tests/tcmalloc_unittest.cc + +commit baaf0188295582ca68df03f70baa13d96a88e2eb +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Nov 4 17:41:47 2012 +0000 + + issue-473: Fix for make install error regarding + src/windows/google/tcmalloc.h + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@174 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M 
Makefile.in + +commit 1363bc69550f43af693f6455118432988042a8dd +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Nov 4 17:21:00 2012 +0000 + + issue-475: Re-applying changes made originally for r150 and r151 + + - Used aclocal, autoupdate, autoconf, and automake to + correctly apply changes made to Makefile.am. Detailed + instructions on this procedure can be found here + http://inti.sourceforge.net/tutorial/libinti/autotoolsproject.html. + - Fixed a number of error/warning messages due to use of newer + aclocal, autoconf, and automake utilities. + - Directly and indirectly related to issue-385 and issue-480. + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@173 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M aclocal.m4 +M configure +M configure.ac +M m4/libtool.m4 + +commit 1243295cdcc20f2f47df9bdc9b8bb50b69c085b0 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Nov 4 15:59:08 2012 +0000 + + issue-475: Amend of r151 and r150 + + In revisions 151 and 150 an attempt was made to enable frame pointers + by default for i386. However, in the process of doing so a number of + files were inadvertently touched as a result of running autogen.sh. As + a result, I have needed to roll back these revisions so that I can + reattempt the change. 
+ + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@172 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M INSTALL +M Makefile.am +M Makefile.in +M aclocal.m4 +M config.guess +M config.sub +M configure +M configure.ac +M depcomp +M install-sh +M ltmain.sh +M m4/libtool.m4 +M m4/ltoptions.m4 +M m4/ltversion.m4 +M m4/lt~obsolete.m4 +M missing +M mkinstalldirs + +commit 5c17459c5e7b4133a2dec5ffe6a9237a114f5378 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Nov 3 16:14:25 2012 +0000 + + issue-476: Amendment to r152:issue-437 to fix a build error due to + PR_SET_TRACER not being defined in older glibc releases. + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@171 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/linuxthreads.cc + +commit 6287bbbbad8730712cfd1ee28ecc0648cbaa9f94 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Nov 3 15:56:27 2012 +0000 + + issue-477: Fix clang compilation errors regarding format specifiers + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@170 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/sysinfo.cc +M src/debugallocation.cc +M src/getpc.h +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profiler.cc +M src/malloc_extension.cc +M src/memory_region_map.cc +M src/symbolize.cc +M src/tcmalloc.cc +M src/tests/markidle_unittest.cc +M src/tests/tcmalloc_unittest.cc + +commit 87699c97cc07b30fc2765415853ae685f7bc45f2 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Nov 3 15:40:01 2012 +0000 + + issue-478: Added the heap profiler tunnable setting + HEAP_PROFILER_TIME_INTERVAL used to dump heap profiles on a specified + time interval. 
+ + git-svn-id: http://gperftools.googlecode.com/svn/trunk@169 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M doc/heapprofile.html +M src/heap-profiler.cc + +commit 4eb6946d689bbaa74f8be78265f6d6ff9ae04ab2 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Nov 3 15:27:58 2012 +0000 + + issue-479: Replaced sscanf usage while iterating proc maps entries + to fix a deadlock introduced by glibc-2.15. + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@168 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/sysinfo.cc + +commit 5dd53ab6cbf9d98f2d60546835e84785a104da46 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Nov 3 14:52:42 2012 +0000 + + issue-481: Replaced test mechanism for distinct address spaces with + a more reliable mechanism + + Rather than using sys_ptrace+PTRACE_PEEK_DATA to determine whether + address spaces are distinct, we now use sys_waitpid+__WCLONE. See + issue-481 for a more detailed rationale. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@167 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/linuxthreads.cc + +commit 6c436203522156699ef11c6792ed481e02cef366 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Nov 3 14:36:33 2012 +0000 + + amend-r165: forgot to regenrate configure and Makefile.in + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@166 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.in +M configure + +commit c566266b10d2fc4e0c67fec25e2a45ebad803196 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Nov 3 14:13:21 2012 +0000 + + issue-480: duplicate of issue-385 fixed in r150 however some of the + comments in the code + regarding frame pointers needed to be clarified. 
+ + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@165 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M configure.ac + +commit 4f325fbcc9f0db17fbf108188a32b2c67bcd38e0 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Oct 28 19:45:03 2012 +0000 + + issue-424: Updated heap profiler documentation that had + mispelling for the heap profiler mmap only environment setting + (HEAP_PROFILE_ONLY_MMAP) + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@164 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M doc/heapprofile.html + +commit a828c0a8ff59300fffce466cc617d40fa39dedea +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Oct 28 19:30:23 2012 +0000 + + issue-417: This commit removed the fatal error message printed along + with the pprof usage information when pprof is run with no arguments. + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@163 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/pprof + +commit e5b095abdc2368111f53cedf376cd1505a2c7583 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Oct 28 18:28:21 2012 +0000 + + issue-411: This commit adds additional logging to the cpu profiler + to warn when the profiler is run and no CPUPROFILE environment + setting can be found. It also adds a new environment variable + PERFTOOLS_UNITTEST to allow certain modules to take action when + running under the umbrella of a unit test. 
+ + git-svn-id: http://gperftools.googlecode.com/svn/trunk@162 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/profiler.cc +M src/tests/profiler_unittest.sh + +commit 57c48e9b5ffdb0f177c07ea8420c4072e816e1a7 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sun Oct 28 14:51:41 2012 +0000 + + issue-405: backed out original commit made in revision 147 due to + the fact that it didn't work as intended + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@161 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/heap-checker.cc + +commit 3e296c28c3c2b67df624d372b75650ae65c87b04 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Sep 18 02:47:24 2012 +0000 + + issue-442: Fixes an issue encountered on PPC with undefined + conversions from 'base::subtle::Atomic64*' to 'const volatile + Atomic32*' + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@160 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/spinlock_internal.cc + +commit 1f59bb34d3ce03df51c8356dd44fcb0b15de4769 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Sep 18 02:33:00 2012 +0000 + + Adding support for CPU frequencies for Linux on PowerPC + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@159 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/sysinfo.cc + +commit a2fec7c694ce992a3b7d3fe4205bb8710b017bab +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Sep 18 01:43:02 2012 +0000 + + issue-446: Fixes compatability issues in malloc_hook_test for PPC64 + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@158 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/tests/malloc_hook_test.cc + +commit b05b6fc7655ba7fee8397da7013db007f0c47e30 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Sep 18 01:27:34 2012 +0000 + + issue-458: Optimizes malloc hooks for weakly 
consistent memory + architectures + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@157 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/malloc_hook-inl.h + +commit 00edf667f62e17a268f03c6aaef19946add75dcf +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Sep 18 01:18:15 2012 +0000 + + issue-456: Adds support for CACHELINE_ALIGNED values for PPC + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@156 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/basictypes.h + +commit 731d0932a081d521a36462df7dcbc45fa4221011 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Sep 18 00:59:16 2012 +0000 + + issue-455: Adds another size for large pages in tcmalloc through + the new define TCMALLOC_LARGE_PAGES64K + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@155 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/common.h + +commit 771b6bcf689bd089b802ddcdb27a40b4fb4a8e3b +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Sep 18 00:52:13 2012 +0000 + + issue-457: Fixed an issue where the 'getpc' testcase was using the + wrong PPC64 preprocessor directive + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@154 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/tests/getpc_test.cc + +commit 711232a1ef4997305a681287191de74f6597dabc +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Sep 18 00:42:23 2012 +0000 + + issue-453 Added support to get the timebase register value using + just one instruction and also adjusts the PPC32 code to the recent + GLIBC one that implements the same functionality + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@153 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/cycleclock.h + +commit fa0209f261c5e065d523bb1858f84fd91eb2f39a +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue 
Sep 18 00:00:20 2012 +0000 + + issue-437 Fixed issues related to new glibc shipped with Ubuntu 10.10 + + 1. ptrace permissions were modifed to be a bit more strict which + required + us to programatically set the permissions while syncing up to + the profiling + thread. + + 2. Order of destructors registered with atexit changed which was + casuing us to + miss generating the backtrace when heap checker was finished. Seems + that we + initially fixed this for FreeBSD and now linux has changed their + behaviour + to be the same. We are now a bit stricter on the rules here + accross all + platforms. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@152 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/linuxthreads.cc +M src/heap-checker.cc + +commit cd723b43ff783a05321f0c0ba79a82494185b23c +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed May 16 12:24:13 2012 +0000 + + issue-385: updates to automake to pick up the explicit setting of + the -fno-omit-frame-pointer for i386 + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@151 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M INSTALL +M Makefile.am +M Makefile.in +M aclocal.m4 +M config.guess +M config.sub +M configure +M configure.ac +M depcomp +M install-sh +M ltmain.sh +M m4/libtool.m4 +M m4/ltoptions.m4 +M m4/ltversion.m4 +M m4/lt~obsolete.m4 +M missing +M mkinstalldirs + +commit 0c0382234d5812474b070b6eccdfd99c98490548 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed May 16 01:28:08 2012 +0000 + + issue-385: as of gcc 4.6 we now need to enable frame pointers for + i386 platforms since the default is to now omit frame pointers + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@150 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am + +commit bafd0f8b7cec4dd0b74a4a8e5456b96744640679 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Apr 21 
17:27:54 2012 +0000 + + Changing internal behaviour of tcmalloc to return an + allocated size of 0 when the target pointer is null (refer to + TCMallocImplementation::GetAllocatedSize). This change was reviewed + as https://codereview.appspot.com/5833050/. + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@149 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/tcmalloc.cc + +commit 9be2f8d802a19ab6e4b2abed3f7cd84780628d16 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Apr 21 16:15:20 2012 +0000 + + Adding in further support for 64-bit NoBarrier_Load on windows + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@148 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/atomicops-internals-windows.h + +commit 68df7fa6057a29b3130d41a27592f74d58c2df57 +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Mar 3 19:45:49 2012 +0000 + + issue-405: Added a tunable to the heap checker that allows the heap + checker error exit code to be overridden. For larger projects this + allows certain heap checked programs that have intentional/known + leaks to be managed. + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@147 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/heap-checker.cc + +commit 0a9d3f8af373c71e5fabdc8c01239c47df4743ed +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Mar 2 06:04:08 2012 +0000 + + issue-410: This set of changes exposes the central cache free list + size, transfer cache free list size, and the meta data overhead size + through the getNumericProperty() function. 
+ + git-svn-id: http://gperftools.googlecode.com/svn/trunk@146 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/gperftools/malloc_extension.h +M src/tcmalloc.cc + +commit 1233f0f9101c3f4e85e04711c7223dc7f73ec4fb +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Feb 21 03:02:43 2012 +0000 + + This file is in the tarball, but I forgot to put it in svn as well. + (It's tricky because it's auto-generated from tcmalloc.h.in at + tarball-making time.) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@145 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +A src/windows/gperftools/tcmalloc.h + +commit ce2891234584069f90e6e0a446e28bf7ca3e031d +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Feb 4 00:10:11 2012 +0000 + + Url-typo in the latest NEWS message. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@144 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M NEWS + +commit 4e9432c5090d5b8354b0b57fca8b96b2efef37e3 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Feb 4 00:07:36 2012 +0000 + + Fri Feb 03 15:40:45 2012 Google + Inc. 
<google-perftools@googlegroups.com> + + * gperftools: version 2.0 + * Renamed the project from google-perftools to gperftools + (csilvers) + * Renamed the .deb/.rpm packagse from google-perftools to + gperftools too + * Renamed include directory from google/ to gperftools/ + (csilvers) + * Changed the 'official' perftools email in setup.py/etc + * Renamed google-perftools.sln to gperftools.sln + * PORTING: Removed bash-isms & grep -q in + heap-checker-death_unittest.sh + * Changed copyright text to reflect Google's relinquished + ownership + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@142 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M AUTHORS +M ChangeLog +M INSTALL +M Makefile.am +M Makefile.in +M NEWS +M README +M README_windows.txt +M aclocal.m4 +M config.guess +M config.sub +M configure +M configure.ac +M depcomp +M doc/cpuprofile.html +M doc/designstyle.css +M doc/heap_checker.html +M doc/heapprofile.html +M doc/index.html +M doc/pprof.1 +M doc/pprof.see_also +M doc/pprof_remote_servers.html +R100 google-perftools.sln gperftools.sln +M install-sh +M missing +M mkinstalldirs +M packages/deb/changelog +M packages/deb/control +M packages/deb/copyright +R071 packages/deb/libgoogle-perftools-dev.dirs +packages/deb/libgperftools-dev.dirs +R079 packages/deb/libgoogle-perftools-dev.install +packages/deb/libgperftools-dev.install +R100 packages/deb/libgoogle-perftools0.dirs +packages/deb/libgperftools0.dirs +R100 packages/deb/libgoogle-perftools0.install +packages/deb/libgperftools0.install +R100 packages/deb/libgoogle-perftools0.manpages +packages/deb/libgperftools0.manpages +M packages/rpm/rpm.spec +M src/base/low_level_alloc.cc +M src/config.h.in +M src/debugallocation.cc +M src/google/heap-checker.h +M src/google/heap-profiler.h +M src/google/malloc_extension.h +M src/google/malloc_extension_c.h +M src/google/malloc_hook.h +M src/google/malloc_hook_c.h +M src/google/profiler.h +M src/google/stacktrace.h +A src/google/tcmalloc.h +C099 
src/google/heap-checker.h src/gperftools/heap-checker.h +C100 src/google/heap-profiler.h src/gperftools/heap-profiler.h +C100 src/google/malloc_extension.h src/gperftools/malloc_extension.h +C100 src/google/malloc_extension_c.h +src/gperftools/malloc_extension_c.h +C099 src/google/malloc_hook.h src/gperftools/malloc_hook.h +C100 src/google/malloc_hook_c.h src/gperftools/malloc_hook_c.h +C100 src/google/profiler.h src/gperftools/profiler.h +C100 src/google/stacktrace.h src/gperftools/stacktrace.h +R098 src/google/tcmalloc.h.in src/gperftools/tcmalloc.h.in +M src/heap-checker-bcad.cc +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profiler.cc +M src/internal_logging.cc +M src/libc_override.h +M src/libc_override_gcc_and_weak.h +M src/libc_override_glibc.h +M src/libc_override_osx.h +M src/malloc_extension.cc +M src/malloc_hook-inl.h +M src/malloc_hook.cc +M src/memfs_malloc.cc +M src/memory_region_map.cc +M src/page_heap.cc +M src/page_heap.h +M src/pprof +M src/profiler.cc +M src/stacktrace.cc +M src/stacktrace_arm-inl.h +M src/stacktrace_generic-inl.h +M src/stacktrace_libunwind-inl.h +M src/stacktrace_powerpc-inl.h +M src/stacktrace_win32-inl.h +M src/stacktrace_x86-inl.h +M src/system-alloc.cc +M src/tcmalloc.cc +M src/tests/current_allocated_bytes_test.cc +M src/tests/debugallocation_test.cc +M src/tests/frag_unittest.cc +M src/tests/heap-checker-death_unittest.sh +M src/tests/heap-checker_unittest.cc +M src/tests/heap-profiler_unittest.cc +M src/tests/low_level_alloc_unittest.cc +M src/tests/malloc_extension_c_test.c +M src/tests/malloc_extension_test.cc +M src/tests/malloc_hook_test.cc +M src/tests/markidle_unittest.cc +M src/tests/profiler_unittest.cc +M src/tests/sampling_test.cc +C059 src/tests/current_allocated_bytes_test.cc +src/tests/simple_compat_test.cc +M src/tests/stacktrace_unittest.cc +M src/tests/system-alloc_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/tests/thread_dealloc_unittest.cc +A src/windows/auto_testing_hook.h +M 
src/windows/config.h +A src/windows/google/tcmalloc.h +R098 src/windows/google/tcmalloc.h.in +src/windows/gperftools/tcmalloc.h.in +M src/windows/patch_functions.cc +M +vsprojects/current_allocated_bytes_test/current_allocated_bytes_test.vcproj +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +M vsprojects/malloc_extension_test/malloc_extension_test.vcproj +M vsprojects/malloc_hook_test/malloc_hook_test.vcproj +M vsprojects/tmu-static/tmu-static.vcproj + +commit e10b720797359e1855a5e4769424988b4e76c3e7 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Feb 4 00:06:48 2012 +0000 + + Last of the remnant empty directories that I don't know why they're + there. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@141 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +commit e55949bba69b7a6eede3468765e86c5ed000be52 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Feb 4 00:06:06 2012 +0000 + + This directory got renamed tmu-static a long time ago. Get rid of + this ancient remnant. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@140 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +commit 0f588815f979721346a6cb05a1cbd83d670b975a +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Feb 4 00:05:39 2012 +0000 + + Apparently I'm not good at deleting files from svn after they're no + longer used anymore. Get rid of a few more that fall into this + category. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@139 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +D src/stacktrace_nacl-inl.h +D src/stacktrace_x86_64-inl.h + +commit 2b2016f6844a88b9448d445f90ad1c849a56ea23 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Feb 4 00:03:35 2012 +0000 + + I have no idea why I created this directory, but it's never been + used. Delete it. 
+ + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@138 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +commit ace2da06732ee549b6d64c1f59b62181f5110230 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Feb 4 00:02:15 2012 +0000 + + This file has been obsolete for almost a year now; I just forgot to + delete it from svn. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@137 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +D src/base/atomicops-internals-arm-gcc.h + +commit 0afb078b34700859cef8d708463fc5c955e26c31 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Jan 31 19:11:26 2012 +0000 + + Tue Jan 31 10:43:50 2012 Google Inc. <opensource@google.com> + + * google-perftools: version 1.10 release + * PORTING: Support for patching assembly on win + x86_64! (scott.fr...) + * PORTING: Work around atexit-execution-order bug on freebsd + (csilvers) + * PORTING: Patch _calloc_crt for windows (roger orr) + * PORTING: Add C++11 compatibility method for stl allocator + (jdennett) + * PORTING: use MADV_FREE, not MADV_DONTNEED, on freebsd + (csilvers) + * PORTING: Don't use SYS_open when not supported on solaris + (csilvers) + * PORTING: Do not assume uname() returns 0 on success + (csilvers) + * LSS: Improved ARM support in linux-syscall-support + (dougkwan) + * LSS: Get rid of unused syscalls in linux-syscall-support + (csilvers) + * LSS: Fix broken mmap wrapping for ppc (markus) + * LSS: Emit .cfi_adjust_cfa_offset when appropriate + (ppluzhnikov) + * LSS: Be more accurate in register use in __asm__ (markus) + * LSS: Fix __asm__ calls to compile under clang (chandlerc) + * LSS: Fix ARM inline assembly bug around r7 and swi (lcwu) + * No longer log when an allocator fails (csilvers) + * void* -> const void* for MallocExtension methods (llib) + * Improve HEAP_PROFILE_MMAP and fix bugs with it (dmikurube) + * Replace int-based abs with more correct fabs in a test + (pmurin) + + + git-svn-id: 
http://gperftools.googlecode.com/svn/trunk@135 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M Makefile.am +M Makefile.in +M NEWS +M README_windows.txt +M configure +M configure.ac +M google-perftools.sln +M packages/deb/changelog +M packages/deb/copyright +M src/pprof +M src/system-alloc.cc +M src/tcmalloc.cc +M src/windows/config.h +M src/windows/mini_disassembler.h +M src/windows/patch_functions.cc +M src/windows/preamble_patcher.h +M src/windows/preamble_patcher_test.cc +M src/windows/shortproc.asm +A vsprojects/preamble_patcher_test/preamble_patcher_test.vcproj + +commit e0eaf5981eede6311e311ac5054489b85015c5d7 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Jan 28 00:13:43 2012 +0000 + + * PORTING: Support for patching assembly on win + x86_64! (scott.fr...) + * PORTING: Work around atexit-execution-order bug on freebsd + (csilvers) + * PORTING: Patch _calloc_crt for windows (roger orr) + * PORTING: Add C++11 compatibility method for stl allocator + (jdennett) + * PORTING: use MADV_FREE, not MADV_DONTNEED, on freebsd + (csilvers) + * PORTING: Don't use SYS_open when not supported on solaris + (csilvers) + * PORTING: Do not assume uname() returns 0 on success + (csilvers) + * LSS: Improved ARM support in linux-syscall-support + (dougkwan) + * LSS: Get rid of unused syscalls in linux-syscall-support + (csilvers) + * LSS: Fix broken mmap wrapping for ppc (markus) + * LSS: Emit .cfi_adjust_cfa_offset when appropriate + (ppluzhnikov) + * LSS: Be more accurate in register use in __asm__ (markus) + * LSS: Fix __asm__ calls to compile under clang (chandlerc) + * LSS: Fix ARM inline assembly bug around r7 and swi (lcwu) + * No longer log when an allocator fails (csilvers) + * void* -> const void* for MallocExtension methods (llib) + * Improve HEAP_PROFILE_MMAP and fix bugs with it (dmikurube) + * Replace int-based abs with more correct fabs in a test + (pmurin) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@134 + 
6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M NEWS +M configure +M configure.ac +M doc/cpuprofile.html +M src/base/atomicops-internals-arm-v6plus.h +M src/base/linux_syscall_support.h +M src/base/stl_allocator.h +M src/base/sysinfo.cc +M src/debugallocation.cc +M src/google/malloc_extension.h +M src/google/malloc_extension_c.h +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profile-table.h +M src/heap-profiler.cc +M src/malloc_extension.cc +M src/memory_region_map.cc +M src/memory_region_map.h +M src/system-alloc.cc +M src/tcmalloc.cc +M src/tests/sampler_test.cc +M src/thread_cache.cc +M src/windows/ia32_opcode_map.cc +M src/windows/mini_disassembler.cc +M src/windows/mini_disassembler.h +M src/windows/mini_disassembler_types.h +M src/windows/patch_functions.cc +M src/windows/preamble_patcher.cc +M src/windows/preamble_patcher.h +A src/windows/preamble_patcher_test.cc +M src/windows/preamble_patcher_with_stub.cc +A src/windows/shortproc.asm + +commit 4fa02db374683d3c69c1b36158534c011513ed31 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Dec 24 01:00:32 2011 +0000 + + Ready to release perftools 1.9.1 :-/ + + Added the ARM stacktrace file to the tarball (for 'make dist') + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@132 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M NEWS +M configure +M configure.ac + +commit 2a7b3b88371c2f78ab31be202f384fd0ed466890 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Dec 23 00:45:49 2011 +0000 + + Thu Dec 22 16:22:45 2011 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.9 release + * Lightweight check for double-frees (blount) + * BUGFIX: Fix pprof to exit properly if run with no args + (dagitses) + * Suggest ASan as a way to diagnose buggy code (ppluzhnikov) + * Get rid of unused CACHELINE_SIZE (csilvers) + * Replace atexit() calls with global dtors; helps freebsd + (csilvers) + * Disable heap-checker under AddressSanitizer (kcc) + * Fix bug in powerpc stacktracing (ppluzhnikov) + * PERF: Use exponential backoff waiting for spinlocks (m3b) + * Fix 64-bit nm on 32-bit binaries in pprof (csilvers) + * Add ProfileHandlerDisallowForever (rsc) + * BUGFIX: Shell escape when forking in pprof (csilvers) + * No longer combine overloaded functions in pprof (csilvers) + * Fix address-normalizing bug in pprof (csilvers) + * More consistently call abort() instead of exit() on failure + (csilvers) + * Allow NoGlobalLeaks to be safely called more than once + (csilvers) + * PORTING/BUGFIX: Fix ARM cycleclock to use volatile asm + (dougkwan) + * PORTING: 64-bit atomic ops for ARMv7 (dougkwan) + * PORTING: Implement stacktrace for ARM (dougkwan) + * PORTING: Fix malloc_hook_mmap_linux for ARM (dougkwan) + * PORTING: Update linux_syscall_support.h for ARM/etc + (evannier, sanek) + * PORTING: Fix freebsd to work on x86_64 (chapp...@gmail.com) + * PORTING: Added additional SYS_mmap fixes for FreeBSD + (chappedm) + * PORTING: Allow us to compile on OS X 10.6 and run on 10.5 + (raltherr) + * PORTING: Check for mingw compilers that *do* define timespec + * PORTING: Add "support" for MIPS cycletimer + * PORTING: Fix fallback cycle-timer to work with Now + (dougkwan) + * PERF: Move stack trace collecting out of the mutex (taylorc) + * PERF: Get the deallocation stack trace outside the mutex + (sean) + * Make PageHeap dynamically allocated for leak checks (maxim) + * BUGFIX: Fix probing of nm -f behavior in pprof (dpeng) + * BUGFIX: Fix a race with the CentralFreeList lock before main + (sanjay) + * 
Support /pprof/censusprofile url arguments (rajatjain) + * Change IgnoreObject to return its argument (nlewycky) + * Update malloc-hook files to support more CPUs + * BUGFIX: write our own strstr to avoid libc problems + (csilvers) + * Use simple callgrind compression facility in pprof + * Print an error message when we can't run pprof to symbolize + (csilvers) + * Die in configure when g++ is't installed (csilvers) + * DOC: Beef up the documentation a bit about using libunwind + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@130 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M NEWS +M configure +M configure.ac +M packages/deb/changelog +M src/pprof +M src/windows/config.h + +commit 75f5a791bc1c0af0ddf43453c18f44723884f37e +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Dec 23 00:45:14 2011 +0000 + + Add the magical incantations to get perftools to compile + cleanly under windows. The only non-trivial one is the #undef + of small, which I needed to compile under the latest cygwin -- + I ran g++ with -dD under cygwin and saw the '#define small + char' with my own eyes. I wouldn't have thought it... + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@129 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/internal_logging.h +M src/tcmalloc.cc +M src/windows/port.h + +commit 76131b04cf41a7f0fed99503f985847fc261b930 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Dec 23 00:44:53 2011 +0000 + + In my testing for a new release, I was noticing hangs on + cygwin, that went away when I turned off tls support. Since I + didn't use to have such problems, I am being conservative and + only turning off TLS for the particular version of cygwin I'm + testing on (as returned by uname()). 
+ + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@128 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/thread_cache.cc + +commit 3d30fbcc85021b267f7e3faf4c2280d70681920c +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Dec 23 00:44:30 2011 +0000 + + We use mmx instructions now in perftools, so specify -xmms for + gcc on i386, where it's not on by default (it is for + gcc/x86_64, in my tests). This could potentially cause an + error for embedded systems, which can have i386 but no mms, + but the code wouldn't run properly on them anyway without + tweaks. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@127 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in + +commit f622d261e9a4331724d222cc2a41b517607c07d7 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Dec 20 18:43:38 2011 +0000 + + * Fix ARM cycleclock to use volatile asm (dougkwan) + * BUGFIX: Define BASE_HAS_ATOMIC64 for ARMv7 (dougkwan) + * Fix fallback cycle-timer to work with Now (dougkwan) + * Fix pprof to exit properly if run with no args (dagitses) + * Suggest ASan as a way to diagnose buggy code (ppluzhnikov) + * Get rid of unused CACHELINE_SIZE (csilvers) + + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@126 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/atomicops-internals-arm-v6plus.h +M src/base/basictypes.h +M src/base/cycleclock.h +M src/debugallocation.cc +M src/malloc_hook_mmap_freebsd.h +M src/pprof +M src/windows/port.h + +commit 7da02003014644f73893df8530ee788dc0e626de +Author: chappedm@gmail.com +<chappedm@gmail.com@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Nov 30 02:54:35 2011 +0000 + + Added additional SYS_mmap fixes for FreeBSD due to syscall differences + between os versions (pulled from differences between FreeBSD6 and + FreeBSD8 mmap implementations) + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@125 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M 
src/malloc_hook_mmap_freebsd.h + +commit 8c8329390cd05a1bf1bf9613712521a9ee9fe260 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Nov 22 01:06:33 2011 +0000 + + * Eliminate some TSAN warnings (wilsonh) + * Better fix for freebsd on x86_64 (chapp...@gmail.com) + * Lightweight check for double-frees (blount) + * 64-bit atomic ops for ARMv7 (dougkwan) + * Remove dynamic annotations in some cases (dvyukov) + * Update linux_syscall_support.h for ARM, etc (evannier) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@124 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/atomicops-internals-arm-v6plus.h +M src/base/dynamic_annotations.c +M src/malloc_hook_mmap_freebsd.h +M src/profile-handler.cc +M src/thread_cache.h + +commit ca23f57679e2bd4e0390d6891aa765b7e9daca22 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Nov 17 00:11:42 2011 +0000 + + Fix bad #include paths. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@123 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/stacktrace.cc +M src/stacktrace_config.h + +commit e580d7888154fa7f95b3cef9e18f1ce69182212b +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Nov 16 21:20:45 2011 +0000 + + Some linux_syscall_support.h improvements: + + * Fix socketcall functions (mec) + * Add ARM related defs and syscall asembly code (sanek) + * Use __arm__ instead of particular ARM arch macro (dougkwan) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@122 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/linux_syscall_support.h + +commit 6c3eaabd7306173b6b71b63797ebf050675046cf +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Nov 16 05:21:54 2011 +0000 + + * Check for mingw compilers that *do* define timespec + * Replace atexit() calls with global dtors; helps freebsd + (csilvers) + * Fix malloc_hook_mmap_linux for ARM (dougkwan) + * Disalbe heap-checker under AddressSanitizer (kcc) + * Fix 
bug in powerpc stacktracing (ppluzhnikov) + * Use exponential backoff waiting for spinlocks (m3b) + * Fix 64-bit nm on 32-bit binaries in pprof (csilvers) + * Implement stacktrace for ARM (dougkwan) + * Add ProfileHandlerDisallowForever (rsc) + * Shell escape when forking in pprof (csilvers) + * Fix freebsd to work on x86_64 (chapp...@gmail.com) + * No longer combine overloaded functions in pprof (csilvers) + * Fix address-normalizing bug in pprof (csilvers) + * More consistently call abort() instead of exit() on failure + (csilvers) + * Allow NoGlobalLeaks to be safely called more than once + (csilvers) + * Beef up the documentation a bit about using libunwind + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@121 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M INSTALL +M Makefile.am +M Makefile.in +M src/base/googleinit.h +M src/base/linux_syscall_support.h +M src/base/logging.h +M src/base/spinlock_internal.cc +M src/base/spinlock_linux-inl.h +M src/base/spinlock_posix-inl.h +M src/base/spinlock_win32-inl.h +M src/debugallocation.cc +M src/google/heap-checker.h +M src/heap-checker.cc +M src/malloc_hook_mmap_freebsd.h +M src/malloc_hook_mmap_linux.h +M src/pprof +M src/profile-handler.cc +M src/profile-handler.h +M src/stacktrace.cc +C059 src/stacktrace_powerpc-inl.h src/stacktrace_arm-inl.h +M src/stacktrace_config.h +M src/stacktrace_powerpc-inl.h +M src/stacktrace_x86-inl.h +M src/tests/heap-checker_unittest.cc +M src/tests/profile-handler_unittest.cc + +commit a6076edd177d59e67207753b799ce047a3663cb0 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Oct 18 20:57:45 2011 +0000 + + * Get the deallocation stack trace outside the lock (sean) + * Make PageHeap dynamically allocated for leak checks (maxim) + * BUGFIX: Fix probing of nm -f behavior in pprof (dpeng) + * PORTING: Add "support" for MIPS cycletimer + * BUGFIX: Fix a race with the CentralFreeList lock (sanjay) + * Allow us to compile on OS X 10.6 and run on 
10.5 (raltherr) + * Support /pprof/censusprofile url arguments (rajatjain) + * Die in configure when g++ is't installed (csilvers) + * Change IgnoreObject to return its argument (nlewycky) + * Update malloc-hook files to support more CPUs + * Move stack trace collecting out of the mutex (taylorc) + * BUGFIX: write our own strstr to avoid libc problems + (csilvers) + * use simple callgrind compression facility in pprof + * print an error message when we can't run pprof to symbolize + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@120 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M INSTALL +M Makefile.am +M Makefile.in +M configure +M configure.ac +M packages/rpm.sh +M src/base/basictypes.h +M src/base/cycleclock.h +M src/base/dynamic_annotations.h +M src/base/spinlock_internal.cc +M src/base/spinlock_linux-inl.h +M src/central_freelist.cc +M src/central_freelist.h +M src/common.cc +M src/common.h +M src/debugallocation.cc +M src/google/heap-checker.h +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profile-table.h +M src/heap-profiler.cc +M src/internal_logging.cc +M src/internal_logging.h +M src/libc_override_osx.h +M src/malloc_hook.cc +M src/malloc_hook_mmap_linux.h +M src/memfs_malloc.cc +M src/page_heap.cc +M src/page_heap.h +M src/page_heap_allocator.h +M src/pprof +M src/profile-handler.cc +M src/profiler.cc +M src/span.cc +M src/span.h +M src/stack_trace_table.cc +M src/stacktrace.cc +M src/stacktrace_config.h +M src/static_vars.cc +M src/static_vars.h +M src/symbolize.cc +M src/system-alloc.cc +M src/tcmalloc.cc +M src/tests/tcmalloc_unittest.cc +M src/thread_cache.cc +M src/thread_cache.h +M src/windows/config.h + +commit c2eedce2a718913ed6264ac8e96571c233761e3b +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Aug 26 21:08:59 2011 +0000 + + Fri Aug 26 13:29:25 2011 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.8.3 release + * Added back the 'pthreads unsafe early' #define, needed + for FreeBSD + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@117 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M NEWS +M configure +M configure.ac +M src/config.h.in +M src/pprof +M src/windows/config.h + +commit f633b40ba5d3785c7e31fd9b575c5cf5e7c174f3 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Aug 11 22:06:22 2011 +0000 + + Thu Aug 11 15:01:47 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.8.2 release + * Fixed calculation of patchlevel, 'make check' should all + pass again + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@115 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M NEWS +M configure +M configure.ac +M src/pprof +M src/windows/config.h + +commit ffcdf7bf5a7e97428f7521b3c9e6d854e3a424a9 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Aug 11 22:05:09 2011 +0000 + + Fix #includes for ARM. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@114 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/atomicops-internals-arm-generic.h + +commit d2faf4646dc79486babfbd63cf5f658b917dd6ab +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Jul 27 04:18:01 2011 +0000 + + Tue Jul 26 20:57:51 2011 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.8 release + * Added an #include to fix compile breakage on latest gcc's + * Removed an extra , in the configure.ac script + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@112 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M NEWS +M configure +M configure.ac +M src/base/elf_mem_image.cc +M src/config.h.in +M src/pprof +M src/tests/tcmalloc_unittest.cc +M src/windows/config.h + +commit 100c38c1a225446c1bbeeaac117902d0fbebfefe +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Jul 16 01:07:10 2011 +0000 + + Fri Jul 15 16:10:51 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.8 release + * PORTING: (Disabled) support for patching mmap on freebsd + (chapp...) + * PORTING: Support volatile __malloc_hook for glibc 2.14 + (csilvers) + * PORTING: Use _asm rdtsc and __rdtsc to get cycleclock in + windows (koda) + * PORTING: Fix fd vs. HANDLE compiler error on cygwin + (csilvers) + * PORTING: Do not test memalign or double-linking on OS X + (csilvers) + * PORTING: Actually enable TLS on windows (jontra) + * PORTING: Some work to compile under Native Client (krasin) + * PORTING: deal with pthread_once w/o -pthread on freebsd + (csilvers) + * Rearrange libc-overriding to make it easier to port + (csilvers) + * Display source locations in pprof disassembly (sanjay) + * BUGFIX: Actually initialize allocator name (mec) + * BUGFIX: Keep track of 'overhead' bytes in malloc reporting + (csilvers) + * Allow ignoring one object twice in the leak checker (glider) + * BUGFIX: top10 in pprof should print 10 lines, not 11 (rsc) + * Refactor vdso source files (tipp) + * Some documentation cleanups + * Document MAX_TOTAL_THREAD_CACHE_SIZE <= 1Gb (nsethi) + * Add MallocExtension::GetOwnership(ptr) (csilvers) + * BUGFIX: We were leaving out a needed $(top_srcdir) in + the Makefile + * PORTING: Support getting argv0 on OS X + * Add 'weblist' command to pprof: like 
'list' but html + (sanjay) + * Improve source listing in pprof (sanjay) + * Cap cache sizes to reduce fragmentation (ruemmler) + * Improve performance by capping or increasing sizes + (ruemmler) + * Add M{,un}mapReplacmenet hooks into MallocHook (ribrdb) + * Refactored system allocator logic (gangren) + * Include cleanups (csilvers) + * Add TCMALLOC_SMALL_BUT_SLOW support (ruemmler) + * Clarify that tcmalloc stats are MiB (robinson) + * Remove support for non-tcmalloc debugallocation (blount) + * Add a new test: malloc_hook_test (csilvers) + * Change the configure script to be more crosstool-friendly + (mcgrathr) + * PORTING: leading-underscore changes to support win64 + (csilvers) + * Improve debugallocation tc_malloc_size (csilvers) + * Extend atomicops.h and cyceclock to use ARM V6+ optimized + code (sanek) + * Change malloc-hook to use a list-like structure (llib) + * Add flag to use MAP_PRIVATE in memfs_malloc (gangren) + * Windows support for pprof: nul and /usr/bin/file (csilvers) + * TESTING: add test on strdup to tcmalloc_test (csilvers) + * Augment heap-checker to deal with no-inode maps (csilvers) + * Count .dll/.dylib as shared libs in heap-checker (csilvers) + * Disable sys_futex for arm; it's not always reliable (sanek) + * PORTING: change lots of windows/port.h macros to functions + * BUGFIX: Generate correct version# in tcmalloc.h on windows + (csilvers) + * PORTING: Some casting to make solaris happier about types + (csilvers) + * TESTING: Disable debugallocation_test in 'minimal' mode + (csilvers) + * Rewrite debugallocation to be more modular (csilvers) + * Don't try to run the heap-checker under valgrind + (ppluzhnikov) + * BUGFIX: Make focused stat %'s relative, not absolute + (sanjay) + * BUGFIX: Don't use '//' comments in a C file (csilvers) + * Quiet new-gcc compiler warnings via -Wno-unused-result, + etc (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@110 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M 
Makefile.am +M Makefile.in +M NEWS +M README +M configure +M configure.ac +M doc/heapprofile.html +M doc/tcmalloc.html +M google-perftools.sln +M packages/deb/changelog +M src/base/atomicops-internals-windows.h +M src/base/atomicops.h +M src/base/cycleclock.h +C067 src/base/vdso_support.cc src/base/elf_mem_image.cc +A src/base/elf_mem_image.h +M src/base/low_level_alloc.cc +M src/base/spinlock_internal.cc +M src/base/vdso_support.cc +M src/base/vdso_support.h +M src/central_freelist.cc +M src/central_freelist.h +M src/common.cc +M src/common.h +M src/config.h.in +M src/debugallocation.cc +M src/google/heap-checker.h +M src/google/malloc_extension.h +M src/google/malloc_extension_c.h +M src/google/malloc_hook.h +M src/heap-checker.cc +A src/libc_override.h +A src/libc_override_gcc_and_weak.h +A src/libc_override_glibc.h +A src/libc_override_osx.h +A src/libc_override_redefine.h +M src/malloc_extension.cc +M src/malloc_hook-inl.h +M src/malloc_hook.cc +A src/malloc_hook_mmap_freebsd.h +A src/malloc_hook_mmap_linux.h +M src/maybe_threads.cc +M src/memfs_malloc.cc +M src/memory_region_map.cc +M src/pprof +M src/profile-handler.cc +M src/profiler.cc +M src/stack_trace_table.h +M src/stacktrace.cc +M src/stacktrace_config.h +C053 src/stack_trace_table.h src/stacktrace_nacl-inl.h +M src/symbolize.cc +M src/system-alloc.cc +M src/system-alloc.h +M src/tcmalloc.cc +M src/tests/debugallocation_test.cc +M src/tests/malloc_extension_c_test.c +M src/tests/malloc_extension_test.cc +M src/tests/malloc_hook_test.cc +M src/tests/profiler_unittest.cc +M src/tests/system-alloc_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/thread_cache.cc +M src/windows/config.h +M src/windows/google/tcmalloc.h.in +M src/windows/mingw.h +M src/windows/port.cc +M src/windows/port.h +M +vsprojects/current_allocated_bytes_test/current_allocated_bytes_test.vcproj +M vsprojects/malloc_hook_test/malloc_hook_test.vcproj + +commit 8c7d2289d24f7a49f1f6f60d4a6eaee06fa04c60 +Author: csilvers 
<csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu May 19 21:37:12 2011 +0000 + + * Fix typos in comment in profiler.h (nrhodes) + * #include fixes (jyrki) + * Add missing stddef.h for ptrdiff_t (mec) + * Add M{,un}mapReplacement hooks into MallocHook (ribrdb) + * Force big alloc in frag test (ruemmler) + * PERF: Increase the size class cache to 64K entries + (ruemmler) + * PERF: Increase the transfer cache by 16x (ruemmler) + * Use windows intrinsic to get the tsc (csilvers) + * Rename atomicops-internals-x86-msvc.h->windows.h (csilvers) + * Remove flaky DEATH test in malloc_hook_test (ppluzhnikov) + * Expose internal ReadStackTraces()/etc (lantran) + * Refactored system allocator logic (gangren) + * Include-what-you-use: cleanup tcmalloc #includes (csilvers) + * Don't set kAddressBits to 48 on 32-bit systems (csilvers) + * Add declaration for __rdtsc() for windows (koda) + * Don't revert to system alloc for expected errors (gangren) + * Add TCMALLOC_SMALL_BUT_SLOW support (ruemmler) + * Clarify that tcmalloc stats are MiB (robinson) + * Avoid setting cpuinfo_cycles_per_second to 0 (koda) + * Fix frag_unittest memory calculations (ruemmler) + * Remove support for non-tcmalloc debugallocation (blount) + * Add malloc_hook_test (llib) + * Change the objcopy -W test to be cross-friendly (mcgrathr) + * Export __tcmalloc in addition to _tcmalloc, for 86_64 + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@109 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M INSTALL +M Makefile.am +M Makefile.in +M README +M README_windows.txt +M configure +M configure.ac +M google-perftools.sln +R096 src/base/atomicops-internals-x86-msvc.h +src/base/atomicops-internals-windows.h +M src/base/atomicops.h +M src/base/cycleclock.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/base/vdso_support.h +M src/central_freelist.cc +M src/central_freelist.h +M src/common.cc +M src/common.h +M src/config.h.in +M src/debugallocation.cc +M src/google/malloc_extension.h 
+M src/google/malloc_hook.h +M src/google/malloc_hook_c.h +M src/google/profiler.h +M src/google/tcmalloc.h.in +M src/heap-profiler.cc +M src/internal_logging.cc +M src/internal_logging.h +M src/malloc_extension.cc +M src/malloc_hook-inl.h +M src/malloc_hook.cc +M src/memfs_malloc.cc +M src/memory_region_map.cc +M src/memory_region_map.h +M src/packed-cache-inl.h +M src/page_heap.cc +M src/page_heap.h +M src/page_heap_allocator.h +M src/pagemap.h +M src/sampler.cc +M src/sampler.h +M src/span.cc +M src/stack_trace_table.cc +M src/stack_trace_table.h +M src/static_vars.cc +M src/system-alloc.cc +M src/system-alloc.h +M src/tcmalloc.cc +M src/tests/frag_unittest.cc +M src/tests/heap-checker_unittest.cc +A src/tests/malloc_hook_test.cc +M src/tests/memalign_unittest.cc +M src/tests/page_heap_test.cc +M src/tests/realloc_unittest.cc +M src/tests/system-alloc_unittest.cc +M src/tests/tcmalloc_large_unittest.cc +M src/thread_cache.cc +M src/thread_cache.h +M src/windows/patch_functions.cc +M src/windows/port.cc +A vsprojects/malloc_hook_test/malloc_hook_test.vcproj + +commit 1d30e525ae6ac38ae381bb3118f7f47998af2942 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Mar 21 21:41:55 2011 +0000 + + * Improve debugallocation tc_malloc_size (csilvers) + * Extend atomicops.h to use ARM V6+ optimized code (sanek) + * Fix failure in Ranges test (ppluzhnikov) + * Change malloc-hook to use a list-like structure (llib) + * Update tcmalloc_regtest to use new malloc hooks (llib) + * PARTIAL: Keep track of 'overhead' bytes in the page cache + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@108 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M google-perftools.sln +A src/base/atomicops-internals-arm-generic.h +A src/base/atomicops-internals-arm-v6plus.h +M src/base/atomicops.h +M src/central_freelist.cc +M src/central_freelist.h +M src/google/malloc_hook.h +M src/google/malloc_hook_c.h +M src/heap-checker-bcad.cc 
+M src/heap-checker.cc +M src/heap-profiler.cc +M src/malloc_hook-inl.h +M src/malloc_hook.cc +M src/memory_region_map.cc +M src/memory_region_map.h +M src/tcmalloc.cc +A src/tests/current_allocated_bytes_test.cc +M src/tests/low_level_alloc_unittest.cc +M src/tests/malloc_extension_c_test.c +M src/tests/tcmalloc_unittest.cc +A +vsprojects/current_allocated_bytes_test/current_allocated_bytes_test.vcproj + +commit 6fe07cd2c0527e18276cc79a57e2212a4b048746 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Mar 4 23:52:33 2011 +0000 + + * add a flag to use MAP_PRIVATE in memfs_malloc (gangren) + * pthread_self() is now safe to use early (ppluzhnikov) + * windows support for pprof: nul and /usr/bin/file (csilvers) + * fix tc_malloc_size for debugallocation (csilvers) + * add test on strdup to tcmalloc_test (csilvers) + * augment heap-checker to deal with no-inode maps (csilvers) + * Get rid of -Wno-unused-result: not all gcc's support it + (csilvers) + * /bin/true -> ':', which is faster and more portable + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@107 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M doc/tcmalloc.html +M src/debugallocation.cc +M src/heap-checker.cc +M src/memfs_malloc.cc +M src/pprof +M src/tcmalloc.cc +M src/tests/debugallocation_test.sh +M src/tests/memalign_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/thread_cache.cc + +commit 75584139e40c9d6c952d9c5339c52e5b58302fc8 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Mar 2 08:10:05 2011 +0000 + + * Enhance cycleclock on ARM v6 and above (sanek) + * Reduce object copying by using a reference (nherring) + * Modified lock annotations a bit (lcwu) + * Make debugallocation test a bit more forgiving (csilvers) + * Count .dll/.dylib as shared libs in heapchecker (csilvers) + * Disable sys_futex for arm (sanek) + * Don't use macros as much in windows/port.h (andrey.s...) 
+ * Update #includes in case malloc.h is in weird places + (csilvers) + * Turn off some not-so-useful warnings in gcc 4 (csilvers) + * Do some casting to make solaris happier about types + (csilvers) + * Disable debugallocation_test in 'minimal' mode (csilvers) + * Rewrite debugallocation to be more modular (csilvers) + * We can't run the heap-checker under valgrind (ppluzhnikov) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@106 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M configure +M configure.ac +C055 src/raw_printer.cc src/base/arm_instruction_set_select.h +M src/base/cycleclock.h +M src/base/logging.h +M src/base/low_level_alloc.cc +M src/base/spinlock_linux-inl.h +M src/base/stl_allocator.h +M src/base/sysinfo.cc +M src/config.h.in +M src/debugallocation.cc +M src/google/tcmalloc.h.in +M src/heap-checker.cc +M src/internal_logging.cc +M src/raw_printer.cc +M src/tcmalloc.cc +M src/tests/debugallocation_test.cc +M src/tests/heap-checker_unittest.cc +M src/tests/memalign_unittest.cc +M src/tests/sampler_test.cc +M src/windows/config.h +R094 src/windows/google/tcmalloc.h src/windows/google/tcmalloc.h.in +M src/windows/port.cc +M src/windows/port.h + +commit c1abbfae802af5bf949c78e0bfdfd58d5c669a86 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Feb 19 00:11:09 2011 +0000 + + * avoid unused-var warnings with annotalysis (lcwu) + * make focused stats %'s relative, not absolute (sanjay) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@105 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/dynamic_annotations.h +M src/pprof + +commit dd3d9d969e9b889e96c2af497e436856fac1a2a7 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Feb 8 01:03:37 2011 +0000 + + * Fix tcmalloc_unittest on MSVC 10 in release mode (csilvers) + * Fix malloc_hook_c.h to compile with -ansi under gcc + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@104 + 
6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M NEWS +M src/google/malloc_hook_c.h +M src/tests/tcmalloc_unittest.cc + +commit 7375b4f3cb3ab4471d0016017be5e18ba5451c5f +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Feb 5 00:19:37 2011 +0000 + + Fri Feb 04 15:54:31 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.7 release + * Reduce page map key size under x86_64 by 4.4MB (rus) + * Remove a flaky malloc-extension test (fdabek) + * Improve the performance of PageHeap::New (ond..., csilvers) + * Improve sampling_test with no-inline additions/etc (fdabek) + * 16-byte align debug allocs (jyasskin) + * Change FillProcSelfMaps to detect out-of-buffer-space + (csilvers) + * Document the need for sampling to use GetHeapSample + (csilvers) + * Try to read TSC frequency from tsc_freq_khs (adurbin) + * Do better at figuring out if tests are running under gdb + (ppluzhnikov) + * Improve spinlock contention performance (ruemmler) + * Better internal-function list for pprof's /contention + (ruemmler) + * Speed up GoogleOnce (m3b) + * Limit number of incoming/outgoing edges in pprof (sanjay) + * Add pprof --evince to go along with --gv (csilvers) + * Document the various ways to get heap-profiling information + (csilvers) + * Separate out synchronization profiling routines (ruemmler) + * Improve malloc-stats output to be more understandable + (csilvers) + * Add support for census profiler in pporf (nabeelmian) + * Document how pprof's /symbol must support GET requests + (csilvers) + * Improve acx_pthread.m4 (ssuomi, liujisi) + * Speed up pprof's ExtractSymbols (csilvers) + * Ignore some known-leaky (java) libraries in the heap checker + (davidyu) + * Make kHideMask use all 64 bits in tests (ppluzhnikov) + * Clean up pprof input-file handling (csilvers) + * BUGFIX: Don't crash if __environ is NULL (csilvers) + * BUGFIX: Fix totally broken debugallocation tests (csilvers) + * BUGFIX: Fix up fake_VDSO handling for unittest 
(ppluzhnikov) + * BUGFIX: Suppress all large allocs when report threshold is 0 + (lexie) + * BUGFIX: mmap2 on i386 takes an off_t, not off64_t (csilvers) + * PORTING: Add missing PERFTOOLS_DLL_DECL (csilvers) + * PORTING: Add stddef.h to make newer gcc's happy (csilvers) + * PORTING: Document some tricks for working under OS X + (csilvers) + * PORTING: Don't try to check valgrind for windows (csilvers) + * PORTING: Make array-size a var to compile under clang + (chandlerc) + * PORTING: No longer hook _aligned_malloc and _aligned_free + (csilvers) + * PORTING: Quiet some gcc warnings (csilvers) + * PORTING: Replace %PRIxPTR with %p to be more portable + (csilvers) + * PORTING: Support systems that capitalize /proc weirdly + (sanek) + * PORTING: Treat arm3 the same as arm5t in cycletimer + (csilvers) + * PORTING: Update windows logging to not allocate memory + (csilvers) + * PORTING: avoid double-patching newer windows DLLs + (roger.orr) + * PORTING: get dynamic_annotations.c to work on windows + (csilvers) + * Add pkg-config .pc files for the 5 libraries we produce + (csilvers) + * Added proper libtool versioning, so this lib will be 0.1.0 + (csilvers) + * Moved from autoconf 2.64 to 2.65 + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@102 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M Makefile.am +M Makefile.in +M NEWS +M README +M configure +M configure.ac +M m4/acx_pthread.m4 +M packages/deb/changelog +M src/base/cycleclock.h +M src/base/dynamic_annotations.c +M src/base/logging.h +M src/base/stl_allocator.h +M src/base/vdso_support.cc +M src/debugallocation.cc +M src/heap-checker.cc +M src/page_heap.cc +M src/page_heap.h +M src/pprof +M src/symbolize.h +M src/system-alloc.cc +M src/tests/debugallocation_test.cc +M src/tests/debugallocation_test.sh +M src/tests/malloc_extension_test.cc +M src/windows/port.h + +commit 3d77cbf7d569a7c7f0ce39a83f6c98da1718f1c4 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Jan 19 
21:37:15 2011 +0000 + + * Make kHideMask use all 64 bits (ppluzhnikov) + * Add new IsDebuggerAttached method (ppluzhnikov) + * Document some tricks for maybe getting perftools to work + on OS X + * Redo file-top pprof commands (csilvers) + * Clean up pprof input-file handling (csilvers) + * 16-byte align debug allocs (jyasskin) + * Ignore JVM memory leakage in the heap checker (davidyu, + kkurimoto) + * Better internal-function list for contentionz (ruemmler) + * mmap2 on i386 takes an off_t, not an off64_t (csilvers) + * Fix up fake-VDSO handling for unittest (ppluzhnikov) + * Don't try to check valgrind for windows (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@101 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M README +M src/base/dynamic_annotations.c +M src/base/linux_syscall_support.h +M src/base/vdso_support.cc +M src/base/vdso_support.h +M src/debugallocation.cc +M src/heap-checker.cc +M src/malloc_hook.cc +M src/pprof +M src/tcmalloc.cc +M src/tests/heap-checker_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/windows/config.h + +commit a0a2ff3b493481ff52e4b4deec2fcc494756b64a +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Nov 24 00:33:20 2010 +0000 + + * PORTING: Add PERFTOOLS_DLL_DECL to malloc_hook_c.h + (csilvers) + * Add new .h files to Makefile (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@100 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M src/google/malloc_hook_c.h + +commit 3014cf142e5a2409c88ab4559f3274434ed9a29b +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Nov 18 01:07:25 2010 +0000 + + * Suppress all large allocs when report threshold==0 + * Clarified meaning of various malloc stats + * Change from ATTRIBUTED_DEPRECATED to comments + * Make array-size a var to compile under clang + * Reduce page map key size under x86_64 by 4.4MB + * Added full qualification to MemoryBarrier + * Support systems that 
capitalize /proc weirdly + * Avoid gcc warning: exporting type in unnamed ns + * Add some dynamic annotations for gcc attributes + * Add support for census profiler in pprof + * Speed up pprof's ExtractSymbols + * Speed up GoogleOnce + * Add pkg-config (.pc) files + * Detect when __environ exists but is NULL + * Improve spinlock contention performance + * Add GetFreeListSizes + * Improve sampling_test, eg by adding no-inline + * Relax malloc_extension test-check for big pages + * Add proper library version number information + * Update from autoconf 2.64 to 2.65 + * Better document how to write a server that works with pprof + * Change FillProcSelfMaps to better handle out-of-space + * No longer hook _aligned_malloc/free in windows + * Handle function-forwarding in DLLs when patching (in + windows) + * Update .vcproj files that had wrong .cc files in them (!) + * get rid of unnecessary 'size < 0' + * fix comments a bit in sysinfo.cc + * another go at improving malloc-stats output + * fix comment typo in profiler.cc + * Add a few more thread annotations + * Try to read TSC frequency from 'tsc_freq_khz' + * Fix annotalysis/TSAN incompatibility + * Add pprof --evince to go along with --gv + * Document need for sampling to use GetHeapSample + * Fix flakiness in malloc_extension_test + * Separate out synchronization profiling routines + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@99 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M NEWS +M README +M aclocal.m4 +M configure +M configure.ac +M doc/pprof_remote_servers.html +M ltmain.sh +A m4/libtool.m4 +A m4/ltoptions.m4 +A m4/ltsugar.m4 +A m4/ltversion.m4 +A m4/lt~obsolete.m4 +M packages/deb/libgoogle-perftools-dev.dirs +M packages/deb/libgoogle-perftools-dev.install +M packages/rpm/rpm.spec +A src/base/atomicops-internals-arm-gcc.h +M src/base/basictypes.h +M src/base/dynamic_annotations.h +M src/base/logging.h +M src/base/low_level_alloc.cc +M src/base/spinlock.cc +M 
src/base/spinlock.h +A src/base/spinlock_internal.cc +C057 src/base/spinlock_win32-inl.h src/base/spinlock_internal.h +M src/base/spinlock_linux-inl.h +M src/base/spinlock_posix-inl.h +M src/base/spinlock_win32-inl.h +C067 src/base/spinlock_win32-inl.h +src/base/synchronization_profiling.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/base/thread_annotations.h +M src/base/vdso_support.cc +M src/common.h +M src/config.h.in +M src/debugallocation.cc +M src/google/heap-checker.h +M src/google/malloc_extension.h +M src/heap-profile-table.cc +M src/malloc_extension.cc +M src/memory_region_map.cc +M src/page_heap.cc +M src/page_heap.h +M src/pprof +M src/profiler.cc +M src/system-alloc.cc +M src/tcmalloc.cc +M src/tests/debugallocation_test.cc +M src/tests/malloc_extension_test.cc +M src/tests/sampling_test.cc +M src/tests/sampling_test.sh +M src/tests/system-alloc_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/windows/config.h +M src/windows/patch_functions.cc +M src/windows/port.cc +M src/windows/port.h +M vsprojects/addressmap_unittest/addressmap_unittest.vcproj +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +M vsprojects/packed-cache_test/packed-cache_test.vcproj +M vsprojects/tmu-static/tmu-static.vcproj + +commit 682ff7da1205398376ee725b4ce3219c107b3f8a +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Aug 5 20:36:47 2010 +0000 + + Thu Aug 5 12:48:03 PDT 2010 + + * google-perftools: version 1.6 release + * Add tc_malloc_usable_size for compatibility with glibc + (csilvers) + * Override malloc_usable_size with tc_malloc_usable_size + (csilvers) + * Default to no automatic heap sampling in tcmalloc (csilvers) + * Add -DTCMALLOC_LARGE_PAGES, a possibly faster tcmalloc (rus) + * Make some functions extern "C" to avoid false ODR warnings + (jyasskin) + * pprof: Add SVG-based output (rsc) + * pprof: Extend pprof --tools to allow per-tool configs + 
(csilvers) + * pprof: Improve support of 64-bit and big-endian profiles + (csilvers) + * pprof: Add interactive callgrind suport (weidenri...) + * pprof: Improve address->function mapping a bit (dpeng) + * Better detection of when we're running under valgrind + (csilvers) + * Better CPU-speed detection under valgrind (saito) + * Use, and recommend, -fno-builtin-malloc when compiling + (csilvers) + * Avoid false-sharing of memory between caches (bmaurer) + * BUGFIX: Fix heap sampling to use correct alloc size (bmauer) + * BUGFIX: Avoid gcc 4.0.x bug by making hook-clearing atomic + (csilvers) + * BUGFIX: Avoid gcc 4.5.x optimization bug (csilvers) + * BUGFIX: Work around deps-determining bug in libtool 1.5.26 + (csilvers) + * BUGFIX: Fixed test to use HAVE_PTHREAD, not HAVE_PTHREADS + (csilvers) + * BUGFIX: Fix tls callback behavior on windows when using wpo + (wtc) + * BUGFIX: properly align allocation sizes on Windows (antonm) + * BUGFIX: Fix prototypes for tcmalloc/debugalloc wrt throw() + (csilvers) + * DOC: Updated heap-checker doc to match reality better + (fischman) + * DOC: Document ProfilerFlush, ProfilerStartWithOptions + (csilvers) + * DOC: Update docs for heap-profiler functions (csilvers) + * DOC: Clean up documentation around tcmalloc.slack_bytes + (fikes) + * DOC: Renamed README.windows to README_windows.txt (csilvers) + * DOC: Update the NEWS file to be non-empty (csilvers) + * PORTING: Fix windows addr2line and nm with proper rc code + (csilvers) + * PORTING: Add CycleClock and atomicops support for arm 5 + (sanek) + * PORTING: Improve PC finding on cygwin and redhat 7 + (csilvers) + * PORTING: speed up function-patching under windows (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@97 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M NEWS +M README +M configure +M configure.ac +M doc/heap_checker.html +M m4/pc_from_ucontext.m4 +M packages/deb/changelog +M src/base/atomicops.h +M src/base/cycleclock.h +M 
src/base/dynamic_annotations.c +M src/base/dynamic_annotations.h +M src/base/sysinfo.cc +M src/base/thread_annotations.h +M src/base/vdso_support.h +M src/common.cc +M src/common.h +M src/google/malloc_extension.h +M src/google/tcmalloc.h.in +M src/heap-checker.cc +M src/heap-profiler.cc +M src/malloc_hook-inl.h +M src/malloc_hook.cc +M src/pprof +M src/stacktrace_x86-inl.h +M src/tcmalloc.cc +M src/tests/sampler_test.cc +M src/tests/tcmalloc_unittest.cc +M src/windows/google/tcmalloc.h +M src/windows/port.cc +M vsprojects/addressmap_unittest/addressmap_unittest.vcproj +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +M vsprojects/tmu-static/tmu-static.vcproj + +commit 488eee994d571da216ef105d4144282c801f0eee +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Jun 23 16:42:31 2010 +0000 + + Oops, I thought I had done this last time, but maybe not. Submit the + name change. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@96 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +R092 README.windows README_windows.txt + +commit cb7393cbe2d737009001fd9d30dad568bac7a3d8 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Jun 21 15:59:56 2010 +0000 + + * Default to not sampling in tcmalloc (csilvers) + * Add -DTCMALLOC_LARGE_PAGES: better perf for some workloads + (rus) + * Extend pprof --tools to allow per-tool configs (csilvers) + * Have STL_Allocator pass on # bytes to free (richardfang) + * Add a header guard to config.h (csilvers) + * DOC: Clean up documentation around tcmalloc.slack_bytes + (fikes) + * DOC: Document ProfilerFlush, ProfilerStartWithOptions + (csilvers) + * PORTING: Work around a gcc 4.5.0 optimization bug (csilvers) + * PORTING: Use -fno-builtin-malloc and friends when compiling + tcmalloc + * PORTING: Define _WIN32_WINNT high enough for mingw + (csilvers) + * PORTING: Work around libtool bug getting deps wrong in + 
some cases + * Update README.windows to emphasize $IncludeDir more + (csilvers) + * Rename README.windows to README_windows.txt (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@95 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M README +M configure +M configure.ac +M doc/cpuprofile.html +M doc/tcmalloc.html +M src/base/dynamic_annotations.c +M src/base/dynamic_annotations.h +M src/base/stl_allocator.h +M src/central_freelist.cc +M src/common.h +M src/config.h.in +M src/google/malloc_extension.h +M src/heap-checker.cc +M src/internal_logging.h +M src/linked_list.h +M src/memory_region_map.h +M src/page_heap.cc +M src/page_heap.h +M src/pprof +M src/sampler.cc +M src/span.h +M src/tcmalloc.cc +M src/tests/frag_unittest.cc +M src/tests/page_heap_test.cc +M src/tests/testutil.cc +M src/thread_cache.cc +M src/thread_cache.h +M src/windows/config.h +M src/windows/mingw.h + +commit d8c02761689ba909f474b85618f99ac6dfc9a168 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri May 7 21:53:24 2010 +0000 + + * Update docs for heap-profiler fns (csilvers) + * In pprof, accept URLs without ports but with http:// (rsc) + * Refactor sizeclass handling in tcmalloc (bmaurer) + * Always log failed calls to FindAllocDetails (mec) + * Clarify comments for ProfilerStart* (malcolmr) + * Add #include guards to stacktrace_win32-inl.h (glider) + * Add ANNOTATE_ENABLE_RACE_DETECTION(enable) (kcc) + * Make a contentful NEWS file (csilvers) + * Fix addr2line --help (which pprof relies on) for windows + (csilvers) + * Fixes a bug in tcmalloc's TLS callback on windows -static + (wtc) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@94 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M NEWS +M doc/heapprofile.html +M packages/deb/control +M packages/rpm/rpm.spec +M src/base/dynamic_annotations.c +M src/base/dynamic_annotations.h +M src/base/vdso_support.cc +M src/central_freelist.cc +M src/common.h +M 
src/google/profiler.h +M src/heap-checker.cc +M src/internal_logging.h +M src/page_heap.cc +M src/page_heap.h +M src/pprof +M src/span.h +M src/stacktrace_win32-inl.h +M src/tcmalloc.cc +M src/tests/page_heap_test.cc +M src/windows/addr2line-pdb.c +M src/windows/nm-pdb.c +M src/windows/port.cc + +commit b0fe220d503eb23830e622939c2e14f084392d1e +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Apr 22 17:29:02 2010 +0000 + + * Fix pthread_once extern declarations (willchan) + * Add missing closing brace that resulted in compile failure + (willchan) + * Handle closed stdin/out/err properly when forking (glider) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@93 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/maybe_threads.cc +M src/profile-handler.cc +M src/symbolize.cc +M src/tcmalloc.cc + +commit 21c65ecb037d7d41dcd574c70cd7b7e9d5045462 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Apr 12 21:18:48 2010 +0000 + + * Speed up IsSymbolizedProfile by checking for NUL (csilvers) + * Fix heap sampling to use correct alloc size (bmaurer) + * Make pprof ignore tc_new{,array}_nothrow (csilvers) + * PORTING: Have IsHeapProfilerRunning return an int, for C + (csilvers) + * Avoid false-sharing of memory between caches (bmaurer) + * Fix some variable shadowing (rt) + * SVG-based ouptut in pprof; also, wget->curl (rsc) + * Allow arbitrary prefix before obvious handler (rsc) + * Advertise when using libunwind (ppluzhnikov) + + Also, the install file seems to have reverted back to the default at + some previous point in time (autotools will do that occasionally). + Change that back to have the perftools-specific text in there. 
+ + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@92 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M INSTALL +M src/base/basictypes.h +M src/google/heap-profiler.h +M src/heap-checker.cc +M src/heap-profiler.cc +M src/page_heap_allocator.h +M src/pprof +M src/stacktrace_config.h +M src/system-alloc.cc +M src/system-alloc.h +M src/tcmalloc.cc +M src/thread_cache.h + +commit 92beff88437b31f4a618640b88487e0f8dfb7017 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Mar 23 20:39:55 2010 +0000 + + * Add new (std::nothrow) to debugallocation (corrado) + * Add a flag to ingore unaligned-ptr leaks (archanakannan) + * PORTING: Add get-pc capabilities for a new OS (csilvers) + * Don't register malloc extension under valgrind (csilvers) + * Fix throw specs for our global operator new (chandlerc) + * PORTING: link to instructions on windows static overrides + (mbelshe) + * Fix prototype differences in debugalloc (chandlerc, + csilvers, wan) + * Change pprof to handle big-endian input files (csilvers) + * Properly align allocation sizes on Windows (antonm) + * Improve IsRunningOnValgrind, using valgrind.h (csilvers, + kcc) + * Improve the accuracy of system_alloc actual_size (csilvers) + * Add interactive callgrind support to pprof (weidenri...) 
+ * Fix off-by-one problems when symbolizing in pprof (dpeng) + * Be more permissive in allowed library names, in pprof + (csilvers) + * PORTING: Fix pc_from_ucontext to handle cygwin and redhat7 + (csilvers) + * Fix stacktrace to avoid inlining (ppluzhnikov) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@91 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M README.windows +M configure +M configure.ac +M doc/heap_checker.html +M m4/pc_from_ucontext.m4 +A src/base/dynamic_annotations.c +D src/base/dynamic_annotations.cc +M src/base/dynamic_annotations.h +M src/base/low_level_alloc.cc +M src/config.h.in +M src/debugallocation.cc +M src/google/stacktrace.h +M src/google/tcmalloc.h.in +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profile-table.h +M src/malloc_extension.cc +M src/malloc_hook.cc +M src/memory_region_map.cc +M src/pprof +M src/stacktrace.cc +M src/stacktrace_generic-inl.h +M src/stacktrace_libunwind-inl.h +M src/stacktrace_powerpc-inl.h +D src/stacktrace_with_context.cc +M src/stacktrace_x86-inl.h +M src/system-alloc.cc +M src/tcmalloc.cc +M src/tests/debugallocation_test.cc +M src/tests/heap-checker-death_unittest.sh +M src/tests/profiler_unittest.cc +M src/tests/profiler_unittest.sh +M src/tests/tcmalloc_unittest.cc +A src/third_party/valgrind.h +M src/windows/google/tcmalloc.h +M src/windows/port.cc + +commit 23dd124970bc11636feaa240394063ba5889ca54 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Feb 11 01:32:42 2010 +0000 + + * Speed up patching by not patching modules that are already + loaded + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@88 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/windows/config.h +M src/windows/patch_functions.cc + +commit 8f8a010cab7d3070069a31990cb0e269f03dceb0 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Jan 20 23:00:51 2010 +0000 + + Oops, I submitted the wrong version of patch_functions.cc 
with the + previous commit. This is really the one that goes with release 1.5. + . + ?\027[D + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@86 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/windows/patch_functions.cc + +commit fef86cfeba8915e34575f09499e18b184d7254a9 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Jan 20 22:47:29 2010 +0000 + + * google-perftools: version 1.5 release + * Add tc_set_new_mode (willchan) + * Make memalign functions + realloc respect tc_set_new_mode + (willchan) + * Add ReleaseToSystem(num_bytes) (kash) + * Handle zero-length symbols a bit better in pprof (csilvers) + * Prefer __environ to /proc/self/environ in cpu profiler + (csilvers) + * Add HEAP_CHECK_MAX_LEAKS flag to control #leaks to report + (glider) + * Add two new numeric pageheap properties to MallocExtension + (fikes) + * Print alloc size when mmap fails (hakon) + * Add ITIMER_REAL support to cpu profiler (csilvers, + nabeelmian) + * Speed up symbolizer in heap-checker reporting (glider) + * Speed up futexes with FUTEX_PRIVATE_FLAG (m3b) + * Speed up tcmalloc but doing better span coalescing (sanjay) + * Better support for different wget's and addr2maps in pprof + (csilvres) + * Implement a nothrow version of delete and delete[] + (csilvers) + * BUGFIX: fix a race on module_libcs[i] in windows patching + (csilvers) + * BUGFIX: Fix debugallocation to call cpp_alloc for new + (willchan) + * BUGFIX: A simple bugfix for --raw mode (mrabkin) + * BUGFIX: Fix C shims to actually be valid C (csilvers) + * BUGFIX: Fix recursively-unmapped-region accounting + (ppluzhnikov) + * BUGFIX: better distinguish real and fake vdso (ppluzhnikov) + * WINDOWS: replace debugmodule with more reliable psai + (andrey) + * PORTING: Add .bundle as another shared library extension + (csilvers) + * PORTING: Fixed a typo bug in the ocnfigure PRIxx m4 macro + (csilvers) + * PORTING: Augment sysinfo to work on 64-bit OS X (csilvers) + * PORTING: Use sys/ucontext.h 
to fix compiing on OS X 10.6 + (csilvers) + * PORTING: Fix sysinfo libname reporting for solaris x86 + (jeffrey) + * PORTING: Use libunwind for i386 when using --omitfp + (ppluzhnikov) + + NOTE: This release uses an older version of + src/windows/patch_functions.cc + because I decided the latest optimizations were not well enough tested + for a release. I'll aim to get them into next release. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@84 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M autogen.sh +M configure +M configure.ac +M packages/deb/changelog +M src/pprof +M src/windows/patch_functions.cc + +commit 63b8d63beb7d771713774f9a5d57381cbd29bf19 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Jan 14 16:26:05 2010 +0000 + + * PORTING: Revised patch_functions to avoid deadlock + (csilvers, andrey) + * PORTING: Revised patch_functions to speed up .dll loads + (csilvers) + * PORTING: Build and run sampling_test for windows (csilvers) + * Correctly init tc structs even when libc isn't patched + (csilvers) + * Make low-level allocs async-signal-safe (saito) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@83 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M README.windows +M src/base/low_level_alloc.cc +M src/base/low_level_alloc.h +M src/google/malloc_hook.h +M src/malloc_hook.cc +M src/pprof +M src/stacktrace_x86-inl.h +M src/tcmalloc.cc +M src/tests/sampling_test.sh +M src/thread_cache.cc +M src/windows/patch_functions.cc +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M vsprojects/tmu-static/tmu-static.vcproj + +commit eeeacd5ec4fa36256091f45e5b3af81cee2a4d86 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Jan 6 00:34:23 2010 +0000 + + * PORTING: Fix a race condition in windows patching + * PORTING: Use Psapi instead of debugmodule to get windows + module info + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@82 
+ 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M README.windows +M google-perftools.sln +M src/page_heap.h +M src/windows/patch_functions.cc +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M vsprojects/tmu-static/tmu-static.vcproj + +commit 6e7479331c751bdfe04d272dbb1bbbe877f0e86a +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Dec 15 01:41:30 2009 +0000 + + * Fix a memory leak with repeated Patch() calls on windows + (csilvers) + * Fix a bug when we re-Patch() a previously unpatched lib + (csilvers) + * Add .bundle as another .so extension in pprof (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@81 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/spinlock.h +M src/heap-checker.cc +M src/heap-profile-table.h +M src/memfs_malloc.cc +M src/memory_region_map.h +M src/packed-cache-inl.h +M src/pprof +M src/profile-handler.cc +M src/profiledata.h +M src/windows/patch_functions.cc +M src/windows/preamble_patcher.cc + +commit 1d981b6997f73e6d92b647d042cfe8649d4728c1 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Dec 2 21:42:10 2009 +0000 + + * Make memalign && posix_memalign respect tc_set_new_mode + (willchan) + * Fix windows patch functions to respect tc_set_new_mode + (willchan) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@80 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/tcmalloc.cc +M src/tests/tcmalloc_unittest.cc +M src/windows/patch_functions.cc + +commit a94d5f797412bac7b811bf8d69d9298fec54f5cc +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Dec 2 18:15:13 2009 +0000 + + * Prefer __environ to /proc/self/environ (csilvers) + * Add HEAP_CHECK_MAX_LEAKS envvar (glider) + * BUGFIX: debugallocation now calls cpp_alloc for new + (willchan) + * BUGFIX: tc_set_new_mode() respected for realloc and calloc + (willchan) + * BUGFIX: fix opt-mode maybe-crash on debugallocation_test + (csilvers) + * Print alloc size when mmap fails 
(hakon) + * Add ITIMER_REAL support (csilvers, nabeelmian) + * BUGFIX: correctly report double-frees (csilvers) + * Export tc_set_new_mode() from the .h file (willchan) + * Restructure Symbolize to make it more efficient (glider) + * PORTING: Augment sysinfo to work on 64-bit OS X (csilvers) + * Add two numeric pageheap properties to MallocExtension + (fikes) + * PORTING: Use libunwind for i386 when using --omitfp + (ppluzhnikov) + * Add ReleaseToSystem(num_bytes) (kash) + * Provide correct library filenames under solaris (jeffrey) + * BUGFIX: simple fix in pprof --raw mode (mrabkin) + * PORTING: Prefer sys/ucontext.h to fix OS 10.6 builds + (csilvers) + * Improve support for inlined functions in pprof (sanjay) + * Update wget code to not use keepalive (mrabkin, csilvers) + * PORTING: correctly handle x86_64 machines that use fp's + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@79 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M configure +M configure.ac +M doc/cpuprofile.html +M doc/heap_checker.html +M m4/compiler_characteristics.m4 +M src/base/logging.h +M src/base/sysinfo.cc +M src/config.h.in +M src/debugallocation.cc +M src/google/malloc_extension.h +M src/google/malloc_extension_c.h +M src/google/tcmalloc.h.in +M src/heap-profile-table.cc +M src/malloc_extension.cc +M src/memfs_malloc.cc +M src/pprof +M src/profile-handler.cc +M src/symbolize.cc +M src/symbolize.h +M src/tcmalloc.cc +M src/tests/atomicops_unittest.cc +M src/tests/debugallocation_test.cc +M src/tests/malloc_extension_test.cc +M src/tests/profile-handler_unittest.cc +M src/tests/profiler_unittest.sh +M src/tests/tcmalloc_unittest.cc +M src/windows/config.h +M src/windows/google/tcmalloc.h + +commit 5b80f01df1137337131b4c50ce97faaff9973e90 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Nov 10 16:24:57 2009 +0000 + + * Replace usleep() and poll() with nanosleep() (glider) + * Document problems with _recalloc (csilvers) + * Detect when x86_64 
doesn't turn off frame pointers + (csilvers) + * Fix sysinfo.cc/etc to work with 64-bit os x (csilvers) + * BUGFIX: Use __TEXT instead of __DATA to store tcmalloc fns + (csilvers) + * Added two numeric pageheap properties to tcmalloc (fikes) + * Support for mallocranges stats visualization (sanjay) + * Use libunwind for i386, not just x86_64 (ppluzhnikov) + * Add ReleaseToSystem(num_bytes) (kash) + * Provide corect library filenames under solaris (jeffrey) + * BUGFIX: a simple bug in pprof --raw mode (mrabkin) + * Prfer sys/ucontext.h to ucontext.h, to fix OS X 10.6 + (csilvers) + * Improve supprot for inlined functions in pprof (sanjay) + * Document inaccuracies in profiling mmap calls (csilvers) + * Update wget code to not use keepalive (mrabkin, csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@78 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M README.windows +M configure +M configure.ac +M doc/heapprofile.html +M google-perftools.sln +M m4/pc_from_ucontext.m4 +M src/base/basictypes.h +M src/base/dynamic_annotations.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/debugallocation.cc +M src/google/heap-checker.h +M src/google/malloc_extension.h +M src/google/malloc_extension_c.h +M src/heap-checker.cc +M src/malloc_extension.cc +M src/page_heap.cc +M src/page_heap.h +M src/pagemap.h +M src/pprof +M src/stacktrace_config.h +M src/symbolize.cc +M src/symbolize.h +M src/tcmalloc.cc +M src/tests/malloc_extension_c_test.c +A src/tests/page_heap_test.cc +M src/tests/pagemap_unittest.cc +M src/tests/profile-handler_unittest.cc +M src/tests/tcmalloc_unittest.cc +A vsprojects/page_heap_test/page_heap_test.vcproj + +commit 25eed16e1b042a80c9a3e83bbf7ed227d04fb45a +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Oct 27 17:30:52 2009 +0000 + + * Fix Symbolize() to call pprof once, rather than once/symbol + (glider) + * Fix unsetting of hooks before forking, in debug mode (maxim) + * Add some 
documention for pmuprofile (aruns) + * Speed up futex with FUTEX_PRIVATE_FLAG (m3b) + * Fix os x 10.6: prefer sys/ucontext.h to ucontext.h + (csilvers) + * Fix C shims to be actually valid C: malloc_extension/etc + (csilvers) + * Fix a longtime memset bug (csilvers) + * Implement nothrow versions of delete (csilvers) + * Fix recursively-unmapped-region accounting (ppluzhnikov) + * Better distinguish between real and fake VDSO (ppluzhnikov) + * Modify span coalescing to improve performance (sanjay) + * WINDOWS: Remove unnecessary lock around VirtualAlloc + (mbelshe) + * Remove performance tests for ptmalloc2 (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@77 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M Makefile.am +M Makefile.in +M configure +M configure.ac +M doc/pprof_remote_servers.html +M src/base/spinlock_linux-inl.h +M src/base/vdso_support.cc +M src/config.h.in +M src/debugallocation.cc +M src/getpc.h +M src/google/malloc_extension_c.h +M src/google/tcmalloc.h.in +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/malloc_extension.cc +M src/memory_region_map.cc +M src/page_heap.cc +M src/page_heap.h +M src/pprof +M src/profiler.cc +M src/sampler.cc +M src/stacktrace_libunwind-inl.h +M src/stacktrace_x86-inl.h +M src/symbolize.cc +M src/symbolize.h +M src/tcmalloc.cc +M src/tests/heap-checker-death_unittest.sh +M src/tests/heap-checker_unittest.cc +A src/tests/malloc_extension_c_test.c +D src/tests/ptmalloc/COPYRIGHT +D src/tests/ptmalloc/lran2.h +D src/tests/ptmalloc/malloc-machine.h +D src/tests/ptmalloc/t-test.h +D src/tests/ptmalloc/t-test1.c +D src/tests/ptmalloc/t-test2.c +D src/tests/ptmalloc/thread-m.h +D src/tests/ptmalloc/thread-st.h +M src/tests/tcmalloc_unittest.cc +M src/windows/config.h +M src/windows/google/tcmalloc.h +M src/windows/port.cc + +commit 19dfa9e3733155e57406fbd082273eb53cb2750e +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Sep 11 18:42:32 2009 +0000 + + Thu Sep 10 13:51:15 
2009 Google Inc. <opensource@google.com> + + * google-perftools: version 1.4 release + * Add debugallocation library, to catch memory leaks, + stomping, etc + * Add --raw mode to allow for delayed processing of pprof + files + * Use less memory when reading CPU profiles + * New environment variables to control kernel-allocs (sbrk, + memfs, etc) + * Add MarkThreadBusy(): performance improvement + * Remove static thread-cache-size code; all is dynamic now + * Add new HiddenPointer class to heap checker + * BUGFIX: pvalloc(0) allocates now (found by new debugalloc + library) + * BUGFIX: valloc test (not implementation) no longer + overruns memory + * BUGFIX: GetHeapProfile no longer deadlocks + * BUGFIX: Support unmapping memory regions before main + * BUGFIX: Fix some malloc-stats formatting + * BUGFIX: Don't crash as often when freeing libc-allocated + memory + * BUGFIX: Deal better with incorrect PPROF_PATH when + symbolizing + * BUGFIX: weaken new/delete/etc in addition to malloc/free/etc + * BUGFIX: Fix return value of GetAllocatedSize + * PORTING: Fix mmap-#define problem on some 64-bit systems + * PORTING: Call ranlib again (some OS X versions need it) + * PORTING: Fix a leak when building with LLVM + * PORTING: Remove some unneeded bash-ishs from testing scripts + * WINDOWS: Support library unloading as well as loading + * WINDOWS/BUGFIX: Set page to 'xrw' instead of 'rw' when + patching + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@76 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M Makefile.am +M Makefile.in +M aclocal.m4 +M configure +M configure.ac +M doc/tcmalloc.html +M google-perftools.sln +M m4/install_prefix.m4 +A m4/pc_from_ucontext.m4 +M packages/deb/changelog +M src/base/atomicops-internals-macosx.h +M src/base/atomicops-internals-x86.cc +M src/base/atomicops-internals-x86.h +M src/base/basictypes.h +M src/base/dynamic_annotations.cc +M src/base/dynamic_annotations.h +M src/base/logging.h +M src/base/low_level_alloc.cc +M 
src/base/low_level_alloc.h +M src/base/simple_mutex.h +M src/base/spinlock.cc +M src/base/spinlock.h +A src/base/spinlock_linux-inl.h +A src/base/spinlock_posix-inl.h +A src/base/spinlock_win32-inl.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/base/vdso_support.cc +M src/base/vdso_support.h +M src/common.cc +M src/common.h +M src/config.h.in +A src/debugallocation.cc +M src/getpc.h +M src/google/heap-checker.h +M src/google/malloc_extension.h +M src/google/malloc_extension_c.h +M src/google/tcmalloc.h.in +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profiler.cc +M src/malloc_extension.cc +M src/malloc_hook.cc +M src/memfs_malloc.cc +M src/memory_region_map.cc +M src/packed-cache-inl.h +M src/page_heap.h +M src/pagemap.h +M src/pprof +M src/profiler.cc +M src/sampler.h +M src/stacktrace_win32-inl.h +M src/stacktrace_with_context.cc +M src/stacktrace_x86-inl.h +A src/symbolize.cc +C060 src/stacktrace_with_context.cc src/symbolize.h +M src/system-alloc.cc +M src/tcmalloc.cc +M src/tests/addressmap_unittest.cc +A src/tests/debugallocation_test.cc +A src/tests/debugallocation_test.sh +M src/tests/heap-checker_unittest.cc +M src/tests/heap-profiler_unittest.cc +M src/tests/heap-profiler_unittest.sh +M src/tests/low_level_alloc_unittest.cc +M src/tests/malloc_extension_test.cc +M src/tests/markidle_unittest.cc +M src/tests/memalign_unittest.cc +M src/tests/pagemap_unittest.cc +M src/tests/profile-handler_unittest.cc +M src/tests/profiledata_unittest.cc +M src/tests/profiler_unittest.cc +M src/tests/profiler_unittest.sh +M src/tests/sampler_test.cc +M src/tests/stacktrace_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/thread_cache.cc +M src/thread_cache.h +M src/windows/config.h +M src/windows/google/tcmalloc.h +M src/windows/patch_functions.cc +M src/windows/port.h +M src/windows/preamble_patcher.cc +M vsprojects/addr2line-pdb/addr2line-pdb.vcproj +M vsprojects/addressmap_unittest/addressmap_unittest.vcproj +M 
vsprojects/frag_unittest/frag_unittest.vcproj +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +M vsprojects/malloc_extension_test/malloc_extension_test.vcproj +M vsprojects/markidle_unittest/markidle_unittest.vcproj +M vsprojects/nm-pdb/nm-pdb.vcproj +M vsprojects/packed-cache_test/packed-cache_test.vcproj +M vsprojects/pagemap_unittest/pagemap_unittest.vcproj +M vsprojects/realloc_unittest/realloc_unittest.vcproj +M vsprojects/sampler_test/sampler_test.vcproj +M vsprojects/stack_trace_table_test/stack_trace_table_test.vcproj +M +vsprojects/tcmalloc_minimal_large/tcmalloc_minimal_large_unittest.vcproj +M +vsprojects/tcmalloc_minimal_unittest/tcmalloc_minimal_unittest.vcproj +M vsprojects/thread_dealloc_unittest/thread_dealloc_unittest.vcproj +M vsprojects/tmu-static/tmu-static.vcproj + +commit 2197cc670204c583bba3903b765c77620f349609 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Jun 10 02:04:26 2009 +0000 + + Tue Jun 9 18:19:06 2009 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.3 release + * Provide our own name for memory functions: tc_malloc, etc + (csilvers) + * Weaken memory-alloc functions so user can override them + (csilvers) + * Remove meaningless delete(nothrow) and delete[](nothrow) + (csilvers) + * BUILD: replace clever libtcmalloc/profiler.a with a new .a + (csilvers) + * PORTING: improve windows port by using google spinlocks + (csilvers) + * PORTING: Fix RedHat 9 memory allocation in heapchecker + (csilvers) + * PORTING: Rename OS_WINDOWS macro to PLATFORM_WINDOWS + (mbelshe) + * PORTING/BUGFIX: Make sure we don't clobber GetLastError + (mbelshe) + * BUGFIX: get rid of useless data for callgrind (weidenrinde) + * BUGFIX: Modify windows patching to deadlock sometimes + (csilvers) + * BUGFIX: an improved fix for hook handling during fork + (csilvers) + * BUGFIX: revamp profiler_unittest.sh, which was very broken + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@74 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M Makefile.am +M Makefile.in +M README +M aclocal.m4 +M configure +M configure.ac +D m4/stl_hash.m4 +M packages/deb/changelog +M packages/rpm/rpm.spec +M src/base/atomicops-internals-linuxppc.h +M src/base/dynamic_annotations.cc +M src/base/dynamic_annotations.h +M src/base/spinlock.cc +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/base/vdso_support.cc +M src/base/vdso_support.h +M src/config.h.in +A src/google/tcmalloc.h.in +M src/heap-checker.cc +M src/pprof +M src/profile-handler.cc +M src/tcmalloc.cc +M src/tests/heap-checker-death_unittest.sh +M src/tests/profiler_unittest.sh +M src/tests/stacktrace_unittest.cc +M src/windows/config.h +A src/windows/google/tcmalloc.h +M src/windows/override_functions.cc +M src/windows/patch_functions.cc + +commit 104bf697fbd2a0b90b5f01344ee01c8caa1745d0 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon May 18 22:50:20 2009 +0000 + + Use the google spinlock code 
instead of the built-in windows code. + The main benefit for perftools is that google spinlocks allow for + link-time (static) initialization, which we had to simulate before, + yielding bugs and worse performance. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@73 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/atomicops-internals-x86-msvc.h +M src/base/spinlock.cc +M src/base/spinlock.h +M src/windows/config.h +M src/windows/patch_functions.cc +M src/windows/port.cc +M src/windows/port.h +M vsprojects/addressmap_unittest/addressmap_unittest.vcproj +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +M vsprojects/packed-cache_test/packed-cache_test.vcproj +M vsprojects/tmu-static/tmu-static.vcproj + +commit ad03b009ef2046cee9dc38afe022b487de37db5c +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Apr 25 01:01:23 2009 +0000 + + In the case of windows with HAS_EXCEPTIONS turned off we weren't able + to use the std::set_new_handler correctly. Rework the #ifdefs to + allow use of the std_new_handler, but ignore the exceptions. + + Patch submitted by mbelshe. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@72 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/tcmalloc.cc + +commit 1894763f57b00f72a720c4c126815c073d9ed0f3 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Apr 22 22:53:41 2009 +0000 + + Make sure we don't clobber GetLastError() (for windows). + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@71 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/windows/port.h + +commit a63235c4331445d718d151a2a659ec9687a0b8c8 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Apr 21 17:10:29 2009 +0000 + + 1) Change #include "config.h" to <config.h>. This is what automake + recommends, and makes it easier to override a config file. 
+ + 2) Rename OS_WINDOWS in sysinfo.cc, to not conflict with a macro + defined in a windows SDK somewhere. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@70 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M src/base/atomicops.h +M src/base/basictypes.h +M src/base/commandlineflags.h +M src/base/elfcore.h +M src/base/logging.cc +M src/base/logging.h +M src/base/low_level_alloc.h +M src/base/simple_mutex.h +M src/base/spinlock.cc +M src/base/spinlock.h +M src/base/stl_allocator.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/base/vdso_support.h +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profiler.cc +M src/internal_logging.cc +M src/internal_logging.h +M src/malloc_extension.cc +M src/malloc_hook.cc +M src/memfs_malloc.cc +M src/memory_region_map.cc +M src/memory_region_map.h +M src/page_heap.cc +M src/page_heap.h +M src/profiledata.cc +M src/profiledata.h +M src/raw_printer.cc +M src/raw_printer.h +M src/span.cc +M src/span.h +M src/stack_trace_table.cc +M src/stack_trace_table.h +M src/stacktrace.cc +M src/stacktrace_with_context.cc +M src/static_vars.h +M src/system-alloc.cc +M src/system-alloc.h +M src/tcmalloc.cc +M src/tcmalloc.h +M src/thread_cache.cc +M src/thread_cache.h +M src/windows/override_functions.cc +M src/windows/patch_functions.cc +M src/windows/port.cc + +commit beb6a9a183c1ca25c99e4401b58266ce73b8c846 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Apr 18 00:02:25 2009 +0000 + + Fri Apr 17 16:40:48 2009 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.2 release + * Allow large_alloc_threshold=0 to turn it off entirely + (csilvers) + * Die more helpfully when out of memory for internal data + (csilvers) + * Refactor profile-data gathering, add a new unittest (cgd, + nabeelmian) + * BUGFIX: fix rounding errors with static thread-size caches + (addi) + * BUGFIX: disable hooks better when forking in leak-checker + (csilvers) + * BUGFIX: fix realloc of crt pointers on windows (csilvers) + * BUGFIX: do a better job of finding binaries in .sh tests + (csilvers) + * WINDOWS: allow overriding malloc/etc instead of patching + (mbelshe) + * PORTING: fix compilation error in a ppc-specific file + (csilvers) + * PORTING: deal with quirks in cygwin's /proc/self/maps + (csilvers) + * PORTING: use 'A' version of functions for ascii input + (mbelshe) + * PORTING: generate .so's on cygwin and mingw (ajenjo) + * PORTING: disable profiler methods on cygwin (jperkins) + * Updated autoconf version to 2.61 and libtool version + to 1.5.26 + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@68 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M Makefile.am +M Makefile.in +M README.windows +M aclocal.m4 +M config.guess +M config.sub +M configure +M configure.ac +M ltmain.sh +M packages/deb/changelog +M packages/rpm.sh +M packages/rpm/rpm.spec +M src/base/atomicops-internals-linuxppc.h +M src/base/logging.cc +M src/base/logging.h +M src/base/simple_mutex.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/config.h.in +M src/google/heap-checker.h +M src/google/profiler.h +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profile-table.h +M src/page_heap_allocator.h +M src/pprof +A src/profile-handler.cc +A src/profile-handler.h +M src/profiledata.cc +M src/profiledata.h +M src/profiler.cc +M src/sampler.cc +M src/stacktrace_win32-inl.h +M src/stacktrace_x86-inl.h +M src/tcmalloc.cc +M src/tests/frag_unittest.cc +A 
src/tests/profile-handler_unittest.cc +M src/tests/profiledata_unittest.cc +M src/tests/sampling_test.cc +M src/tests/sampling_test.sh +M src/tests/testutil.cc +M src/thread_cache.cc +M src/windows/addr2line-pdb.c +M src/windows/config.h +M src/windows/mingw.h +M src/windows/nm-pdb.c +A src/windows/override_functions.cc +M src/windows/patch_functions.cc +M src/windows/port.cc +M src/windows/port.h + +commit edd03a831f350bc72d76d4fad2b390d43faccb79 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Mar 11 20:50:03 2009 +0000 + + Wed Mar 11 11:25:34 2009 Google Inc. <opensource@google.com> + + * google-perftools: version 1.1 release + * Dynamically resize thread caches -- nice perf. improvement + (kash) + * Add VDSO support to give better stacktraces in linux + (ppluzhnikov) + * Improve heap-profiling sampling algorithm (ford) + * Rewrite leak-checking code: should be faster and more robust + (sanjay) + * Use ps2 instead of ps for dot: better page cropping for gv + (csilvers) + * Disable malloc-failure warning messages by default + (csilvers) + * Update config/Makefile to disable tests on a per-OS basis + (csilvers) + * PORTING: Get perftools compiling under MSVC 7.1 again + (csilvers) + * PORTING: Get perftools compiling under cygwin again + (csilvers) + * PORTING: automatically set library flags for solaris x86 + (csilvers) + * Add TCMALLOC_SKIP_SBRK to mirror TCMALLOC_SKIP_MMAP + (csilvers) + * Add --enable flags to allow selective building (csilvers) + * Put addr2line-pdb and nm-pdb in proper output directory + (csilvers) + * Remove deprecated DisableChecksIn (sanjay) + * DOCUMENTATION: Document most MallocExtension routines + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@66 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M Makefile.am +M Makefile.in +M README +M README.windows +M aclocal.m4 +M configure +M configure.ac +M doc/pprof_remote_servers.html +M doc/tcmalloc.html +M 
google-perftools.sln +A m4/acx_nanosleep.m4 +M packages/deb.sh +M packages/deb/changelog +M src/base/basictypes.h +M src/base/cycleclock.h +M src/base/dynamic_annotations.cc +M src/base/dynamic_annotations.h +M src/base/linux_syscall_support.h +M src/base/simple_mutex.h +M src/base/spinlock.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/base/thread_annotations.h +A src/base/vdso_support.cc +A src/base/vdso_support.h +M src/central_freelist.cc +M src/common.cc +M src/common.h +M src/config.h.in +M src/google/heap-checker.h +M src/google/malloc_extension.h +M src/google/malloc_extension_c.h +M src/google/stacktrace.h +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profile-table.h +M src/heap-profiler.cc +M src/internal_logging.cc +M src/internal_logging.h +M src/malloc_extension.cc +M src/malloc_hook.cc +M src/memory_region_map.cc +M src/page_heap.cc +M src/pprof +M src/profiler.cc +C061 src/static_vars.cc src/raw_printer.cc +A src/raw_printer.h +A src/sampler.cc +A src/sampler.h +M src/span.cc +A src/stack_trace_table.cc +C056 src/static_vars.cc src/stack_trace_table.h +M src/stacktrace.cc +C057 src/stacktrace.cc src/stacktrace_config.h +M src/stacktrace_libunwind-inl.h +M src/stacktrace_win32-inl.h +C059 src/static_vars.cc src/stacktrace_with_context.cc +M src/stacktrace_x86-inl.h +M src/stacktrace_x86_64-inl.h +M src/static_vars.cc +M src/static_vars.h +M src/system-alloc.cc +M src/tcmalloc.cc +M src/tests/heap-checker-death_unittest.sh +M src/tests/heap-checker_unittest.cc +M src/tests/heap-profiler_unittest.sh +C060 src/tests/sampling_test.cc src/tests/malloc_extension_test.cc +A src/tests/pagemap_unittest.cc +A src/tests/raw_printer_test.cc +A src/tests/realloc_unittest.cc +A src/tests/sampler_test.cc +M src/tests/sampling_test.cc +M src/tests/sampling_test.sh +A src/tests/stack_trace_table_test.cc +M src/tests/tcmalloc_unittest.cc +M src/thread_cache.cc +M src/thread_cache.h +M src/windows/addr2line-pdb.c +M src/windows/config.h +M 
src/windows/patch_functions.cc +M src/windows/port.h +M vsprojects/addr2line-pdb/addr2line-pdb.vcproj +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +C061 vsprojects/nm-pdb/nm-pdb.vcproj +vsprojects/malloc_extension_test/malloc_extension_test.vcproj +M vsprojects/nm-pdb/nm-pdb.vcproj +C064 vsprojects/nm-pdb/nm-pdb.vcproj +vsprojects/pagemap_unittest/pagemap_unittest.vcproj +C065 vsprojects/nm-pdb/nm-pdb.vcproj +vsprojects/realloc_unittest/realloc_unittest.vcproj +C063 vsprojects/nm-pdb/nm-pdb.vcproj +vsprojects/sampler_test/sampler_test.vcproj +C061 vsprojects/nm-pdb/nm-pdb.vcproj +vsprojects/stack_trace_table_test/stack_trace_table_test.vcproj +M vsprojects/tmu-static/tmu-static.vcproj + +commit c75de4d1e91c339fb5142a8a21be8b3ba5224ef7 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Jan 6 19:41:15 2009 +0000 + + Tue Jan 6 13:58:56 2009 Google Inc. <opensource@google.com> + * google-perftools: version 1.0 release + * Exactly the same as 1.0rc2 + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@64 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M configure +M configure.ac +M packages/deb/changelog +M src/pprof + +commit 49b02736109f9fe3383971e62bda2192ae86a36d +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Dec 15 01:20:31 2008 +0000 + + Sun Dec 14 17:10:35 2008 Google Inc. <opensource@google.com> + * google-perftools: version 1.0rc2 release + * Fix compile error on 64-bit systems (casting ptr to int) + (csilvers) + + Thu Dec 11 16:01:32 2008 Google Inc. 
<opensource@google.com> + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@62 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M configure +M configure.ac +M src/heap-checker.cc + +commit 6fa2a2574ce1c15ac12293e24691d69a41972e54 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Dec 13 01:35:42 2008 +0000 + + Thu Dec 11 16:01:32 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 1.0rc1 release + * Replace API for selectively disabling heap-checker in code + (sanjay) + * Add a pre-mmap hook (daven, adlr) + * Add MallocExtension interface to set memory-releasing rate + (fikes) + * Augment pprof to allow any string ending in /pprof/profile + (csilvers) + * PORTING: Rewrite -- and fix -- malloc patching for windows + (dvitek) + * PORTING: Add nm-pdb and addr2line-pdb for use by pprof + (dvitek) + * PORTING: Improve cygwin and mingw support (jperkins, + csilvers) + * PORTING: Fix pprof for mac os x, other pprof improvements + (csilvers) + * PORTING: Fix some PPC bugs in our locking code + (anton.blanchard) + * A new unittest, sampling_test, to verify tcmalloc-profiles + (csilvers) + * Turn off TLS for gcc < 4.1.2, due to a TLS + -fPIC bug + (csilvers) + * Prefer __builtin_frame_address to assembly for stacktraces + (nlewycky) + * Separate tcmalloc.cc out into multiple files -- + finally!
(kash) + * Make our locking code work with -fPIC on 32-bit x86 (aruns) + * Fix an initialization-ordering bug for tcmalloc/profiling + (csilvers) + * Use "initial exec" model of TLS to speed up tcmalloc + (csilvers) + * Enforce 16-byte alignment for tcmalloc, for SSE (sanjay) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@60 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M Makefile.am +M Makefile.in +M README.windows +M configure +M configure.ac +M doc/heap_checker.html +M doc/pprof_remote_servers.html +M google-perftools.sln +M packages/deb.sh +M packages/deb/changelog +M src/base/atomicops-internals-linuxppc.h +M src/base/atomicops-internals-macosx.h +M src/base/atomicops-internals-x86.h +M src/base/basictypes.h +M src/base/linux_syscall_support.h +M src/base/logging.cc +M src/base/logging.h +M src/base/simple_mutex.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +A src/base/thread_annotations.h +M src/base/thread_lister.c +A src/central_freelist.cc +A src/central_freelist.h +A src/common.cc +A src/common.h +M src/getpc.h +M src/google/heap-checker.h +M src/google/heap-profiler.h +M src/google/malloc_extension.h +M src/google/malloc_hook.h +M src/google/malloc_hook_c.h +M src/google/profiler.h +M src/google/stacktrace.h +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profile-table.h +M src/heap-profiler.cc +A src/linked_list.h +M src/malloc_extension.cc +M src/malloc_hook-inl.h +M src/malloc_hook.cc +M src/memory_region_map.cc +M src/memory_region_map.h +A src/page_heap.cc +A src/page_heap.h +A src/page_heap_allocator.h +M src/pprof +M src/profiler.cc +A src/span.cc +A src/span.h +M src/stacktrace.cc +M src/stacktrace_libunwind-inl.h +A src/stacktrace_win32-inl.h +M src/stacktrace_x86-inl.h +C059 src/base/logging.cc src/static_vars.cc +A src/static_vars.h +M src/tcmalloc.cc +C071 src/base/logging.cc src/tcmalloc_guard.h +M src/tests/atomicops_unittest.cc +M src/tests/frag_unittest.cc +M 
src/tests/heap-checker_unittest.cc +M src/tests/low_level_alloc_unittest.cc +C052 src/base/logging.cc src/tests/sampling_test.cc +A src/tests/sampling_test.sh +M src/tests/stacktrace_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/tests/testutil.cc +A src/thread_cache.cc +A src/thread_cache.h +A src/windows/addr2line-pdb.c +A src/windows/get_mangled_names.cc +M src/windows/mingw.h +M src/windows/mini_disassembler.cc +M src/windows/mini_disassembler.h +M src/windows/mini_disassembler_types.h +A src/windows/nm-pdb.c +M src/windows/patch_functions.cc +M src/windows/port.cc +M src/windows/port.h +M src/windows/preamble_patcher.cc +M src/windows/preamble_patcher.h +M src/windows/preamble_patcher_with_stub.cc +D src/windows/vc7and8.def +R059 vsprojects/memalign_unittest/memalign_unittest.vcproj +vsprojects/addr2line-pdb/addr2line-pdb.vcproj +M vsprojects/frag_unittest/frag_unittest.vcproj +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +M vsprojects/markidle_unittest/markidle_unittest.vcproj +C068 vsprojects/frag_unittest/frag_unittest.vcproj +vsprojects/nm-pdb/nm-pdb.vcproj +M +vsprojects/tcmalloc_minimal_large/tcmalloc_minimal_large_unittest.vcproj +M +vsprojects/tcmalloc_minimal_unittest/tcmalloc_minimal_unittest.vcproj +M vsprojects/thread_dealloc_unittest/thread_dealloc_unittest.vcproj +R074 +vsprojects/tcmalloc_minimal_unittest-static/tcmalloc_minimal_unittest-static.vcproj +vsprojects/tmu-static/tmu-static.vcproj + +commit 16191f87ff8dc78295c0f617060460664fc444bd +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Sep 23 17:51:05 2008 +0000 + + Tue Sep 23 08:56:31 2008 Google Inc. 
<opensource@google.com> + + * google-perftools: version 0.99.2 release + * COMPILE FIX: add #include needed for FreeBSD and OS X + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@58 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M configure +M configure.ac +M src/tests/heap-profiler_unittest.cc + +commit 8b2dd25dc9d1523ab9a86bd39c4fb798c89db899 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Sep 20 17:15:23 2008 +0000 + + Sat Sep 20 09:37:18 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.99.1 release + * BUG FIX: look for nm, etc in /usr/bin, not /usr/crosstool + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@56 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M configure +M configure.ac +M packages/deb/changelog +M src/pprof + +commit 106aef86ce5697cf44bdbec90ab6833b9254d273 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Sep 19 20:06:40 2008 +0000 + + Thu Sep 18 16:00:27 2008 Google Inc. 
<opensource@google.com> + + * google-perftools: version 0.99 release + * Add IsHeapProfileRunning (csilvers) + * Add C shims for some of the C++ header files (csilvers) + * Fix heap profile file clean-up logic (maxim) + * Rename linuxthreads.c to .cc for better compiler support + (csilvers) + * Add source info to disassembly in pprof (sanjay) + * Use open instead of fopen to avoid memory alloc (csilvers) + * Disable malloc extensions when running under valgrind (kcc) + * BUG FIX: Fix out-of-bound error by reordering a check + (larryz) + * Add Options struct to ProfileData (cgd) + * Correct PC-handling of --base in pprof (csilvers) + * Handle 1 function occurring twice in an image (sanjay) + * Improve stack-data cleaning (maxim) + * Use 'struct Foo' to make header C compatible (csilvers) + * Add 'total' line to pprof --text (csilvers) + * Pre-allocate buffer for heap-profiler to avoid OOM errors + (csilvers) + * Allow a few more env-settings to control tcmalloc (csilvers) + * Document some of the issues involving thread-local storage + (csilvers) + * BUG FIX: Define strtoll and friends for windows (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@54 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M Makefile.am +M Makefile.in +M README +M README.windows +M configure +M configure.ac +M doc/heapprofile.html +M doc/tcmalloc.html +M packages/deb/changelog +M src/addressmap-inl.h +M src/base/basictypes.h +M src/base/commandlineflags.h +M src/base/cycleclock.h +M src/base/dynamic_annotations.cc +M src/base/dynamic_annotations.h +R099 src/base/linuxthreads.c src/base/linuxthreads.cc +M src/base/spinlock.h +M src/base/stl_allocator.h +M src/base/sysinfo.cc +M src/google/heap-checker.h +M src/google/heap-profiler.h +M src/google/malloc_extension.h +A src/google/malloc_extension_c.h +M src/google/malloc_hook.h +A src/google/malloc_hook_c.h +M src/google/profiler.h +M src/google/stacktrace.h +M src/heap-checker.cc +M 
src/heap-profile-table.cc +M src/heap-profile-table.h +M src/heap-profiler.cc +M src/internal_logging.cc +M src/internal_logging.h +M src/malloc_extension.cc +M src/malloc_hook.cc +M src/memory_region_map.cc +M src/pprof +M src/profiledata.cc +M src/profiledata.h +M src/profiler.cc +M src/system-alloc.cc +M src/tcmalloc.cc +M src/tests/heap-checker-death_unittest.sh +M src/tests/heap-checker_unittest.cc +M src/tests/heap-profiler_unittest.cc +M src/tests/markidle_unittest.cc +M src/tests/profiledata_unittest.cc +M src/windows/port.h + +commit 100e657c5092bc274424286a728db5116a4bbc54 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Sat Jun 14 02:30:53 2008 +0000 + + Mon Jun 9 16:47:03 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.98 release + * Add ProfilerStartWithOptions() (cgd) + * Change tcmalloc_minimal to not do any stack-tracing at all + (csilvers) + * Prefer mmap to sbrk for 64-bit debug mode (sanjay) + * Fix accounting for some tcmalloc stats (sanjay) + * Use setrlimit() to keep unittests from killing the machine + (odo) + * Fix a bug when sbrk-ing near address 4G (csilvers) + * Make MallocHook thread-safe (jyasskin) + * Fix windows build for MemoryBarrier (jyasskin) + * Fix CPU-profiler docs to mention correct libs (csilvers) + * Fix for GetHeapProfile() when heap-profiling is off (maxim) + * Avoid realloc resizing ping-pongs using hysteresis + (csilvers) + * Add --callgrind output support to pprof (klimek) + * Fix profiler.h and heap-profiler.h to be C-compatible + (csilvers) + * Break malloc_hook.h into two parts to reduce dependencies + (csilvers) + * Better handle systems that don't implement mmap (csilvers) + * PORTING: disable system_alloc_unittest for msvc (csilvers) + * PORTING: Makefile tweaks to build better on cygwin + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@52 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M Makefile.am +M Makefile.in +M README.windows
+M configure +M configure.ac +M doc/cpuprofile.html +M google-perftools.sln +M m4/acx_pthread.m4 +M packages/deb/changelog +M src/addressmap-inl.h +M src/base/atomicops-internals-linuxppc.h +M src/base/atomicops-internals-macosx.h +M src/base/atomicops-internals-x86-msvc.h +M src/base/atomicops-internals-x86.cc +M src/base/atomicops-internals-x86.h +M src/base/atomicops.h +M src/base/commandlineflags.h +M src/base/cycleclock.h +M src/base/dynamic_annotations.cc +M src/base/dynamic_annotations.h +M src/base/linux_syscall_support.h +M src/base/low_level_alloc.cc +M src/base/low_level_alloc.h +M src/base/simple_mutex.h +M src/base/spinlock.h +M src/base/stl_allocator.h +M src/base/sysinfo.h +M src/config.h.in +M src/getpc.h +M src/google/heap-checker.h +M src/google/heap-profiler.h +M src/google/malloc_extension.h +M src/google/malloc_hook.h +M src/google/profiler.h +M src/google/stacktrace.h +M src/heap-checker.cc +M src/heap-profile-table.h +M src/heap-profiler.cc +M src/internal_logging.h +A src/malloc_hook-inl.h +M src/malloc_hook.cc +M src/maybe_threads.cc +M src/maybe_threads.h +M src/memory_region_map.cc +M src/memory_region_map.h +M src/packed-cache-inl.h +M src/pagemap.h +M src/pprof +M src/profiledata.h +M src/profiler.cc +M src/stacktrace_generic-inl.h +M src/system-alloc.cc +M src/system-alloc.h +M src/tcmalloc.cc +M src/tests/maybe_threads_unittest.sh +M src/tests/memalign_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/tests/testutil.cc +M src/tests/testutil.h +M src/windows/config.h +M src/windows/mingw.h +M src/windows/mini_disassembler.h +M src/windows/mini_disassembler_types.h +M src/windows/patch_functions.cc +M src/windows/port.h +M src/windows/preamble_patcher.h +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +M vsprojects/memalign_unittest/memalign_unittest.vcproj +D vsprojects/system_alloc_unittest/system_alloc_unittest.vcproj +M 
+vsprojects/tcmalloc_minimal_unittest-static/tcmalloc_minimal_unittest-static.vcproj + +commit 7ec719093b1c9fda979ba0d07eed288e2a7c3c9b +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Apr 22 01:47:16 2008 +0000 + + Mon Apr 21 15:20:52 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.97 release + * Refactor GetHeapProfile to avoid using malloc (maxim) + * Fix heap-checker and heap-profiler hook interactions (maxim) + * Fix a data race in MemoryRegionMap::Lock (jyasskin) + * Improve thread-safety of leak checker (maxim) + * Fix mmap profile to no longer deadlock (maxim) + * Fix rpm to have devel package depend on non-devel (csilvers) + * PORTING: Fix clock-speed detection for Mac OS X (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@50 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M configure +M configure.ac +M doc/heapprofile.html +M packages/deb/changelog +M packages/rpm/rpm.spec +M src/addressmap-inl.h +M src/base/cycleclock.h +M src/base/elfcore.h +M src/base/low_level_alloc.cc +M src/base/spinlock.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/google/heap-checker.h +M src/google/heap-profiler.h +M src/google/profiler.h +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profile-table.h +M src/heap-profiler.cc +M src/internal_logging.cc +M src/internal_logging.h +M src/memory_region_map.cc +M src/memory_region_map.h +M src/pprof +M src/stacktrace_libunwind-inl.h +M src/stacktrace_x86-inl.h +M src/tcmalloc.cc +M src/tests/heap-checker-death_unittest.sh +M src/tests/heap-checker_unittest.cc +M src/tests/heap-profiler_unittest.cc +M src/tests/low_level_alloc_unittest.cc +M src/tests/maybe_threads_unittest.sh +M src/windows/port.cc + +commit 97fdd4a4f97dd15e8803ed51ac153903c2cdffc2 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Mar 19 23:35:27 2008 +0000 + + Tue Mar 18 14:30:44 2008 Google Inc. 
<opensource@google.com> + + * google-perftools: version 0.96 release + * major atomicops rewrite; fixed atomic ops code for linux/ppc + (vchen) + * nix the stacktrace library; now build structure is simpler + (csilvers) + * Speed up heap-checker, and reduce extraneous logging (maxim) + * Improve itimer code for NPTL case (cgd) + * Add source code annotations for use by valgrind, etc (kcc) + * PORTING: Fix high resolution timers for Mac OS X (adlr) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@48 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M Makefile.am +M Makefile.in +M TODO +M configure +M configure.ac +M doc/cpuprofile.html +M doc/heapprofile.html +M packages/deb/changelog +M packages/rpm/rpm.spec +M src/addressmap-inl.h +M src/base/atomicops-internals-linuxppc.h +M src/base/atomicops-internals-macosx.h +M src/base/atomicops-internals-x86-msvc.h +M src/base/atomicops-internals-x86.cc +M src/base/atomicops-internals-x86.h +M src/base/atomicops.h +M src/base/basictypes.h +M src/base/cycleclock.h +A src/base/dynamic_annotations.cc +A src/base/dynamic_annotations.h +M src/base/linux_syscall_support.h +M src/base/low_level_alloc.cc +M src/base/spinlock.cc +M src/base/spinlock.h +M src/base/sysinfo.cc +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/memfs_malloc.cc +M src/pprof +M src/profiler.cc +M src/tests/atomicops_unittest.cc +M src/tests/heap-checker-death_unittest.sh +M src/tests/maybe_threads_unittest.sh +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +M +vsprojects/tcmalloc_minimal_unittest-static/tcmalloc_minimal_unittest-static.vcproj + +commit a644b4f2d61b9610ec6eeb1f09ebce7054aa0762 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Tue Feb 19 22:19:22 2008 +0000 + + Tue Feb 19 12:01:31 2008 Google Inc. 
<opensource@google.com> + + * google-perftools: version 0.95.1 release (bugfix release) + * x86_64 compile-fix: nix pread64 and pwrite64 (csilvers) + * more heap-checker debug logging (maxim) + * minor improvement to x86_64 CycleClock (gpike) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@46 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M configure +M configure.ac +M src/base/cycleclock.h +M src/base/linux_syscall_support.h +M src/heap-checker.cc + +commit 8a0a3101bc6a7d56ac04b278f28bdf3f95b00a3c +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Feb 13 00:55:09 2008 +0000 + + Tue Feb 12 12:28:32 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.95 release + * Better -- not perfect -- support for linux-ppc (csilvers) + * Fix race condition in libunwind stacktrace (aruns) + * Speed up x86 spinlock locking (m3b) + * Improve heap-checker performance (maxim) + * Heap checker traverses more ptrs inside heap-alloced objects + (maxim) + * Remove deprecated ProfilerThreadState function (cgd) + * Update libunwind documentation for statically linked + binaries (aruns) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@44 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M Makefile.am +M Makefile.in +M configure +M configure.ac +M doc/heap_checker.html +M doc/tcmalloc.html +M packages/deb/changelog +M packages/deb/docs +M packages/rpm/rpm.spec +M src/addressmap-inl.h +A src/base/atomicops-internals-linuxppc.h +M src/base/atomicops-internals-x86-msvc.h +M src/base/atomicops-internals-x86.h +M src/base/atomicops.h +M src/base/commandlineflags.h +A src/base/cycleclock.h +M src/base/elfcore.h +M src/base/linux_syscall_support.h +M src/base/linuxthreads.c +M src/base/linuxthreads.h +M src/base/logging.h +R090 src/base/mutex.h src/base/simple_mutex.h +M src/base/spinlock.cc +M src/base/spinlock.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/google/heap-checker.h +M 
src/google/profiler.h +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profile-table.h +M src/heap-profiler.cc +M src/malloc_extension.cc +M src/malloc_hook.cc +M src/maybe_threads.cc +M src/packed-cache-inl.h +M src/profiledata.h +M src/profiler.cc +M src/stacktrace.cc +M src/stacktrace_libunwind-inl.h +M src/stacktrace_powerpc-inl.h +M src/system-alloc.cc +M src/tcmalloc.cc +M src/tests/addressmap_unittest.cc +M src/tests/heap-checker_unittest.cc +M src/tests/profiler_unittest.cc +M src/tests/stacktrace_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/windows/preamble_patcher.h + +commit b43ba444fcd74fa7c3260f6b2494dcbaa3fdb296 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Dec 5 00:08:28 2007 +0000 + + Mon Dec 3 23:51:54 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.94.1 release (bugfix release) + * Fix missing #includes for x86_64 compile using libunwind + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@42 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M configure +M configure.ac +M src/stacktrace_libunwind-inl.h + +commit 11b02f7aebd05cf39f6f93bdd48786909f99f34e +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Nov 29 23:39:24 2007 +0000 + + Thu Nov 29 07:59:43 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.94 release + * PORTING: MinGW/Msys support -- runs same code as MSVC does + (csilvers) + * PORTING: Add NumCPUs support for Mac OS X (csilvers) + * Work around a sscanf bug in glibc(?) 
(waldemar) + * Fix Windows MSVC bug triggered by thread deletion (csilvers) + * Fix bug that triggers in MSVC /O2: missing volatile (gpike) + * March-of-time support: quiet warnings/errors for gcc 4.2, + OS X 10.5 + * Modify pprof so it works without nm: useful for windows + (csilvers) + * pprof: Support filtering for CPU profiles (cgd) + * Bugfix: have realloc report to hooks in all situations + (maxim) + * Speed improvement: replace slow memcpy with std::copy + (soren) + * Speed: better iterator efficiency in RecordRegionRemoval + (soren) + * Speed: minor speed improvements via better bitfield + alignment (gpike) + * Documentation: add documentation of binary profile output + (cgd) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@40 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M Makefile.am +M Makefile.in +M configure +M configure.ac +A doc/cpuprofile-fileformat.html +M doc/cpuprofile.html +M packages/deb/changelog +M src/base/linux_syscall_support.h +M src/base/spinlock.cc +M src/base/sysinfo.cc +M src/config.h.in +M src/getpc.h +M src/google/heap-checker.h +M src/google/profiler.h +M src/google/stacktrace.h +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profiler.cc +M src/malloc_hook.cc +M src/memfs_malloc.cc +M src/memory_region_map.cc +M src/packed-cache-inl.h +M src/pprof +A src/profiledata.cc +A src/profiledata.h +M src/profiler.cc +M src/stacktrace_generic-inl.h +M src/stacktrace_libunwind-inl.h +M src/stacktrace_powerpc-inl.h +M src/stacktrace_x86-inl.h +M src/stacktrace_x86_64-inl.h +M src/system-alloc.cc +M src/system-alloc.h +M src/tcmalloc.cc +M src/tests/addressmap_unittest.cc +M src/tests/heap-checker_unittest.cc +M src/tests/low_level_alloc_unittest.cc +M src/tests/maybe_threads_unittest.sh +A src/tests/profiledata_unittest.cc +M src/tests/system-alloc_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/tests/testutil.cc +M src/windows/config.h +A src/windows/mingw.h +M 
src/windows/mini_disassembler.h +M src/windows/patch_functions.cc +M src/windows/port.cc +M src/windows/port.h +M src/windows/preamble_patcher.h +M src/windows/preamble_patcher_with_stub.cc +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj + +commit 49b74b9508797f8aafe6b86e62e7efc4ec200e48 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Fri Aug 17 20:56:15 2007 +0000 + + * google-perftools: version 0.93 release + * PORTING: everything compiles on Solaris, OS X, FreeBSD + (see INSTALL) + * PORTING: cpu-profiler works on most platforms (much + better GetPC()) + * PORTING: heap-profiler works on most platforms + * PORTING: improved windows support, including release builds + * No longer build or run ptmalloc tests by default + * Add support for using memfs filesystem to allocate memory + in linux + * WINDOWS: give debug library and release library different + names + + Tue Jul 17 22:26:27 2007 Google Inc. <opensource@google.com> + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@38 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M Makefile.am +M Makefile.in +M README +M README.windows +M autogen.sh +M configure +M configure.ac +M google-perftools.sln +M m4/program_invocation_name.m4 +M packages/deb/changelog +M src/addressmap-inl.h +M src/base/basictypes.h +M src/base/logging.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/base/thread_lister.c +M src/config.h.in +M src/config_for_unittests.h +A src/getpc.h +M src/google/heap-checker.h +M src/google/malloc_hook.h +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profile-table.h +M src/heap-profiler.cc +M src/malloc_extension.cc +M src/malloc_hook.cc +A src/memfs_malloc.cc +M src/memory_region_map.cc +M src/memory_region_map.h +M src/pprof +M src/profiler.cc +M src/stacktrace_powerpc-inl.h +M src/system-alloc.cc +M src/system-alloc.h +M src/tcmalloc.cc +M src/tests/addressmap_unittest.cc +A src/tests/getpc_test.cc +M 
src/tests/heap-checker-death_unittest.sh +M src/tests/heap-checker_unittest.cc +M src/tests/heap-profiler_unittest.sh +M src/tests/low_level_alloc_unittest.cc +M src/tests/maybe_threads_unittest.sh +M src/tests/memalign_unittest.cc +M src/tests/system-alloc_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/windows/TODO +M src/windows/config.h +A src/windows/ia32_modrm_map.cc +A src/windows/ia32_opcode_map.cc +A src/windows/mini_disassembler.cc +A src/windows/mini_disassembler.h +A src/windows/mini_disassembler_types.h +A src/windows/patch_functions.cc +M src/windows/port.cc +M src/windows/port.h +A src/windows/preamble_patcher.cc +A src/windows/preamble_patcher.h +A src/windows/preamble_patcher_with_stub.cc +M src/windows/vc7and8.def +M vsprojects/addressmap_unittest/addressmap_unittest.vcproj +M vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +M +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +C054 vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +vsprojects/tcmalloc_minimal_unittest-static/tcmalloc_minimal_unittest-static.vcproj + +commit c437e1fcdd1e6ff3f032928d460cbfc115e2324f +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Wed Jul 18 18:30:50 2007 +0000 + + Tue Jul 17 22:26:27 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.92 release + * PERFORMANCE: use a packed cache to speed up tcmalloc + * PORTING: preliminary windows support! 
(see README.windows) + * PORTING: better support for solaris, OS X, FreeBSD + (see INSTALL) + * Envvar support for running the heap-checker under gdb + * Add weak declarations to maybe_threads to fix no-pthreads + compile bugs + * Some 64bit fixes, especially with pprof + * Better heap-checker support for some low-level allocations + * Fix bug where heap-profiles would sometimes get truncated + * New documentation about how to handle common heap leak + situations + * Use computed includes for hash_map/set: easier config + * Added all used .m4 templates to the distribution + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@36 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M Makefile.am +M Makefile.in +M README +A README.windows +M aclocal.m4 +A autogen.sh +M compile +M config.guess +M config.sub +M configure +M configure.ac +M depcomp +M doc/cpuprofile.html +M doc/heap_checker.html +M doc/heapprofile.html +A google-perftools.sln +M install-sh +A m4/ac_have_attribute.m4 +A m4/acx_pthread.m4 +A m4/compiler_characteristics.m4 +A m4/install_prefix.m4 +A m4/namespaces.m4 +A m4/program_invocation_name.m4 +A m4/stl_hash.m4 +A m4/stl_namespace.m4 +M missing +M mkinstalldirs +M packages/deb/changelog +M packages/rpm/rpm.spec +M src/base/atomicops-internals-x86-msvc.h +M src/base/atomicops-internals-x86.h +M src/base/atomicops.h +M src/base/basictypes.h +M src/base/commandlineflags.h +M src/base/logging.h +M src/base/low_level_alloc.cc +M src/base/low_level_alloc.h +D src/base/mutex.cc +M src/base/mutex.h +M src/base/spinlock.cc +M src/base/spinlock.h +M src/base/stl_allocator.h +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/config.h.in +C068 src/tests/testutil.h src/config_for_unittests.h +M src/google/heap-checker.h +M src/google/heap-profiler.h +M src/google/malloc_extension.h +M src/google/malloc_hook.h +M src/google/profiler.h +M src/google/stacktrace.h +M src/heap-checker.cc +M src/heap-profile-table.cc +M src/heap-profiler.cc +M 
src/internal_logging.cc +M src/internal_logging.h +M src/malloc_extension.cc +M src/malloc_hook.cc +M src/maybe_threads.cc +M src/maybe_threads.h +M src/memory_region_map.cc +M src/memory_region_map.h +A src/packed-cache-inl.h +M src/pprof +M src/profiler.cc +M src/solaris/libstdc++.la +M src/stacktrace.cc +A src/stacktrace_powerpc-inl.h +M src/stacktrace_x86-inl.h +M src/system-alloc.cc +M src/system-alloc.h +M src/tcmalloc.cc +C053 src/tests/testutil.cc src/tcmalloc.h +M src/tests/frag_unittest.cc +M src/tests/heap-checker_unittest.cc +M src/tests/heap-profiler_unittest.cc +M src/tests/heap-profiler_unittest.sh +M src/tests/low_level_alloc_unittest.cc +M src/tests/markidle_unittest.cc +A src/tests/maybe_threads_unittest.sh +M src/tests/memalign_unittest.cc +C065 src/tests/testutil.h src/tests/packed-cache_test.cc +M src/tests/profiler_unittest.cc +M src/tests/stacktrace_unittest.cc +C051 src/tests/thread_dealloc_unittest.cc +src/tests/system-alloc_unittest.cc +M src/tests/tcmalloc_unittest.cc +M src/tests/testutil.cc +M src/tests/testutil.h +M src/tests/thread_dealloc_unittest.cc +A src/windows/TODO +C060 src/config.h.in src/windows/config.h +A src/windows/port.cc +A src/windows/port.h +A src/windows/vc7and8.def +A vsprojects/addressmap_unittest/addressmap_unittest.vcproj +A vsprojects/frag_unittest/frag_unittest.vcproj +A vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +A +vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +A vsprojects/markidle_unittest/markidle_unittest.vcproj +A vsprojects/memalign_unittest/memalign_unittest.vcproj +A vsprojects/packed-cache_test/packed-cache_test.vcproj +A vsprojects/system_alloc_unittest/system_alloc_unittest.vcproj +A +vsprojects/tcmalloc_minimal_large/tcmalloc_minimal_large_unittest.vcproj +A +vsprojects/tcmalloc_minimal_unittest/tcmalloc_minimal_unittest.vcproj +A vsprojects/thread_dealloc_unittest/thread_dealloc_unittest.vcproj + +commit 6878379d5bab87c787cdd3487b5620a9c8adf376 +Author: 
csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Apr 19 00:53:22 2007 +0000 + + Wed Apr 18 16:43:55 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.91 release + * Brown-paper-bag bugfix: compilation error on some x86-64 + machines + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@30 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M README +M config.guess +M config.sub +M configure +M configure.ac +M src/base/sysinfo.cc +M src/base/sysinfo.h +M src/stacktrace_libunwind-inl.h +M src/tests/ptmalloc/malloc-machine.h + +commit 74ad5d57ec08abace386befc6c3c695d85f44d1a +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Mon Apr 16 20:49:32 2007 +0000 + + Fri Apr 13 14:50:51 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.90 release + * (As the version-number jump hints, this is a major new + release: + almost every piece of functionality was rewritten. + I can't do + justice to all the changes, but will concentrate on + highlights.) + *** USER-VISIBLE CHANGES: + * Ability to "release" unused memory added to tcmalloc + * Exposed more tweaking knobs via environment variables + (see docs) + * pprof tries harder to map addresses to functions + * tcmalloc_minimal compiles and runs on FreeBSD 6.0 and + Solaris 10 + *** INTERNAL CHANGES: + * Much better 64-bit support + * Better multiple-processor support (e.g. multicore contention + tweaks) + * Support for recent kernel ABI changes (e.g. new arg + to mremap) + * Addition of spinlocks to tcmalloc to reduce contention cost + * Speed up tcmalloc by using __thread on systems that + support TLS + * Total redesign of heap-checker to improve liveness checking + * More portable stack-frame analysis -- no more hard-coded + constants! + * Disentangled heap-profiler code and heap-checker code + * Several new unittests to test, e.g., thread-contention costs + * Lots of small (but important!) 
bug fixes: e.g., fixing + GetPC on amd64 + *** KNOWN PROBLEMS: + * CPU-profiling may crash on x86_64 (64-bit) systems. + See the README + * Profiling/heap-checking may deadlock on x86_64 systems. + See README + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@28 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M INSTALL +M Makefile.am +M Makefile.in +M README +M TODO +M aclocal.m4 +M config.guess +M config.sub +M configure +M configure.ac +R050 doc/cpu_profiler.html doc/cpuprofile.html +A doc/designstyle.css +M doc/heap_checker.html +D doc/heap_profiler.html +A doc/heapprofile.html +M doc/index.html +M doc/tcmalloc.html +M ltmain.sh +M packages/deb/changelog +A packages/deb/docs +M packages/rpm/rpm.spec +M src/addressmap-inl.h +A src/base/atomicops-internals-macosx.h +A src/base/atomicops-internals-x86-msvc.h +A src/base/atomicops-internals-x86.cc +A src/base/atomicops-internals-x86.h +A src/base/atomicops.h +M src/base/basictypes.h +M src/base/commandlineflags.h +M src/base/elfcore.h +M src/base/googleinit.h +M src/base/linux_syscall_support.h +M src/base/linuxthreads.c +M src/base/linuxthreads.h +C074 src/base/googleinit.h src/base/logging.cc +M src/base/logging.h +A src/base/low_level_alloc.cc +A src/base/low_level_alloc.h +A src/base/mutex.cc +A src/base/mutex.h +A src/base/spinlock.cc +A src/base/spinlock.h +A src/base/stl_allocator.h +A src/base/sysinfo.cc +A src/base/sysinfo.h +M src/base/thread_lister.c +M src/base/thread_lister.h +M src/config.h.in +M src/google/heap-checker.h +M src/google/heap-profiler.h +M src/google/malloc_extension.h +M src/google/malloc_hook.h +M src/google/profiler.h +M src/google/stacktrace.h +M src/heap-checker-bcad.cc +M src/heap-checker.cc +A src/heap-profile-table.cc +A src/heap-profile-table.h +D src/heap-profiler-inl.h +M src/heap-profiler.cc +M src/internal_logging.cc +M src/internal_logging.h +D src/internal_spinlock.h +M src/malloc_extension.cc +M src/malloc_hook.cc +M src/maybe_threads.cc +A 
src/memory_region_map.cc +A src/memory_region_map.h +M src/pprof +M src/profiler.cc +A src/solaris/libstdc++.la +M src/stacktrace.cc +M src/stacktrace_generic-inl.h +M src/stacktrace_libunwind-inl.h +M src/stacktrace_x86-inl.h +M src/stacktrace_x86_64-inl.h +M src/system-alloc.cc +M src/system-alloc.h +M src/tcmalloc.cc +M src/tests/addressmap_unittest.cc +A src/tests/atomicops_unittest.cc +A src/tests/frag_unittest.cc +M src/tests/heap-checker-death_unittest.sh +M src/tests/heap-checker_unittest.cc +M src/tests/heap-checker_unittest.sh +M src/tests/heap-profiler_unittest.cc +M src/tests/heap-profiler_unittest.sh +A src/tests/low_level_alloc_unittest.cc +A src/tests/markidle_unittest.cc +A src/tests/memalign_unittest.cc +M src/tests/profiler_unittest.sh +M src/tests/ptmalloc/malloc-machine.h +M src/tests/stacktrace_unittest.cc +M src/tests/tcmalloc_large_unittest.cc +M src/tests/tcmalloc_unittest.cc +C055 src/stacktrace.cc src/tests/testutil.cc +C074 src/base/googleinit.h src/tests/testutil.h +C055 src/stacktrace.cc src/tests/thread_dealloc_unittest.cc + +commit ddbf2f027fb4ca8781fd50820ceb870570f414bc +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Mar 22 05:03:28 2007 +0000 + + Now that we've uploaded the full source, including the doc/ directory, + we can get rid of docs/ + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@27 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +D docs/html/cpu_profiler.html +D docs/html/heap_checker.html +D docs/html/heap_profiler.html +D docs/html/tcmalloc.html +D docs/images/heap-example1.png +D docs/images/overview.gif +D docs/images/pageheap.gif +D docs/images/pprof-test.gif +D docs/images/pprof-vsnprintf.gif +D docs/images/spanmap.gif +D docs/images/tcmalloc-opspercpusec.png +D docs/images/tcmalloc-opspercpusec_002.png +D docs/images/tcmalloc-opspercpusec_003.png +D docs/images/tcmalloc-opspercpusec_004.png +D docs/images/tcmalloc-opspercpusec_005.png +D docs/images/tcmalloc-opspercpusec_006.png +D 
docs/images/tcmalloc-opspercpusec_007.png +D docs/images/tcmalloc-opspercpusec_008.png +D docs/images/tcmalloc-opspercpusec_009.png +D docs/images/tcmalloc-opspersec.png +D docs/images/tcmalloc-opspersec_002.png +D docs/images/tcmalloc-opspersec_003.png +D docs/images/tcmalloc-opspersec_004.png +D docs/images/tcmalloc-opspersec_005.png +D docs/images/tcmalloc-opspersec_006.png +D docs/images/tcmalloc-opspersec_007.png +D docs/images/tcmalloc-opspersec_008.png +D docs/images/tcmalloc-opspersec_009.png +D docs/images/threadheap.gif + +commit 7ede7d6a9fe772b0bfa05acb2a2a6867405b474a +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Mar 22 05:01:39 2007 +0000 + + set mime-type for png and gifs + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@26 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +commit 8e188310f7d8732d81b7b04f193f89964b7af6c5 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Mar 22 04:55:49 2007 +0000 + + Wed Jun 14 15:11:14 2006 Google Inc. <opensource@google.com> + + * google-perftools: version 0.8 release + * Experimental support for remote profiling added to pprof + (many) + * Fixed race condition in ProfileData::FlushTable (etune) + * Better support for weird /proc maps (maxim, mec) + * Fix heap-checker interaction with gdb (markus) + * Better 64-bit support in pprof (aruns) + * Reduce scavenging cost in tcmalloc by capping NumMoveSize + (sanjay) + * Cast syscall(SYS_mmap); works on more 64-bit systems now + (menage) + * Document the text output of pprof! (csilvers) + * Better compiler support for no-THREADS and for old compilers + (csilvers) + * Make libunwind the default stack unwinder for x86-64 (aruns) + * Somehow the COPYING file got erased. 
Regenerate it + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@23 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M COPYING +M ChangeLog +M Makefile.am +M aclocal.m4 +M configure +M configure.ac +M doc/cpu_profiler.html +A doc/pprof_remote_servers.html +M src/base/linux_syscall_support.h +M src/base/linuxthreads.c +M src/base/thread_lister.c +M src/google/heap-checker.h +M src/heap-checker.cc +M src/malloc_extension.cc +M src/malloc_hook.cc +M src/pprof +M src/profiler.cc +M src/stacktrace.cc +M src/stacktrace_libunwind-inl.h +M src/tcmalloc.cc +M src/tests/heap-checker_unittest.cc +M src/tests/tcmalloc_unittest.cc + +commit c3b96b3ac552160abde541bba8ac7b4f8338efa0 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Mar 22 04:48:00 2007 +0000 + + Thu Apr 13 20:59:09 2006 Google Inc. <opensource@google.com> + + * google-perftools: version 0.7 release + * Major rewrite of thread introspection for new kernels + (markus) + * Major rewrite of heap-checker to use new thread tools + (maxim) + * Add proper support for following data in thread registers + (maxim) + * Syscall support for older kernels, including _syscall6 + (markus) + * Support PIC mode (markus, mbland, iant) + * Better support for running in non-threaded contexts + (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@21 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M configure +M configure.ac +M src/base/elfcore.h +M src/base/linux_syscall_support.h +M src/base/linuxthreads.c +M src/base/thread_lister.h +M src/google/heap-checker.h +M src/heap-checker.cc +M src/heap-profiler.cc +M src/malloc_hook.cc +M src/pprof +M src/profiler.cc +M src/tests/heap-checker-death_unittest.sh +M src/tests/heap-checker_unittest.cc +M src/tests/tcmalloc_unittest.cc + +commit 60a3a2ce77ed2713b2eedd20952d9cfc56ff7ccf +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Mar 22 04:46:29 2007 +0000 + + Fri Jan 27 14:04:27 2006 Google Inc. 
<opensource@google.com> + + * google-perftools: version 0.6 release + * More sophisticated stacktrace usage, possibly using + libunwind (aruns) + * Update pprof to handle 64-bit profiles (dehnert) + * Fix GetStackTrace to correctly return top stackframe + (sanjay) + * Add ANSI compliance for new and new[], including new_handler + (jkearney) + * More accuracy by reading ELF files directly rather than + objdump (mec) + * Add readline support for pprof (addi) + * Add #includes for PPC (csilvers) + * New PC-detection routine for ibook powerpc (asbestoshead) + * Vastly improved tcmalloc unittest (csilvers) + * Move documentation from /usr/doc to /usr/share/doc + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@19 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M Makefile.am +M configure +M configure.ac +M packages/rpm/rpm.spec +M src/base/thread_lister.c +M src/config.h.in +M src/heap-checker.cc +M src/heap-profiler.cc +M src/malloc_extension.cc +M src/pprof +M src/profiler.cc +M src/stacktrace.cc +C065 src/tests/tcmalloc_unittest.cc src/stacktrace_generic-inl.h +C064 src/tests/tcmalloc_unittest.cc src/stacktrace_libunwind-inl.h +C068 src/stacktrace.cc src/stacktrace_x86-inl.h +A src/stacktrace_x86_64-inl.h +M src/tcmalloc.cc +M src/tests/stacktrace_unittest.cc +M src/tests/tcmalloc_unittest.cc + +commit 298274f8d4f474d2b16a35c8babc58817088c59e +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Mar 22 04:45:23 2007 +0000 + + Mon Nov 14 17:28:59 2005 Google Inc. 
<opensource@google.com> + + * google-perftools: version 0.5 release + * Add va_start/va_end calls around vsnprintf() (csilvers) + * Write our own __syscall_return(), since it's not defined + consistently on all 64-bit linux distros (markus) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@17 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M configure +M configure.ac +M src/base/linux_syscall_support.h +M src/base/logging.h +M src/config.h.in + +commit ee5805f1296f8546c16f90d5427efa347a5f7338 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Mar 22 04:44:18 2007 +0000 + + Wed Oct 26 15:19:16 2005 Google Inc. <opensource@google.com> + + * Decrease fragmentation in tcmalloc (lefevere) + * Support for ARM in some of the thread-specific code (markus) + * Turn off heap-checker for statically-linked binaries, which + cause error leak reports now (etune) + * Many pprof improvements, including a command-line interface + (jeff) + * CPU profiling now automatically affects all threads in + linux 2.6. + (Kernel bugs break CPU profiling and threads in linux 2.4 + a bit.) + ProfilerEnable() and ProfilerDisable() are deprecated. + (sanjay) + * tcmalloc now correctly intercepts memalign (m3b, maxim) + * Syntax fix: added missing va_end()s. 
Helps non-gcc + compiling (etune) + * Fixed a few coredumper bugs: race condition after + PTRACE_DETACH, + ignore non-aligned stackframe pointers (markus, menage) + * 64-bit cleanup, especially for spinlock code (etune) + and mmap (sanjay) + * Better support for finding threads in linux (markus) + * tcmalloc now tracks those stack traces that allocate memory + (sanjay) + * Work around a weird setspecific problem (sanjay) + * Fix tcmalloc overflow problems when an alloc is close to + 2G/4G (sanjay) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@15 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M Makefile.am +M aclocal.m4 +M configure +M configure.ac +M doc/cpu_profiler.html +D src/base/elfcore.c +M src/base/elfcore.h +A src/base/linux_syscall_support.h +M src/base/linuxthreads.c +M src/base/linuxthreads.h +M src/base/thread_lister.c +M src/base/thread_lister.h +M src/google/heap-checker.h +M src/google/malloc_extension.h +M src/google/profiler.h +M src/heap-checker.cc +M src/heap-profiler.cc +M src/internal_logging.cc +M src/internal_spinlock.h +M src/malloc_extension.cc +M src/malloc_hook.cc +M src/pagemap.h +M src/pprof +M src/profiler.cc +M src/stacktrace.cc +M src/system-alloc.cc +M src/tcmalloc.cc +A src/tests/tcmalloc_large_unittest.cc +M src/tests/tcmalloc_unittest.cc + +commit bc455d7b63949fab94ed9518d277866e95f08768 +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Mar 22 04:42:30 2007 +0000 + + Fri Jun 24 18:02:26 2005 Google Inc. 
<opensource@google.com> + + * Add missing errno include for one of the unittests + (csilvers) + * Reduce tcmalloc startup memory from 5M to 256K (sanjay) + * Add support for mallopt() and mallinfo (sanjay) + * Improve stacktrace's performance on some 64-bit systems + (etune) + * Improve the stacktrace unittest (etune) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@13 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M configure +M configure.ac +M src/pagemap.h +M src/stacktrace.cc +M src/tcmalloc.cc +M src/tests/heap-checker_unittest.cc +M src/tests/stacktrace_unittest.cc + +commit 91fad389784766782263133c5510976a8f76d89e +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Mar 22 03:28:56 2007 +0000 + + Tue May 31 08:14:38 2005 Google Inc. <opensource@google.com> + + * google-perftools: version 0.2 release + * Use mmap2() instead of mmap(), to map more memory (menage) + * Do correct pthread-local checking in heap-checker! (maxim) + * Avoid overflow on 64-bit machines in pprof (sanjay) + * Add a few more GetPC() functions, including for AMD + (csilvers) + * Better method for overriding pthread functions (menage) + * (Hacky) fix to avoid overwriting profile files after fork() + (csilvers) + * Crashing bugfix involving dumping heaps on small-stack + threads (tudor) + * Allow library versions with letters at the end (csilvers) + * Config fixes for systems that don't define PATH_MAX + (csilvers) + * Confix fixes so we no longer need config.h after install + (csilvers) + * Fix to pprof to correctly read very big cpu profiles + (csilvers) + * Fix to pprof to deal with new commandline flags in + modern gv's + * Better error reporting when we can't access /proc/maps + (etune) + * Get rid of the libc-preallocate code (which could crash + on some + systems); no longer needed with local-threads fix (csilvers) + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@11 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +M ChangeLog +M 
Makefile.am +M README +M TODO +M configure +M configure.ac +M doc/cpu_profiler.html +A doc/heap_checker.html +M doc/heap_profiler.html +A doc/index.html +A doc/t-test1.times.txt +A doc/tcmalloc-opspercpusec.vs.threads.1024.bytes.png +A doc/tcmalloc-opspercpusec.vs.threads.128.bytes.png +A doc/tcmalloc-opspercpusec.vs.threads.131072.bytes.png +A doc/tcmalloc-opspercpusec.vs.threads.16384.bytes.png +A doc/tcmalloc-opspercpusec.vs.threads.2048.bytes.png +A doc/tcmalloc-opspercpusec.vs.threads.256.bytes.png +A doc/tcmalloc-opspercpusec.vs.threads.32768.bytes.png +A doc/tcmalloc-opspercpusec.vs.threads.4096.bytes.png +A doc/tcmalloc-opspercpusec.vs.threads.512.bytes.png +A doc/tcmalloc-opspercpusec.vs.threads.64.bytes.png +A doc/tcmalloc-opspercpusec.vs.threads.65536.bytes.png +A doc/tcmalloc-opspercpusec.vs.threads.8192.bytes.png +A doc/tcmalloc-opspersec.vs.size.1.threads.png +A doc/tcmalloc-opspersec.vs.size.12.threads.png +A doc/tcmalloc-opspersec.vs.size.16.threads.png +A doc/tcmalloc-opspersec.vs.size.2.threads.png +A doc/tcmalloc-opspersec.vs.size.20.threads.png +A doc/tcmalloc-opspersec.vs.size.3.threads.png +A doc/tcmalloc-opspersec.vs.size.4.threads.png +A doc/tcmalloc-opspersec.vs.size.5.threads.png +A doc/tcmalloc-opspersec.vs.size.8.threads.png +M doc/tcmalloc.html +M packages/deb/changelog +M packages/deb/control +M packages/deb/copyright +D packages/deb/files +M packages/deb/libgoogle-perftools-dev.install +M packages/deb/libgoogle-perftools0.install +M packages/rpm.sh +M packages/rpm/rpm.spec +M src/addressmap-inl.h +R099 src/google/perftools/basictypes.h src/base/basictypes.h +M src/base/commandlineflags.h +A src/base/elfcore.c +A src/base/elfcore.h +A src/base/linuxthreads.c +A src/base/linuxthreads.h +A src/base/thread_lister.c +A src/base/thread_lister.h +M src/google/heap-checker.h +M src/google/heap-profiler.h +R088 src/google/malloc_interface.h src/google/malloc_extension.h +M src/google/malloc_hook.h +D src/google/perftools/config.h.in +M 
src/google/stacktrace.h +M src/heap-checker-bcad.cc +M src/heap-checker.cc +M src/heap-profiler-inl.h +M src/heap-profiler.cc +M src/internal_logging.h +M src/internal_spinlock.h +R068 src/malloc_interface.cc src/malloc_extension.cc +M src/malloc_hook.cc +A src/maybe_threads.cc +C070 src/tests/tcmalloc_unittest.cc src/maybe_threads.h +M src/pagemap.h +M src/pprof +M src/profiler.cc +M src/stacktrace.cc +M src/system-alloc.cc +M src/tcmalloc.cc +M src/tests/addressmap_unittest.cc +M src/tests/heap-checker-death_unittest.sh +M src/tests/heap-checker_unittest.cc +C051 src/tests/heap-checker-death_unittest.sh +src/tests/heap-checker_unittest.sh +A src/tests/heap-profiler_unittest.cc +A src/tests/heap-profiler_unittest.sh +M src/tests/profiler_unittest.cc +M src/tests/profiler_unittest.sh +M src/tests/stacktrace_unittest.cc +M src/tests/tcmalloc_unittest.cc + +commit 51b4875f8ade3e0930eed2dc2a842ec607a94a2c +Author: csilvers <csilvers@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Mar 22 03:00:33 2007 +0000 + + Tue Feb 8 09:57:17 2005 El Goog <opensource@google.com> + + * google-perftools: initial release: + The google-perftools package contains some utilities + to improve + and analyze the performance of C++ programs. This includes + an + optimized thread-caching malloc() and cpu and heap profiling + utilities. 
+ + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@9 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +A AUTHORS +A COPYING +A ChangeLog +A INSTALL +A Makefile.am +A Makefile.in +A NEWS +A README +A TODO +A aclocal.m4 +A compile +A config.guess +A config.sub +A configure +A configure.ac +A depcomp +A doc/cpu_profiler.html +A doc/heap-example1.png +A doc/heap_profiler.html +A doc/overview.dot +A doc/overview.gif +A doc/pageheap.dot +A doc/pageheap.gif +A doc/pprof-test-big.gif +A doc/pprof-test.gif +A doc/pprof-vsnprintf-big.gif +A doc/pprof-vsnprintf.gif +A doc/pprof.1 +A doc/pprof.see_also +A doc/spanmap.dot +A doc/spanmap.gif +A doc/tcmalloc.html +A doc/threadheap.dot +A doc/threadheap.gif +A install-sh +A ltmain.sh +A missing +A mkinstalldirs +A packages/deb.sh +A packages/deb/README +A packages/deb/changelog +A packages/deb/compat +A packages/deb/control +A packages/deb/copyright +A packages/deb/files +A packages/deb/libgoogle-perftools-dev.dirs +A packages/deb/libgoogle-perftools-dev.install +A packages/deb/libgoogle-perftools0.dirs +A packages/deb/libgoogle-perftools0.install +A packages/deb/libgoogle-perftools0.manpages +A packages/deb/rules +A packages/rpm.sh +A packages/rpm/rpm.spec +A src/addressmap-inl.h +A src/base/commandlineflags.h +A src/base/googleinit.h +A src/base/logging.h +A src/config.h.in +A src/google/heap-checker.h +A src/google/heap-profiler.h +A src/google/malloc_hook.h +A src/google/malloc_interface.h +A src/google/perftools/basictypes.h +A src/google/perftools/config.h.in +A src/google/profiler.h +A src/google/stacktrace.h +A src/heap-checker-bcad.cc +A src/heap-checker.cc +A src/heap-profiler-inl.h +A src/heap-profiler.cc +A src/internal_logging.cc +A src/internal_logging.h +A src/internal_spinlock.h +A src/malloc_hook.cc +A src/malloc_interface.cc +A src/pagemap.h +A src/pprof +A src/profiler.cc +A src/stacktrace.cc +A src/system-alloc.cc +A src/system-alloc.h +A src/tcmalloc.cc +A src/tests/addressmap_unittest.cc +A 
src/tests/heap-checker-death_unittest.sh +A src/tests/heap-checker_unittest.cc +A src/tests/profiler_unittest.cc +A src/tests/profiler_unittest.sh +A src/tests/ptmalloc/COPYRIGHT +A src/tests/ptmalloc/lran2.h +A src/tests/ptmalloc/malloc-machine.h +A src/tests/ptmalloc/t-test.h +A src/tests/ptmalloc/t-test1.c +A src/tests/ptmalloc/t-test2.c +A src/tests/ptmalloc/thread-m.h +A src/tests/ptmalloc/thread-st.h +A src/tests/stacktrace_unittest.cc +A src/tests/tcmalloc_unittest.cc + +commit e3a8513447d1141f083d4aaced0b240a6e161f47 +Author: trowbridge.jon +<trowbridge.jon@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Dec 28 22:53:59 2006 +0000 + + Set page mime-type to text/html. + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@5 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +commit 66737d1c2519e4a1622f61139bfe2f683ea3696c +Author: trowbridge.jon +<trowbridge.jon@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Dec 28 22:39:33 2006 +0000 + + Import of HTML documentation from SourceForge. 
+ + + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@3 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 + +A docs/html/cpu_profiler.html +A docs/html/heap_checker.html +A docs/html/heap_profiler.html +A docs/html/tcmalloc.html +A docs/images/heap-example1.png +A docs/images/overview.gif +A docs/images/pageheap.gif +A docs/images/pprof-test.gif +A docs/images/pprof-vsnprintf.gif +A docs/images/spanmap.gif +A docs/images/tcmalloc-opspercpusec.png +A docs/images/tcmalloc-opspercpusec_002.png +A docs/images/tcmalloc-opspercpusec_003.png +A docs/images/tcmalloc-opspercpusec_004.png +A docs/images/tcmalloc-opspercpusec_005.png +A docs/images/tcmalloc-opspercpusec_006.png +A docs/images/tcmalloc-opspercpusec_007.png +A docs/images/tcmalloc-opspercpusec_008.png +A docs/images/tcmalloc-opspercpusec_009.png +A docs/images/tcmalloc-opspersec.png +A docs/images/tcmalloc-opspersec_002.png +A docs/images/tcmalloc-opspersec_003.png +A docs/images/tcmalloc-opspersec_004.png +A docs/images/tcmalloc-opspersec_005.png +A docs/images/tcmalloc-opspersec_006.png +A docs/images/tcmalloc-opspersec_007.png +A docs/images/tcmalloc-opspersec_008.png +A docs/images/tcmalloc-opspersec_009.png +A docs/images/threadheap.gif + +commit 55d679a05f0518ea73a4bca6e8b71b54fcecf68f +Author: (no author) <(no author)@6b5cf1ce-ec42-a296-1ba9-69fdba395a50> +Date: Thu Jul 27 00:57:14 2006 +0000 + + Initial directory structure. + + git-svn-id: http://gperftools.googlecode.com/svn/trunk@1 + 6b5cf1ce-ec42-a296-1ba9-69fdba395a50 diff --git a/src/third_party/gperftools-2.7/ChangeLog.old b/src/third_party/gperftools-2.7/ChangeLog.old new file mode 100644 index 00000000000..4b334beaade --- /dev/null +++ b/src/third_party/gperftools-2.7/ChangeLog.old @@ -0,0 +1,646 @@ +Fri Feb 03 15:40:45 2012 Google Inc. 
<google-perftools@googlegroups.com> + + * gperftools: version 2.0 + * Renamed the project from google-perftools to gperftools (csilvers) + * Renamed the .deb/.rpm packagse from google-perftools to gperftools too + * Renamed include directory from google/ to gperftools/ (csilvers) + * Changed the 'official' perftools email in setup.py/etc + * Renamed google-perftools.sln to gperftools.sln + * PORTING: Removed bash-isms & grep -q in heap-checker-death_unittest.sh + * Changed copyright text to reflect Google's relinquished ownership + +Tue Jan 31 10:43:50 2012 Google Inc. <opensource@google.com> + + * google-perftools: version 1.10 release + * PORTING: Support for patching assembly on win x86_64! (scott.fr...) + * PORTING: Work around atexit-execution-order bug on freebsd (csilvers) + * PORTING: Patch _calloc_crt for windows (roger orr) + * PORTING: Add C++11 compatibility method for stl allocator (jdennett) + * PORTING: use MADV_FREE, not MADV_DONTNEED, on freebsd (csilvers) + * PORTING: Don't use SYS_open when not supported on solaris (csilvers) + * PORTING: Do not assume uname() returns 0 on success (csilvers) + * LSS: Improved ARM support in linux-syscall-support (dougkwan) + * LSS: Get rid of unused syscalls in linux-syscall-support (csilvers) + * LSS: Fix broken mmap wrapping for ppc (markus) + * LSS: Emit .cfi_adjust_cfa_offset when appropriate (ppluzhnikov) + * LSS: Be more accurate in register use in __asm__ (markus) + * LSS: Fix __asm__ calls to compile under clang (chandlerc) + * LSS: Fix ARM inline assembly bug around r7 and swi (lcwu) + * No longer log when an allocator fails (csilvers) + * void* -> const void* for MallocExtension methods (llib) + * Improve HEAP_PROFILE_MMAP and fix bugs with it (dmikurube) + * Replace int-based abs with more correct fabs in a test (pmurin) + +Thu Dec 22 16:22:45 2011 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.9 release + * Lightweight check for double-frees (blount) + * BUGFIX: Fix pprof to exit properly if run with no args (dagitses) + * Suggest ASan as a way to diagnose buggy code (ppluzhnikov) + * Get rid of unused CACHELINE_SIZE (csilvers) + * Replace atexit() calls with global dtors; helps freebsd (csilvers) + * Disable heap-checker under AddressSanitizer (kcc) + * Fix bug in powerpc stacktracing (ppluzhnikov) + * PERF: Use exponential backoff waiting for spinlocks (m3b) + * Fix 64-bit nm on 32-bit binaries in pprof (csilvers) + * Add ProfileHandlerDisallowForever (rsc) + * BUGFIX: Shell escape when forking in pprof (csilvers) + * No longer combine overloaded functions in pprof (csilvers) + * Fix address-normalizing bug in pprof (csilvers) + * More consistently call abort() instead of exit() on failure (csilvers) + * Allow NoGlobalLeaks to be safely called more than once (csilvers) + * PORTING/BUGFIX: Fix ARM cycleclock to use volatile asm (dougkwan) + * PORTING: 64-bit atomic ops for ARMv7 (dougkwan) + * PORTING: Implement stacktrace for ARM (dougkwan) + * PORTING: Fix malloc_hook_mmap_linux for ARM (dougkwan) + * PORTING: Update linux_syscall_support.h for ARM/etc (evannier, sanek) + * PORTING: Fix freebsd to work on x86_64 (chapp...@gmail.com) + * PORTING: Added additional SYS_mmap fixes for FreeBSD (chappedm) + * PORTING: Allow us to compile on OS X 10.6 and run on 10.5 (raltherr) + * PORTING: Check for mingw compilers that *do* define timespec + * PORTING: Add "support" for MIPS cycletimer + * PORTING: Fix fallback cycle-timer to work with Now (dougkwan) + * PERF: Move stack trace collecting out of the mutex (taylorc) + * PERF: Get the deallocation stack trace outside the mutex (sean) + * Make PageHeap dynamically allocated for leak checks (maxim) + * BUGFIX: Fix probing of nm -f behavior in pprof (dpeng) + * BUGFIX: Fix a race with the CentralFreeList lock before main (sanjay) + * Support 
/pprof/censusprofile url arguments (rajatjain) + * Change IgnoreObject to return its argument (nlewycky) + * Update malloc-hook files to support more CPUs + * BUGFIX: write our own strstr to avoid libc problems (csilvers) + * Use simple callgrind compression facility in pprof + * Print an error message when we can't run pprof to symbolize (csilvers) + * Die in configure when g++ is't installed (csilvers) + * DOC: Beef up the documentation a bit about using libunwind (csilvers) + +Fri Aug 26 13:29:25 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.8.3 release + * Added back the 'pthreads unsafe early' #define, needed for FreeBSD + +Thu Aug 11 15:01:47 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.8.2 release + * Fixed calculation of patchlevel, 'make check' should all pass again + +Tue Jul 26 20:57:51 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.8.1 release + * Added an #include to fix compile breakage on latest gcc's + * Removed an extra , in the configure.ac script + +Fri Jul 15 16:10:51 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.8 release + * PORTING: (Disabled) support for patching mmap on freebsd (chapp...) + * PORTING: Support volatile __malloc_hook for glibc 2.14 (csilvers) + * PORTING: Use _asm rdtsc and __rdtsc to get cycleclock in windows (koda) + * PORTING: Fix fd vs. 
HANDLE compiler error on cygwin (csilvers) + * PORTING: Do not test memalign or double-linking on OS X (csilvers) + * PORTING: Actually enable TLS on windows (jontra) + * PORTING: Some work to compile under Native Client (krasin) + * PORTING: deal with pthread_once w/o -pthread on freebsd (csilvers) + * Rearrange libc-overriding to make it easier to port (csilvers) + * Display source locations in pprof disassembly (sanjay) + * BUGFIX: Actually initialize allocator name (mec) + * BUGFIX: Keep track of 'overhead' bytes in malloc reporting (csilvers) + * Allow ignoring one object twice in the leak checker (glider) + * BUGFIX: top10 in pprof should print 10 lines, not 11 (rsc) + * Refactor vdso source files (tipp) + * Some documentation cleanups + * Document MAX_TOTAL_THREAD_CACHE_SIZE <= 1Gb (nsethi) + * Add MallocExtension::GetOwnership(ptr) (csilvers) + * BUGFIX: We were leaving out a needed $(top_srcdir) in the Makefile + * PORTING: Support getting argv0 on OS X + * Add 'weblist' command to pprof: like 'list' but html (sanjay) + * Improve source listing in pprof (sanjay) + * Cap cache sizes to reduce fragmentation (ruemmler) + * Improve performance by capping or increasing sizes (ruemmler) + * Add M{,un}mapReplacmenet hooks into MallocHook (ribrdb) + * Refactored system allocator logic (gangren) + * Include cleanups (csilvers) + * Add TCMALLOC_SMALL_BUT_SLOW support (ruemmler) + * Clarify that tcmalloc stats are MiB (robinson) + * Remove support for non-tcmalloc debugallocation (blount) + * Add a new test: malloc_hook_test (csilvers) + * Change the configure script to be more crosstool-friendly (mcgrathr) + * PORTING: leading-underscore changes to support win64 (csilvers) + * Improve debugallocation tc_malloc_size (csilvers) + * Extend atomicops.h and cyceclock to use ARM V6+ optimized code (sanek) + * Change malloc-hook to use a list-like structure (llib) + * Add flag to use MAP_PRIVATE in memfs_malloc (gangren) + * Windows support for pprof: nul and /usr/bin/file 
(csilvers) + * TESTING: add test on strdup to tcmalloc_test (csilvers) + * Augment heap-checker to deal with no-inode maps (csilvers) + * Count .dll/.dylib as shared libs in heap-checker (csilvers) + * Disable sys_futex for arm; it's not always reliable (sanek) + * PORTING: change lots of windows/port.h macros to functions + * BUGFIX: Generate correct version# in tcmalloc.h on windows (csilvers) + * PORTING: Some casting to make solaris happier about types (csilvers) + * TESTING: Disable debugallocation_test in 'minimal' mode (csilvers) + * Rewrite debugallocation to be more modular (csilvers) + * Don't try to run the heap-checker under valgrind (ppluzhnikov) + * BUGFIX: Make focused stat %'s relative, not absolute (sanjay) + * BUGFIX: Don't use '//' comments in a C file (csilvers) + * Quiet new-gcc compiler warnings via -Wno-unused-result, etc (csilvers) + +Fri Feb 04 15:54:31 2011 Google Inc. <opensource@google.com> + + * google-perftools: version 1.7 release + * Reduce page map key size under x86_64 by 4.4MB (rus) + * Remove a flaky malloc-extension test (fdabek) + * Improve the performance of PageHeap::New (ond..., csilvers) + * Improve sampling_test with no-inline additions/etc (fdabek) + * 16-byte align debug allocs (jyasskin) + * Change FillProcSelfMaps to detect out-of-buffer-space (csilvers) + * Document the need for sampling to use GetHeapSample (csilvers) + * Try to read TSC frequency from tsc_freq_khs (adurbin) + * Do better at figuring out if tests are running under gdb (ppluzhnikov) + * Improve spinlock contention performance (ruemmler) + * Better internal-function list for pprof's /contention (ruemmler) + * Speed up GoogleOnce (m3b) + * Limit number of incoming/outgoing edges in pprof (sanjay) + * Add pprof --evince to go along with --gv (csilvers) + * Document the various ways to get heap-profiling information (csilvers) + * Separate out synchronization profiling routines (ruemmler) + * Improve malloc-stats output to be more understandable 
(csilvers) + * Add support for census profiler in pporf (nabeelmian) + * Document how pprof's /symbol must support GET requests (csilvers) + * Improve acx_pthread.m4 (ssuomi, liujisi) + * Speed up pprof's ExtractSymbols (csilvers) + * Ignore some known-leaky (java) libraries in the heap checker (davidyu) + * Make kHideMask use all 64 bits in tests (ppluzhnikov) + * Clean up pprof input-file handling (csilvers) + * BUGFIX: Don't crash if __environ is NULL (csilvers) + * BUGFIX: Fix totally broken debugallocation tests (csilvers) + * BUGFIX: Fix up fake_VDSO handling for unittest (ppluzhnikov) + * BUGFIX: Suppress all large allocs when report threshold is 0 (lexie) + * BUGFIX: mmap2 on i386 takes an off_t, not off64_t (csilvers) + * PORTING: Add missing PERFTOOLS_DLL_DECL (csilvers) + * PORTING: Add stddef.h to make newer gcc's happy (csilvers) + * PORTING: Document some tricks for working under OS X (csilvers) + * PORTING: Don't try to check valgrind for windows (csilvers) + * PORTING: Make array-size a var to compile under clang (chandlerc) + * PORTING: No longer hook _aligned_malloc and _aligned_free (csilvers) + * PORTING: Quiet some gcc warnings (csilvers) + * PORTING: Replace %PRIxPTR with %p to be more portable (csilvers) + * PORTING: Support systems that capitalize /proc weirdly (sanek) + * PORTING: Treat arm3 the same as arm5t in cycletimer (csilvers) + * PORTING: Update windows logging to not allocate memory (csilvers) + * PORTING: avoid double-patching newer windows DLLs (roger.orr) + * PORTING: get dynamic_annotations.c to work on windows (csilvers) + * Add pkg-config .pc files for the 5 libraries we produce (csilvers) + * Added proper libtool versioning, so this lib will be 0.1.0 (csilvers) + * Moved from autoconf 2.64 to 2.65 + +Thu Aug 5 12:48:03 PDT 2010 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.6 release + * Add tc_malloc_usable_size for compatibility with glibc (csilvers) + * Override malloc_usable_size with tc_malloc_usable_size (csilvers) + * Default to no automatic heap sampling in tcmalloc (csilvers) + * Add -DTCMALLOC_LARGE_PAGES, a possibly faster tcmalloc (rus) + * Make some functions extern "C" to avoid false ODR warnings (jyasskin) + * pprof: Add SVG-based output (rsc) + * pprof: Extend pprof --tools to allow per-tool configs (csilvers) + * pprof: Improve support of 64-bit and big-endian profiles (csilvers) + * pprof: Add interactive callgrind suport (weidenri...) + * pprof: Improve address->function mapping a bit (dpeng) + * Better detection of when we're running under valgrind (csilvers) + * Better CPU-speed detection under valgrind (saito) + * Use, and recommend, -fno-builtin-malloc when compiling (csilvers) + * Avoid false-sharing of memory between caches (bmaurer) + * BUGFIX: Fix heap sampling to use correct alloc size (bmauer) + * BUGFIX: Avoid gcc 4.0.x bug by making hook-clearing atomic (csilvers) + * BUGFIX: Avoid gcc 4.5.x optimization bug (csilvers) + * BUGFIX: Work around deps-determining bug in libtool 1.5.26 (csilvers) + * BUGFIX: Fixed test to use HAVE_PTHREAD, not HAVE_PTHREADS (csilvers) + * BUGFIX: Fix tls callback behavior on windows when using wpo (wtc) + * BUGFIX: properly align allocation sizes on Windows (antonm) + * BUGFIX: Fix prototypes for tcmalloc/debugalloc wrt throw() (csilvers) + * DOC: Updated heap-checker doc to match reality better (fischman) + * DOC: Document ProfilerFlush, ProfilerStartWithOptions (csilvers) + * DOC: Update docs for heap-profiler functions (csilvers) + * DOC: Clean up documentation around tcmalloc.slack_bytes (fikes) + * DOC: Renamed README.windows to README_windows.txt (csilvers) + * DOC: Update the NEWS file to be non-empty (csilvers) + * PORTING: Fix windows addr2line and nm with proper rc code (csilvers) + * PORTING: Add CycleClock 
and atomicops support for arm 5 (sanek) + * PORTING: Improve PC finding on cygwin and redhat 7 (csilvers) + * PORTING: speed up function-patching under windows (csilvers) + +Tue Jan 19 14:46:12 2010 Google Inc. <opensource@google.com> + + * google-perftools: version 1.5 release + * Add tc_set_new_mode (willchan) + * Make memalign functions + realloc respect tc_set_new_mode (willchan) + * Add ReleaseToSystem(num_bytes) (kash) + * Handle zero-length symbols a bit better in pprof (csilvers) + * Prefer __environ to /proc/self/environ in cpu profiler (csilvers) + * Add HEAP_CHECK_MAX_LEAKS flag to control #leaks to report (glider) + * Add two new numeric pageheap properties to MallocExtension (fikes) + * Print alloc size when mmap fails (hakon) + * Add ITIMER_REAL support to cpu profiler (csilvers, nabeelmian) + * Speed up symbolizer in heap-checker reporting (glider) + * Speed up futexes with FUTEX_PRIVATE_FLAG (m3b) + * Speed up tcmalloc but doing better span coalescing (sanjay) + * Better support for different wget's and addr2maps in pprof (csilvres) + * Implement a nothrow version of delete and delete[] (csilvers) + * BUGFIX: fix a race on module_libcs[i] in windows patching (csilvers) + * BUGFIX: Fix debugallocation to call cpp_alloc for new (willchan) + * BUGFIX: A simple bugfix for --raw mode (mrabkin) + * BUGFIX: Fix C shims to actually be valid C (csilvers) + * BUGFIX: Fix recursively-unmapped-region accounting (ppluzhnikov) + * BUGFIX: better distinguish real and fake vdso (ppluzhnikov) + * WINDOWS: replace debugmodule with more reliable psai (andrey) + * PORTING: Add .bundle as another shared library extension (csilvers) + * PORTING: Fixed a typo bug in the ocnfigure PRIxx m4 macro (csilvers) + * PORTING: Augment sysinfo to work on 64-bit OS X (csilvers) + * PORTING: Use sys/ucontext.h to fix compiing on OS X 10.6 (csilvers) + * PORTING: Fix sysinfo libname reporting for solaris x86 (jeffrey) + * PORTING: Use libunwind for i386 when using --omitfp 
(ppluzhnikov) + +Thu Sep 10 13:51:15 2009 Google Inc. <opensource@google.com> + + * google-perftools: version 1.4 release + * Add debugallocation library, to catch memory leaks, stomping, etc + * Add --raw mode to allow for delayed processing of pprof files + * Use less memory when reading CPU profiles + * New environment variables to control kernel-allocs (sbrk, memfs, etc) + * Add MarkThreadBusy(): performance improvement + * Remove static thread-cache-size code; all is dynamic now + * Add new HiddenPointer class to heap checker + * BUGFIX: pvalloc(0) allocates now (found by new debugalloc library) + * BUGFIX: valloc test (not implementation) no longer overruns memory + * BUGFIX: GetHeapProfile no longer deadlocks + * BUGFIX: Support unmapping memory regions before main + * BUGFIX: Fix some malloc-stats formatting + * BUGFIX: Don't crash as often when freeing libc-allocated memory + * BUGFIX: Deal better with incorrect PPROF_PATH when symbolizing + * BUGFIX: weaken new/delete/etc in addition to malloc/free/etc + * BUGFIX: Fix return value of GetAllocatedSize + * PORTING: Fix mmap-#define problem on some 64-bit systems + * PORTING: Call ranlib again (some OS X versions need it) + * PORTING: Fix a leak when building with LLVM + * PORTING: Remove some unneeded bash-ishs from testing scripts + * WINDOWS: Support library unloading as well as loading + * WINDOWS/BUGFIX: Set page to 'xrw' instead of 'rw' when patching + +Tue Jun 9 18:19:06 2009 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.3 release + * Provide our own name for memory functions: tc_malloc, etc (csilvers) + * Weaken memory-alloc functions so user can override them (csilvers) + * Remove meaningless delete(nothrow) and delete[](nothrow) (csilvers) + * BUILD: replace clever libtcmalloc/profiler.a with a new .a (csilvers) + * PORTING: improve windows port by using google spinlocks (csilvers) + * PORTING: Fix RedHat 9 memory allocation in heapchecker (csilvers) + * PORTING: Rename OS_WINDOWS macro to PLATFORM_WINDOWS (mbelshe) + * PORTING/BUGFIX: Make sure we don't clobber GetLastError (mbelshe) + * BUGFIX: get rid of useless data for callgrind (weidenrinde) + * BUGFIX: Modify windows patching to deadlock sometimes (csilvers) + * BUGFIX: an improved fix for hook handling during fork (csilvers) + * BUGFIX: revamp profiler_unittest.sh, which was very broken (csilvers) + +Fri Apr 17 16:40:48 2009 Google Inc. <opensource@google.com> + + * google-perftools: version 1.2 release + * Allow large_alloc_threshold=0 to turn it off entirely (csilvers) + * Die more helpfully when out of memory for internal data (csilvers) + * Refactor profile-data gathering, add a new unittest (cgd, nabeelmian) + * BUGFIX: fix rounding errors with static thread-size caches (addi) + * BUGFIX: disable hooks better when forking in leak-checker (csilvers) + * BUGFIX: fix realloc of crt pointers on windows (csilvers) + * BUGFIX: do a better job of finding binaries in .sh tests (csilvers) + * WINDOWS: allow overriding malloc/etc instead of patching (mbelshe) + * PORTING: fix compilation error in a ppc-specific file (csilvers) + * PORTING: deal with quirks in cygwin's /proc/self/maps (csilvers) + * PORTING: use 'A' version of functions for ascii input (mbelshe) + * PORTING: generate .so's on cygwin and mingw (ajenjo) + * PORTING: disable profiler methods on cygwin (jperkins) + * Updated autoconf version to 2.61 and libtool version to 1.5.26 + +Wed Mar 11 11:25:34 2009 
Google Inc. <opensource@google.com> + + * google-perftools: version 1.1 release + * Dynamically resize thread caches -- nice perf. improvement (kash) + * Add VDSO support to give better stacktraces in linux (ppluzhnikov) + * Improve heap-profiling sampling algorithm (ford) + * Rewrite leak-checking code: should be faster and more robust (sanjay) + * Use ps2 instead of ps for dot: better page cropping for gv (csilvers) + * Disable malloc-failure warning messages by default (csilvers) + * Update config/Makefile to disable tests on a per-OS basis (csilvers) + * PORTING: Get perftools compiling under MSVC 7.1 again (csilvers) + * PORTING: Get perftools compiling under cygwin again (csilvers) + * PORTING: automatically set library flags for solaris x86 (csilvers) + * Add TCMALLOC_SKIP_SBRK to mirror TCMALLOC_SKIP_MMAP (csilvers) + * Add --enable flags to allow selective building (csilvers) + * Put addr2line-pdb and nm-pdb in proper output directory (csilvers) + * Remove deprecated DisableChecksIn (sanjay) + * DOCUMENTATION: Document most MallocExtension routines (csilvers) + +Tue Jan 6 13:58:56 2009 Google Inc. <opensource@google.com> + + * google-perftools: version 1.0 release + * Exactly the same as 1.0rc2 + +Sun Dec 14 17:10:35 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 1.0rc2 release + * Fix compile error on 64-bit systems (casting ptr to int) (csilvers) + +Thu Dec 11 16:01:32 2008 Google Inc. 
<opensource@google.com> + + * google-perftools: version 1.0rc1 release + * Replace API for selectively disabling heap-checker in code (sanjay) + * Add a pre-mmap hook (daven, adlr) + * Add MallocExtension interface to set memory-releasing rate (fikes) + * Augment pprof to allow any string ending in /pprof/profile (csilvers) + * PORTING: Rewrite -- and fix -- malloc patching for windows (dvitek) + * PORTING: Add nm-pdb and addr2line-pdb for use by pprof (dvitek) + * PORTING: Improve cygwin and mingw support (jperkins, csilvers) + * PORTING: Fix pprof for mac os x, other pprof improvements (csilvers) + * PORTING: Fix some PPC bugs in our locking code (anton.blanchard) + * A new unittest, smapling_test, to verify tcmalloc-profiles (csilvers) + * Turn off TLS for gcc < 4.1.2, due to a TLS + -fPIC bug (csilvers) + * Prefer __builtin_frame_address to assembly for stacktraces (nlewycky) + * Separate tcmalloc.cc out into multiple files -- finally! (kash) + * Make our locking code work with -fPIC on 32-bit x86 (aruns) + * Fix an initialization-ordering bug for tcmalloc/profiling (csilvers) + * Use "initial exec" model of TLS to speed up tcmalloc (csilvers) + * Enforce 16-byte alignment for tcmalloc, for SSE (sanjay) + +Tue Sep 23 08:56:31 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.99.2 release + * COMPILE FIX: add #include needed for FreeBSD and OS X (csilvers) + +Sat Sep 20 09:37:18 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.99.1 release + * BUG FIX: look for nm, etc in /usr/bin, not /usr/crosstool (csilvers) + +Thu Sep 18 16:00:27 2008 Google Inc. 
<opensource@google.com> + + * google-perftools: version 0.99 release + * Add IsHeapProfileRunning (csilvers) + * Add C shims for some of the C++ header files (csilvers) + * Fix heap profile file clean-up logic (maxim) + * Rename linuxthreads.c to .cc for better compiler support (csilvers) + * Add source info to disassembly in pprof (sanjay) + * Use open instead of fopen to avoid memory alloc (csilvers) + * Disable malloc extensions when running under valgrind (kcc) + * BUG FIX: Fix out-of-bound error by reordering a check (larryz) + * Add Options struct to ProfileData (cgd) + * Correct PC-handling of --base in pprof (csilvers) + * Handle 1 function occurring twice in an image (sanjay) + * Improve stack-data cleaning (maxim) + * Use 'struct Foo' to make header C compatible (csilvers) + * Add 'total' line to pprof --text (csilvers) + * Pre-allocate buffer for heap-profiler to avoid OOM errors (csilvers) + * Allow a few more env-settings to control tcmalloc (csilvers) + * Document some of the issues involving thread-local storage (csilvers) + * BUG FIX: Define strtoll and friends for windows (csilvers) + +Mon Jun 9 16:47:03 2008 Google Inc. 
<opensource@google.com> + + * google-perftools: version 0.98 release + * Add ProfilerStartWithOptions() (cgd) + * Change tcmalloc_minimal to not do any stack-tracing at all (csilvers) + * Prefer mmap to sbrk for 64-buit debug mode (sanjay) + * Fix accounting for some tcmalloc stats (sanjay) + * Use setrlimit() to keep unittests from killing the machine (odo) + * Fix a bug when sbrk-ing near address 4G (csilvers) + * Make MallocHook thread-safe (jyasskin) + * Fix windows build for MemoryBarrier (jyasskin) + * Fix CPU-profiler docs to mention correct libs (csilvers) + * Fix for GetHeapProfile() when heap-profiling is off (maxim) + * Avoid realloc resizing ping-pongs using hysteresis (csilvers) + * Add --callgrind output support to pprof (klimek) + * Fix profiler.h and heap-profiler.h to be C-compatible (csilvers) + * Break malloc_hook.h into two parts to reduce dependencies (csilvers) + * Better handle systems that don't implement mmap (csilvers) + * PORTING: disable system_alloc_unittest for msvc (csilvers) + * PORTING: Makefile tweaks to build better on cygwin (csilvers) + +Mon Apr 21 15:20:52 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.97 release + * Refactor GetHeapProfile to avoid using malloc (maxim) + * Fix heap-checker and heap-profiler hook interactions (maxim) + * Fix a data race in MemoryRegionMap::Lock (jyasskin) + * Improve thread-safety of leak checker (maxim) + * Fix mmap profile to no longer deadlock (maxim) + * Fix rpm to have devel package depend on non-devel (csilvers) + * PORTING: Fix clock-speed detection for Mac OS X (csilvers) + +Tue Mar 18 14:30:44 2008 Google Inc. 
<opensource@google.com> + + * google-perftools: version 0.96 release + * major atomicops rewrite; fixed atomic ops code for linux/ppc (vchen) + * nix the stacktrace library; now build structure is simpler (csilvers) + * Speed up heap-checker, and reduce extraneous logging (maxim) + * Improve itimer code for NPTL case (cgd) + * Add source code annotations for use by valgrind, etc (kcc) + * PORTING: Fix high resolution timers for Mac OS X (adlr) + +Tue Feb 19 12:01:31 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.95.1 release (bugfix release) + * x86_64 compile-fix: nix pread64 and pwrite64 (csilvers) + * more heap-checker debug logging (maxim) + * minor improvement to x86_64 CycleClock (gpike) + +Tue Feb 12 12:28:32 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.95 release + * Better -- not perfect -- support for linux-ppc (csilvers) + * Fix race condition in libunwind stacktrace (aruns) + * Speed up x86 spinlock locking (m3b) + * Improve heap-checker performance (maxim) + * Heap checker traverses more ptrs inside heap-alloced objects (maxim) + * Remove deprecated ProfilerThreadState function (cgd) + * Update libunwind documentation for statically linked binaries (aruns) + +Mon Dec 3 23:51:54 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.94.1 release (bugfix release) + * Fix missing #includes for x86_64 compile using libunwind (csilvers) + +Thu Nov 29 07:59:43 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.94 release + * PORTING: MinGW/Msys support -- runs same code as MSVC does (csilvers) + * PORTING: Add NumCPUs support for Mac OS X (csilvers) + * Work around a sscanf bug in glibc(?) 
(waldemar) + * Fix Windows MSVC bug triggered by thread deletion (csilvers) + * Fix bug that triggers in MSVC /O2: missing volatile (gpike) + * March-of-time support: quiet warnings/errors for gcc 4.2, OS X 10.5 + * Modify pprof so it works without nm: useful for windows (csilvers) + * pprof: Support filtering for CPU profiles (cgd) + * Bugfix: have realloc report to hooks in all situations (maxim) + * Speed improvement: replace slow memcpy with std::copy (soren) + * Speed: better iterator efficiency in RecordRegionRemoval (soren) + * Speed: minor speed improvements via better bitfield alignment (gpike) + * Documentation: add documentation of binary profile output (cgd) + +Fri Aug 17 12:32:56 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.93 release + * PORTING: everything compiles on Solaris, OS X, FreeBSD (see INSTALL) + * PORTING: cpu-profiler works on most platforms (much better GetPC()) + * PORTING: heap-profiler works on most platforms + * PORTING: improved windows support, including release builds + * No longer build or run ptmalloc tests by default + * Add support for using memfs filesystem to allocate memory in linux + * WINDOWS: give debug library and release library different names + +Tue Jul 17 22:26:27 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.92 release + * PERFORMANCE: use a packed cache to speed up tcmalloc + * PORTING: preliminary windows support! 
(see README.windows) + * PORTING: better support for solaris, OS X, FreeBSD (see INSTALL) + * Envvar support for running the heap-checker under gdb + * Add weak declarations to maybe_threads to fix no-pthreads compile bugs + * Some 64bit fixes, especially with pprof + * Better heap-checker support for some low-level allocations + * Fix bug where heap-profiles would sometimes get truncated + * New documentation about how to handle common heap leak situations + * Use computed includes for hash_map/set: easier config + * Added all used .m4 templates to the distribution + +Wed Apr 18 16:43:55 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.91 release + * Brown-paper-bag bugfix: compilation error on some x86-64 machines + +Fri Apr 13 14:50:51 2007 Google Inc. <opensource@google.com> + + * google-perftools: version 0.90 release + * (As the version-number jump hints, this is a major new release: + almost every piece of functionality was rewritten. I can't do + justice to all the changes, but will concentrate on highlights.) + *** USER-VISIBLE CHANGES: + * Ability to "release" unused memory added to tcmalloc + * Exposed more tweaking knobs via environment variables (see docs) + * pprof tries harder to map addresses to functions + * tcmalloc_minimal compiles and runs on FreeBSD 6.0 and Solaris 10 + *** INTERNAL CHANGES: + * Much better 64-bit support + * Better multiple-processor support (e.g. multicore contention tweaks) + * Support for recent kernel ABI changes (e.g. new arg to mremap) + * Addition of spinlocks to tcmalloc to reduce contention cost + * Speed up tcmalloc by using __thread on systems that support TLS + * Total redesign of heap-checker to improve liveness checking + * More portable stack-frame analysis -- no more hard-coded constants! + * Disentangled heap-profiler code and heap-checker code + * Several new unittests to test, e.g., thread-contention costs + * Lots of small (but important!) 
bug fixes: e.g., fixing GetPC on amd64 + *** KNOWN PROBLEMS: + * CPU-profiling may crash on x86_64 (64-bit) systems. See the README + * Profiling/heap-checking may deadlock on x86_64 systems. See README + +Wed Jun 14 15:11:14 2006 Google Inc. <opensource@google.com> + + * google-perftools: version 0.8 release + * Experimental support for remote profiling added to pprof (many) + * Fixed race condition in ProfileData::FlushTable (etune) + * Better support for weird /proc maps (maxim, mec) + * Fix heap-checker interaction with gdb (markus) + * Better 64-bit support in pprof (aruns) + * Reduce scavenging cost in tcmalloc by capping NumMoveSize (sanjay) + * Cast syscall(SYS_mmap); works on more 64-bit systems now (menage) + * Document the text output of pprof! (csilvers) + * Better compiler support for no-THREADS and for old compilers (csilvers) + * Make libunwind the default stack unwinder for x86-64 (aruns) + * Somehow the COPYING file got erased. Regenerate it (csilvers) + +Thu Apr 13 20:59:09 2006 Google Inc. <opensource@google.com> + + * google-perftools: version 0.7 release + * Major rewrite of thread introspection for new kernels (markus) + * Major rewrite of heap-checker to use new thread tools (maxim) + * Add proper support for following data in thread registers (maxim) + * Syscall support for older kernels, including _syscall6 (markus) + * Support PIC mode (markus, mbland, iant) + * Better support for running in non-threaded contexts (csilvers) + +Fri Jan 27 14:04:27 2006 Google Inc. 
<opensource@google.com> + + * google-perftools: version 0.6 release + * More sophisticated stacktrace usage, possibly using libunwind (aruns) + * Update pprof to handle 64-bit profiles (dehnert) + * Fix GetStackTrace to correctly return top stackframe (sanjay) + * Add ANSI compliance for new and new[], including new_handler (jkearney) + * More accuracy by reading ELF files directly rather than objdump (mec) + * Add readline support for pprof (addi) + * Add #includes for PPC (csilvers) + * New PC-detection routine for ibook powerpc (asbestoshead) + * Vastly improved tcmalloc unittest (csilvers) + * Move documentation from /usr/doc to /usr/share/doc + +Mon Nov 14 17:28:59 2005 Google Inc. <opensource@google.com> + + * google-perftools: version 0.5 release + * Add va_start/va_end calls around vsnprintf() (csilvers) + * Write our own __syscall_return(), since it's not defined + consistently on all 64-bit linux distros (markus) + +Wed Oct 26 15:19:16 2005 Google Inc. <opensource@google.com> + + * google-perftools: version 0.4 release + * Decrease fragmentation in tcmalloc (lefevere) + * Support for ARM in some of the thread-specific code (markus) + * Turn off heap-checker for statically-linked binaries, which + cause error leak reports now (etune) + * Many pprof improvements, including a command-line interface (jeff) + * CPU profiling now automatically affects all threads in linux 2.6. + (Kernel bugs break CPU profiling and threads in linux 2.4 a bit.) + ProfilerEnable() and ProfilerDisable() are deprecated. (sanjay) + * tcmalloc now correctly intercepts memalign (m3b, maxim) + * Syntax fix: added missing va_end()s. 
Helps non-gcc compiling (etune) + * Fixed a few coredumper bugs: race condition after PTRACE_DETACH, + ignore non-aligned stackframe pointers (markus, menage) + * 64-bit cleanup, especially for spinlock code (etune) and mmap (sanjay) + * Better support for finding threads in linux (markus) + * tcmalloc now tracks those stack traces that allocate memory (sanjay) + * Work around a weird setspecific problem (sanjay) + * Fix tcmalloc overflow problems when an alloc is close to 2G/4G (sanjay) + +Fri Jun 24 18:02:26 2005 Google Inc. <opensource@google.com> + + * google-perftools: version 0.3 release + * Add missing errno include for one of the unittests (csilvers) + * Reduce tcmalloc startup memory from 5M to 256K (sanjay) + * Add support for mallopt() and mallinfo (sanjay) + * Improve stacktrace's performance on some 64-bit systems (etune) + * Improve the stacktrace unittest (etune) + +Tue May 31 08:14:38 2005 Google Inc. <opensource@google.com> + + * google-perftools: version 0.2 release + * Use mmap2() instead of mmap(), to map more memory (menage) + * Do correct pthread-local checking in heap-checker! 
(maxim) + * Avoid overflow on 64-bit machines in pprof (sanjay) + * Add a few more GetPC() functions, including for AMD (csilvers) + * Better method for overriding pthread functions (menage) + * (Hacky) fix to avoid overwriting profile files after fork() (csilvers) + * Crashing bugfix involving dumping heaps on small-stack threads (tudor) + * Allow library versions with letters at the end (csilvers) + * Config fixes for systems that don't define PATH_MAX (csilvers) + * Confix fixes so we no longer need config.h after install (csilvers) + * Fix to pprof to correctly read very big cpu profiles (csilvers) + * Fix to pprof to deal with new commandline flags in modern gv's + * Better error reporting when we can't access /proc/maps (etune) + * Get rid of the libc-preallocate code (which could crash on some + systems); no longer needed with local-threads fix (csilvers) + +Tue Feb 8 09:57:17 2005 Google Inc. <opensource@google.com> + + * google-perftools: initial release: + The google-perftools package contains some utilities to improve + and analyze the performance of C++ programs. This includes an + optimized thread-caching malloc() and cpu and heap profiling + utilities. diff --git a/src/third_party/gperftools-2.7/INSTALL b/src/third_party/gperftools-2.7/INSTALL new file mode 100644 index 00000000000..f9a6a117289 --- /dev/null +++ b/src/third_party/gperftools-2.7/INSTALL @@ -0,0 +1,563 @@ +Copyright 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software +Foundation, Inc. + + This file is free documentation; the Free Software Foundation gives +unlimited permission to copy, distribute and modify it. + + +Perftools-Specific Install Notes +================================ + +*** Building from source repository + +As of 2.1 gperftools does not have configure and other autotools +products checked into it's source repository. This is common practice +for projects using autotools. 
+ +NOTE: Source releases (.tar.gz that you download from +code.google.com/p/gperftools) still have all required files just as +before. Nothing has changed w.r.t. building from .tar.gz releases. + +But, in order to build gperftools checked out from subversion +repository you need to have autoconf, automake and libtool +installed. And before running ./configure you have to generate it (and +a bunch of other files) by running ./autogen.sh script. That script +will take care of calling correct autotools programs in correct order. + +If you're maintainer then it's business as usual too. Just run make +dist (or, preferably, make distcheck) and it'll produce .tar.gz or +.tar.bz2 with all autotools magic already included. So that users can +build our software without having autotools. + + +*** NOTE FOR 64-BIT LINUX SYSTEMS + +The glibc built-in stack-unwinder on 64-bit systems has some problems +with the perftools libraries. (In particular, the cpu/heap profiler +may be in the middle of malloc, holding some malloc-related locks when +they invoke the stack unwinder. The built-in stack unwinder may call +malloc recursively, which may require the thread to acquire a lock it +already holds: deadlock.) + +For that reason, if you use a 64-bit system, we strongly recommend you +install libunwind before trying to configure or install gperftools. +libunwind can be found at + + http://download.savannah.gnu.org/releases/libunwind/libunwind-0.99-beta.tar.gz + +Even if you already have libunwind installed, you should check the +version. Versions older than this will not work properly; too-new +versions introduce new code that does not work well with perftools +(because libunwind can call malloc, which will lead to deadlock). + +There have been reports of crashes with libunwind 0.99 (see +http://code.google.com/p/gperftools/issues/detail?id=374). +Alternately, you can use a more recent libunwind (e.g. 1.0.1) at the +cost of adding a bit of boilerplate to your code. 
For details, see +http://groups.google.com/group/google-perftools/msg/2686d9f24ac4365f + + CAUTION: if you install libunwind from the url above, be aware that + you may have trouble if you try to statically link your binary with + perftools: that is, if you link with 'gcc -static -lgcc_eh ...'. + This is because both libunwind and libgcc implement the same C++ + exception handling APIs, but they implement them differently on + some platforms. This is not likely to be a problem on ia64, but + may be on x86-64. + + Also, if you link binaries statically, make sure that you add + -Wl,--eh-frame-hdr to your linker options. This is required so that + libunwind can find the information generated by the compiler + required for stack unwinding. + + Using -static is rare, though, so unless you know this will affect + you it probably won't. + +If you cannot or do not wish to install libunwind, you can still try +to use the built-in stack unwinder. The built-in stack unwinder +requires that your application, the tcmalloc library, and system +libraries like libc, all be compiled with a frame pointer. This is +*not* the default for x86-64. + +If you are on x86-64 system, know that you have a set of system +libraries with frame-pointers enabled, and compile all your +applications with -fno-omit-frame-pointer, then you can enable the +built-in perftools stack unwinder by passing the +--enable-frame-pointers flag to configure. + +Even with the use of libunwind, there are still known problems with +stack unwinding on 64-bit systems, particularly x86-64. See the +"64-BIT ISSUES" section in README. + +If you encounter problems, try compiling perftools with './configure +--enable-frame-pointers'. Note you will need to compile your +application with frame pointers (via 'gcc -fno-omit-frame-pointer +...') in this case. 
+ + +*** TCMALLOC LARGE PAGES: TRADING TIME FOR SPACE + +You can set a compiler directive that makes tcmalloc faster, at the +cost of using more space (due to internal fragmentation). + +Internally, tcmalloc divides its memory into "pages." The default +page size is chosen to minimize memory use by reducing fragmentation. +The cost is that keeping track of these pages can cost tcmalloc time. +We've added a new flag to tcmalloc that enables a larger page size. +In general, this will increase the memory needs of applications using +tcmalloc. However, in many cases it will speed up the applications +as well, particularly if they allocate and free a lot of memory. We've +seen average speedups of 3-5% on Google applications. + +To build libtcmalloc with large pages you need to use the +--with-tcmalloc-pagesize=ARG configure flag, e.g.: + + ./configure <other flags> --with-tcmalloc-pagesize=32 + +The ARG argument can be 8, 32 or 64 which sets the internal page size to +8K, 32K and 64K respectively. The default is 8K. + + +*** SMALL TCMALLOC CACHES: TRADING SPACE FOR TIME + +You can set a compiler directive that makes tcmalloc use less memory +for overhead, at the cost of some time. + +Internally, tcmalloc keeps information about some of its internal data +structures in a cache. This speeds memory operations that need to +access this internal data. We've added a new, experimental flag to +tcmalloc that reduces the size of this cache, decreasing the memory +needs of applications using tcmalloc. + +This feature is still very experimental; it's not even a configure +flag yet. To build libtcmalloc with smaller internal caches, run + + ./configure <normal flags> CXXFLAGS=-DTCMALLOC_SMALL_BUT_SLOW + +(or add -DTCMALLOC_SMALL_BUT_SLOW to your existing CXXFLAGS argument). 
+ + +*** NOTE FOR ___tls_get_addr ERROR + +When compiling perftools on some old systems, like RedHat 8, you may +get an error like this: + ___tls_get_addr: symbol not found + +This means that you have a system where some parts are updated enough +to support Thread Local Storage, but others are not. The perftools +configure script can't always detect this kind of case, leading to +that error. To fix it, just comment out the line + #define HAVE_TLS 1 +in your config.h file before building. + + +*** TCMALLOC AND DLOPEN + +To improve performance, we use the "initial exec" model of Thread +Local Storage in tcmalloc. The price for this is the library will not +work correctly if it is loaded via dlopen(). This should not be a +problem, since loading a malloc-replacement library via dlopen is +asking for trouble in any case: some data will be allocated with one +malloc, some with another. If, for some reason, you *do* need to use +dlopen on tcmalloc, the easiest way is to use a version of tcmalloc +with TLS turned off; see the ___tls_get_addr note above. 
+ + +*** COMPILING ON NON-LINUX SYSTEMS + +Perftools has been tested on the following systems: + FreeBSD 6.0 (x86) + FreeBSD 8.1 (x86_64) + Linux CentOS 5.5 (x86_64) + Linux Debian 4.0 (PPC) + Linux Debian 5.0 (x86) + Linux Fedora Core 3 (x86) + Linux Fedora Core 4 (x86) + Linux Fedora Core 5 (x86) + Linux Fedora Core 6 (x86) + Linux Fedora Core 13 (x86_64) + Linux Fedora Core 14 (x86_64) + Linux RedHat 9 (x86) + Linux Slackware 13 (x86_64) + Linux Ubuntu 6.06.1 (x86) + Linux Ubuntu 6.06.1 (x86_64) + Linux Ubuntu 10.04 (x86) + Linux Ubuntu 10.10 (x86_64) + Mac OS X 10.3.9 (Panther) (PowerPC) + Mac OS X 10.4.8 (Tiger) (PowerPC) + Mac OS X 10.4.8 (Tiger) (x86) + Mac OS X 10.5 (Leopard) (x86) + Mac OS X 10.6 (Snow Leopard) (x86) + Solaris 10 (x86_64) + Windows XP, Visual Studio 2003 (VC++ 7.1) (x86) + Windows XP, Visual Studio 2005 (VC++ 8) (x86) + Windows XP, Visual Studio 2005 (VC++ 9) (x86) + Windows XP, Visual Studio 2005 (VC++ 10) (x86) + Windows XP, MinGW 5.1.3 (x86) + Windows XP, Cygwin 5.1 (x86) + +It works in its full generality on the Linux systems +tested (though see 64-bit notes above). Portions of perftools work on +the other systems. The basic memory-allocation library, +tcmalloc_minimal, works on all systems. The cpu-profiler also works +fairly widely. However, the heap-profiler and heap-checker are not +yet as widely supported. In general, the 'configure' script will +detect what OS you are building for, and only build the components +that work on that OS. + +Note that tcmalloc_minimal is perfectly usable as a malloc/new +replacement, so it is possible to use tcmalloc on all the systems +above, by linking in libtcmalloc_minimal. 
+ +** FreeBSD: + + The following binaries build and run successfully (creating + libtcmalloc_minimal.so and libprofile.so in the process): + % ./configure + % make tcmalloc_minimal_unittest tcmalloc_minimal_large_unittest \ + addressmap_unittest atomicops_unittest frag_unittest \ + low_level_alloc_unittest markidle_unittest memalign_unittest \ + packed_cache_test stacktrace_unittest system_alloc_unittest \ + thread_dealloc_unittest profiler_unittest.sh + % ./tcmalloc_minimal_unittest # to run this test + % [etc] # to run other tests + + Three caveats: first, frag_unittest tries to allocate 400M of memory, + and if you have less virtual memory on your system, the test may + fail with a bad_alloc exception. + + Second, profiler_unittest.sh sometimes fails in the "fork" test. + This is because stray SIGPROF signals from the parent process are + making their way into the child process. (This may be a kernel + bug that only exists in older kernels.) The profiling code itself + is working fine. This only affects programs that call fork(); for + most programs, the cpu profiler is entirely safe to use. + + Third, perftools depends on /proc to get shared library + information. If you are running a FreeBSD system without proc, + perftools will not be able to map addresses to functions. Some + unittests will fail as a result. + + Finally, the new test introduced in perftools-1.2, + profile_handler_unittest, fails on FreeBSD. It has something to do + with how the itimer works. The cpu profiler test passes, so I + believe the functionality is correct and the issue is with the test + somehow. If anybody is an expert on itimers and SIGPROF in + FreeBSD, and would like to debug this, I'd be glad to hear the + results! 
+ + libtcmalloc.so successfully builds, and the "advanced" tcmalloc + functionality all works except for the leak-checker, which has + Linux-specific code: + % make heap-profiler_unittest.sh maybe_threads_unittest.sh \ + tcmalloc_unittest tcmalloc_both_unittest \ + tcmalloc_large_unittest # THESE WORK + % make -k heap-checker_unittest.sh \ + heap-checker-death_unittest.sh # THESE DO NOT + + Note that unless you specify --enable-heap-checker explicitly, + 'make' will not build the heap-checker unittests on a FreeBSD + system. + + I have not tested other *BSD systems, but they are probably similar. + +** Mac OS X: + + I've tested OS X 10.5 [Leopard], OS X 10.4 [Tiger] and OS X 10.3 + [Panther] on both intel (x86) and PowerPC systems. For Panther + systems, perftools does not work at all: it depends on a header + file, OSAtomic.h, which is new in 10.4. (It's possible to get the + code working for Panther/i386 without too much work; if you're + interested in exploring this, drop an e-mail.) + + For the other seven systems, the binaries and libraries that + successfully build are exactly the same as for FreeBSD. See that + section for a list of binaries and instructions on building them. + + In addition, it appears OS X regularly fails profiler_unittest.sh + in the "thread" test (in addition to occassionally failing in the + "fork" test). It looks like OS X often delivers the profiling + signal to the main thread, even when it's sleeping, rather than + spawned threads that are doing actual work. If anyone knows + details of how OS X handles SIGPROF (via setitimer()) events with + threads, and has insight into this problem, please send mail to + google-perftools@googlegroups.com. + +** Solaris 10 x86: + + I've only tested using the GNU C++ compiler, not the Sun C++ + compiler. Using g++ requires setting the PATH appropriately when + configuring. + + % PATH=${PATH}:/usr/sfw/bin/:/usr/ccs/bin ./configure + % PATH=${PATH}:/usr/sfw/bin/:/usr/ccs/bin make [...] 
+ + Again, the binaries and libraries that successfully build are + exactly the same as for FreeBSD. (However, while libprofiler.so can + be used to generate profiles, pprof is not very successful at + reading them -- necessary helper programs like nm don't seem + to be installed by default on Solaris, or perhaps are only + installed as part of the Sun C++ compiler package.) See that + section for a list of binaries, and instructions on building them. + +** Windows (MSVC, Cygwin, and MinGW): + + Work on Windows is rather preliminary: only tcmalloc_minimal is + supported. + + We haven't found a good way to get stack traces in release mode on + windows (that is, when FPO is enabled), so the heap profiling may + not be reliable in that case. Also, heap-checking and CPU profiling + do not yet work at all. But as in other ports, the basic tcmalloc + library functionality, overriding malloc and new and such (and even + windows-specific functions like _aligned_malloc!), is working fine, + at least with VC++ 7.1 (Visual Studio 2003) through VC++ 10.0, + in both debug and release modes. See README.windows for + instructions on how to install on Windows using Visual Studio. + + Cygwin can compile some but not all of perftools. Furthermore, + there is a problem with exception-unwinding in cygwin (it can call + malloc, which can call the exception-unwinding-setup code, which + can lead to an infinite loop). I've comitted a workaround to the + exception unwinding problem, but it only works in debug mode and + when statically linking in tcmalloc. I hope to have a more proper + fix in a later release. To configure under cygwin, run + + ./configure --disable-shared CXXFLAGS=-g && make + + Most of cygwin will compile (cygwin doesn't allow weak symbols, so + the heap-checker and a few other pieces of functionality will not + compile). 'make' will compile those libraries and tests that can + be compiled. You can run 'make check' to make sure the basic + functionality is working. 
I've heard reports that some versions of + cygwin fail calls to pthread_join() with EINVAL, causing several + tests to fail. If you have any insight into this, please mail + google-perftools@googlegroups.com. + + This Windows functionality is also available using MinGW and Msys, + In this case, you can use the regular './configure && make' + process. 'make install' should also work. The Makefile will limit + itself to those libraries and binaries that work on windows. + + +Basic Installation +================== + + These are generic installation instructions. + + The `configure' shell script attempts to guess correct values for +various system-dependent variables used during compilation. It uses +those values to create a `Makefile' in each directory of the package. +It may also create one or more `.h' files containing system-dependent +definitions. Finally, it creates a shell script `config.status' that +you can run in the future to recreate the current configuration, and a +file `config.log' containing compiler output (useful mainly for +debugging `configure'). + + It can also use an optional file (typically called `config.cache' +and enabled with `--cache-file=config.cache' or simply `-C') that saves +the results of its tests to speed up reconfiguring. (Caching is +disabled by default to prevent problems with accidental use of stale +cache files.) + + If you need to do unusual things to compile the package, please try +to figure out how `configure' could check whether to do them, and mail +diffs or instructions to the address given in the `README' so they can +be considered for the next release. If you are using the cache, and at +some point `config.cache' contains results you don't want to keep, you +may remove or edit it. + + The file `configure.ac' (or `configure.in') is used to create +`configure' by a program called `autoconf'. You only need +`configure.ac' if you want to change it or regenerate `configure' using +a newer version of `autoconf'. 
+ +The simplest way to compile this package is: + + 1. `cd' to the directory containing the package's source code and type + `./configure' to configure the package for your system. If you're + using `csh' on an old version of System V, you might need to type + `sh ./configure' instead to prevent `csh' from trying to execute + `configure' itself. + + Running `configure' takes awhile. While running, it prints some + messages telling which features it is checking for. + + 2. Type `make' to compile the package. + + 3. Optionally, type `make check' to run any self-tests that come with + the package. + + 4. Type `make install' to install the programs and any data files and + documentation. + + 5. You can remove the program binaries and object files from the + source code directory by typing `make clean'. To also remove the + files that `configure' created (so you can compile the package for + a different kind of computer), type `make distclean'. There is + also a `make maintainer-clean' target, but that is intended mainly + for the package's developers. If you use it, you may have to get + all sorts of other programs in order to regenerate files that came + with the distribution. + +Compilers and Options +===================== + + Some systems require unusual options for compilation or linking that +the `configure' script does not know about. Run `./configure --help' +for details on some of the pertinent environment variables. + + You can give `configure' initial values for configuration parameters +by setting variables in the command line or in the environment. Here +is an example: + + ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix + + *Note Defining Variables::, for more details. + +Compiling For Multiple Architectures +==================================== + + You can compile the package for more than one kind of computer at the +same time, by placing the object files for each architecture in their +own directory. 
To do this, you must use a version of `make' that +supports the `VPATH' variable, such as GNU `make'. `cd' to the +directory where you want the object files and executables to go and run +the `configure' script. `configure' automatically checks for the +source code in the directory that `configure' is in and in `..'. + + If you have to use a `make' that does not support the `VPATH' +variable, you have to compile the package for one architecture at a +time in the source code directory. After you have installed the +package for one architecture, use `make distclean' before reconfiguring +for another architecture. + +Installation Names +================== + + By default, `make install' will install the package's files in +`/usr/local/bin', `/usr/local/man', etc. You can specify an +installation prefix other than `/usr/local' by giving `configure' the +option `--prefix=PATH'. + + You can specify separate installation prefixes for +architecture-specific files and architecture-independent files. If you +give `configure' the option `--exec-prefix=PATH', the package will use +PATH as the prefix for installing programs and libraries. +Documentation and other data files will still use the regular prefix. + + In addition, if you use an unusual directory layout you can give +options like `--bindir=PATH' to specify different values for particular +kinds of files. Run `configure --help' for a list of the directories +you can set and what kinds of files go in them. + + If the package supports it, you can cause programs to be installed +with an extra prefix or suffix on their names by giving `configure' the +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. + +Optional Features +================= + + Some packages pay attention to `--enable-FEATURE' options to +`configure', where FEATURE indicates an optional part of the package. +They may also pay attention to `--with-PACKAGE' options, where PACKAGE +is something like `gnu-as' or `x' (for the X Window System). 
The +`README' should mention any `--enable-' and `--with-' options that the +package recognizes. + + For packages that use the X Window System, `configure' can usually +find the X include and library files automatically, but if it doesn't, +you can use the `configure' options `--x-includes=DIR' and +`--x-libraries=DIR' to specify their locations. + +Specifying the System Type +========================== + + There may be some features `configure' cannot figure out +automatically, but needs to determine by the type of machine the package +will run on. Usually, assuming the package is built to be run on the +_same_ architectures, `configure' can figure that out, but if it prints +a message saying it cannot guess the machine type, give it the +`--build=TYPE' option. TYPE can either be a short name for the system +type, such as `sun4', or a canonical name which has the form: + + CPU-COMPANY-SYSTEM + +where SYSTEM can have one of these forms: + + OS KERNEL-OS + + See the file `config.sub' for the possible values of each field. If +`config.sub' isn't included in this package, then this package doesn't +need to know the machine type. + + If you are _building_ compiler tools for cross-compiling, you should +use the `--target=TYPE' option to select the type of system they will +produce code for. + + If you want to _use_ a cross compiler, that generates code for a +platform different from the build platform, you should specify the +"host" platform (i.e., that on which the generated programs will +eventually be run) with `--host=TYPE'. + +Sharing Defaults +================ + + If you want to set default values for `configure' scripts to share, +you can create a site shell script called `config.site' that gives +default values for variables like `CC', `cache_file', and `prefix'. +`configure' looks for `PREFIX/share/config.site' if it exists, then +`PREFIX/etc/config.site' if it exists. Or, you can set the +`CONFIG_SITE' environment variable to the location of the site script. 
+A warning: not all `configure' scripts look for a site script. + +Defining Variables +================== + + Variables not defined in a site shell script can be set in the +environment passed to `configure'. However, some packages may run +configure again during the build, and the customized values of these +variables may be lost. In order to avoid this problem, you should set +them in the `configure' command line, using `VAR=value'. For example: + + ./configure CC=/usr/local2/bin/gcc + +will cause the specified gcc to be used as the C compiler (unless it is +overridden in the site shell script). + +`configure' Invocation +====================== + + `configure' recognizes the following options to control how it +operates. + +`--help' +`-h' + Print a summary of the options to `configure', and exit. + +`--version' +`-V' + Print the version of Autoconf used to generate the `configure' + script, and exit. + +`--cache-file=FILE' + Enable the cache: use and save the results of the tests in FILE, + traditionally `config.cache'. FILE defaults to `/dev/null' to + disable caching. + +`--config-cache' +`-C' + Alias for `--cache-file=config.cache'. + +`--quiet' +`--silent' +`-q' + Do not print messages saying which checks are being made. To + suppress all normal output, redirect it to `/dev/null' (any error + messages will still be shown). + +`--srcdir=DIR' + Look for the package's source code in directory DIR. Usually + `configure' can determine that directory automatically. + +`configure' also accepts some other, not widely useful, options. Run +`configure --help' for more details. diff --git a/src/third_party/gperftools-2.7/NEWS b/src/third_party/gperftools-2.7/NEWS new file mode 100644 index 00000000000..9938f0af77d --- /dev/null +++ b/src/third_party/gperftools-2.7/NEWS @@ -0,0 +1,1003 @@ +== 29 Apr 2018 == +gperftools 2.7 is out! + +Few people contributed minor, but important fixes since rc. 
+ +Changes: + +* bug in span stats printing introduced by new scalable page heap + change was fixed. + +* Christoph Müllner has contributed couple warnings fixes and initial + support for aarch64_ilp32 architecture. + +* Ben Dang contributed documentation fix for heap checker. + +* Fabrice Fontaine contributed fixed for linking benchmarks with + --disable-static. + +* Holy Wu has added sized deallocation unit tests. + +* Holy Wu has enabled support of sized deallocation (c++14) on recent + MSVC. + +* Holy Wu has fixed MSVC build in WIN32_OVERRIDE_ALLOCATORS mode. This + closed issue #716. + +* Holy Wu has contributed cleanup of config.h used on windows. + +* Mao Huang has contributed couple simple tcmalloc changes from + chromium code base. Making our tcmalloc forks a tiny bit closer. + +* issue #946 that caused compilation failures on some Linux clang + installations has been fixed. Much thanks to github user htuch for + helping to diagnose issue and proposing a fix. + +* Tulio Magno Quites Machado Filho has contributed build-time fix for + PPC (for problem introduced in one of commits since RC). + +== 18 Mar 2018 == +gperftools 2.7rc is out! + +Changes: + +* Most notable change in this release is that very large allocations + (>1MiB) are now handled be O(log n) implementation. This is + contributed by Todd Lipcon based on earlier work by Aliaksei + Kandratsenka and James Golick. Special thanks to Alexey Serbin for + contributing OSX fix for that commit. + +* detection of sized deallocation support is improved. Which should + fix another set of issues building on OSX. Much thanks to Alexey + Serbin for reporting the issue, suggesting a fix and verifying it. + +* Todd Lipcon made a change to extend page heaps freelists to 1 MiB + (up from 1MiB - 8KiB). This may help a little for some workloads. + +* Ishan Arora contributed typo fix to docs + +== 9 Dec 2017 == +gperftools 2.6.3 is out! 
+ +Just two fixes were made in this release: + +* Stephan Zuercher has contributed a build fix for some recent XCode + versions. See issue #942 for more details. + +* assertion failure on some windows builds introduced by 2.6.2 was + fixed. Thanks to github user nkeemik for reporting it and testing + fix. See issue #944 for more details. + +== 30 Nov 2017 == +gperftools 2.6.2 is out! + +Most notable change is recently added support for C++17 over-aligned +allocation operators contributed by Andrey Semashev. I've extended his +implementation to have roughly same performance as malloc/new. This +release also has native support for C11 aligned_alloc. + +Rest is mostly bug fixes: + +* Jianbo Yang has contributed a fix for potentially severe data race + introduced by malloc fast-path work in gperftools 2.6. This race + could cause occasional violation of total thread cache size + constraint. See issue #929 for more details. + +* Correct behavior in out-of-memory condition in fast-path cases was + restored. This was another bug introduced by fast-path optimization + in gperftools 2.6 which caused operator new to silently return NULL + instead of doing correct C++ OOM handling (calling new_handler and + throwing bad_alloc). + +* Khem Raj has contributed couple build fixes for newer glibcs (ucontext_t vs + struct ucontext and loff_t definition) + +* Piotr Sikora has contributed build fix for OSX (not building unwind + benchmark). This was issue #910 (thanks to Yuriy Solovyov for + reporting it). + +* Dorin Lazăr has contributed fix for compiler warning + +* issue #912 (occasional deadlocking calling getenv too early on + windows) was fixed. Thanks to github user shangcangriluo for + reporting it. + +* Couple earlier lsan-related commits still causing occasional issues + linking on OSX has been reverted. See issue #901. 
+ +* Volodimir Krylov has contributed GetProgramInvocationName for FreeBSD + +* changsu lee has contributed couple minor correctness fixes (missing + va_end() and missing free() call in rarely executed Symbolize path) + +* Andrew C. Morrow has contributed some more page heap stats. See issue + #935. + +* some cases of built-time warnings from various gcc/clang versions + about throw() declarations have been fixes. + +== 9 July 2017 == + +gperftools 2.6.1 is out! This is mostly bug-fixes release. + +* issue #901: build issue on OSX introduced in last-time commit in 2.6 + was fixed (contributed by Francis Ricci) + +* tcmalloc_minimal now works on 32-bit ABI of mips64. This is issue + #845. Much thanks to Adhemerval Zanella and github user mtone. + +* Romain Geissler contributed build fix for -std=c++17. This is pull + request #897. + +* As part of fixing issue #904, tcmalloc atfork handler is now + installed early. This should fix slight chance of hitting deadlocks + at fork in some cases. + +== 4 July 2017 == + +gperftools 2.6 is out! + +* Kim Gräsman contributed documentation update for HEAPPROFILESIGNAL + environment variable + +* KernelMaker contributed fix for population of min_object_size field + returned by MallocExtension::GetFreeListSizes + +* commit 8c3dc52fcfe0 "issue-654: [pprof] handle split text segments" + was reverted. Some OSX users reported issues with this commit. Given + our pprof implementation is strongly deprecated it is best to drop + recently introduced features rather than breaking it badly. + +* Francis Ricci contributed improvement for interaction with leak + sanitizer. + +== 22 May 2017 == + +gperftools 2.6rc4 is out! + +Dynamic sized delete is disabled by default again. There is no hope of +it working with eager dynamic symbols resolution (-z now linker +flag). More details in +https://bugzilla.redhat.com/show_bug.cgi?id=1452813 + +== 21 May 2017 == + +gperftools 2.6rc3 is out! + +gperftools compilation on older systems (e.g. 
rhel 5) was fixed. This +was originally reported in github issue #888. + +== 14 May 2017 == + +gperftools 2.6rc2 is out! + +Just 2 small fixes on top of 2.6rc. Particularly, Rajalakshmi +Srinivasaraghavan contributed build fix for ppc32. + +== 14 May 2017 == + +gperftools 2.6rc is out! + +Highlights of this release are performance work on malloc fast-path +and support for more modern visual studio runtimes, and deprecation of +bundled pprof. Another significant performance-affecting changes are +reverting central free list transfer batch size back to 32 and +disabling of aggressive decommit mode by default. + +Note, while we still ship perl implementation of pprof, everyone is +strongly advised to use golang reimplementation of pprof from +https://github.com/google/pprof. + +Here are notable changes in more details (and see ChangeLog for full +details): + +* a bunch of performance tweaks to tcmalloc fast-path were + merged. This speeds up critical path of tcmalloc by few tens of + %. Well tuned and allocation-heavy programs should see substantial + performance boost (should apply to all modern elf platforms). This + is based on Google-internal tcmalloc changes for fast-path (with + obvious exception of lacking per-cpu mode, of course). Original + changes were made by Aliaksei Kandratsenka. And Andrew Hunter, + Dmitry Vyukov and Sanjay Ghemawat contributed with reviews and + discussions. + +* Architectures with 48 bits address space (x86-64 and aarch64) now + use faster 2 level page map. This was ported from Google-internal + change by Sanjay Ghemawat. + +* Default value of TCMALLOC_TRANSFER_NUM_OBJ was returned back to + 32. Larger values have been found to hurt certain programs (but help + some other benchmarks). Value can still be tweaked at run time via + environment variable. + +* tcmalloc aggressive decommit mode is now disabled by default + again. It was found to degrade performance of certain tensorflow + benchmarks. 
Users who prefer smaller heap over small performance win + can still set environment variable TCMALLOC_AGGRESSIVE_DECOMMIT=t. + +* runtime switchable sized delete support has be fixed and re-enabled + (on GNU/Linux). Programs that use C++ 14 or later that use sized + delete can again be sped up by setting environment variable + TCMALLOC_ENABLE_SIZED_DELETE=t. Support for enabling sized + deallication support at compile-time is still present, of course. + +* tcmalloc now explicitly avoids use of MADV_FREE on Linux, unless + TCMALLOC_USE_MADV_FREE is defined at compile time. This is because + performance impact of MADV_FREE is not well known. Original issue + #780 raised by Mathias Stearn. + +* issue #786 with occasional deadlocks in stack trace capturing via + libunwind was fixed. It was originally reported as Ceph issue: + http://tracker.ceph.com/issues/13522 + +* ChangeLog is now automatically generated from git log. Old ChangeLog + is now ChangeLog.old. + +* tcmalloc now provides implementation of nallocx. Function was + originally introduced by jemalloc and can be used to return real + allocation size given allocation request size. This is ported from + Google-internal tcmalloc change contributed by Dmitry Vyukov. + +* issue #843 which made tcmalloc crash when used with erlang runtime + was fixed. + +* issue #839 which caused tcmalloc's aggressive decommit mode to + degrade performance in some corner cases was fixed. + +* Bryan Chan contributed support for 31-bit s390. + +* Brian Silverman contributed compilation fix for 32-bit ARMs + +* Issue #817 that was causing tcmalloc to fail on windows 10 and + later, as well as on recent msvc was fixed. We now patch _free_base + as well. 
+ +* a bunch of minor documentation/typo fixes by: Mike Gaffney + <mike@uberu.com>, iivlev <iivlev@productengine.com>, savefromgoogle + <savefromgoogle@users.noreply.github.com>, John McDole + <jtmcdole@gmail.com>, zmertens <zmertens@asu.edu>, Kirill Müller + <krlmlr@mailbox.org>, Eugene <n.eugene536@gmail.com>, Ola Olsson + <ola1olsson@gmail.com>, Mostyn Bramley-Moore <mostynb@opera.com> + +* Tulio Magno Quites Machado Filho has contributed removal of + deprecated glibc malloc hooks. + +* Issue #827 that caused intercepting malloc on osx 10.12 to fail was + fixed, by copying fix made by Mike Hommey to jemalloc. Much thanks + to Koichi Shiraishi and David Ribeiro Alves for reporting it and + testing fix. + +* Aman Gupta and Kenton Varda contributed minor fixes to pprof (but + note again that pprof is deprecated) + +* Ryan Macnak contributed compilation fix for aarch64 + +* Francis Ricci has fixed unaligned memory access in debug allocator + +* TCMALLOC_PAGE_FENCE_NEVER_RECLAIM now actually works thanks to + contribution by Andrew Morrow. + +== 12 Mar 2016 == + +gperftools 2.5 is out! + +Just single bugfix was merged after rc2. Which was fix for issue #777. + +== 5 Mar 2016 == + +gperftools 2.5rc2 is out! + +New release contains just few commits on top of first release +candidate. One of them is build fix for Visual Studio. Another +significant change is that dynamic sized delete is now disabled by +default. It turned out that IFUNC relocations are not supporting our +advanced use case on all platforms and in all cases. + +== 21 Feb 2016 == + +gperftools 2.5rc is out! + +Here are major changes since 2.4: + +* we've moved to github! + +* Bryan Chan has contributed s390x support + +* stacktrace capturing via libgcc's _Unwind_Backtrace was implemented + (for architectures with missing or broken libunwind). + +* "emergency malloc" was implemented. 
Which unbreaks recursive calls + to malloc/free from stacktrace capturing functions (such as glibc's + backtrace() or libunwind on arm). It is enabled by + --enable-emergency-malloc configure flag or by default on arm when + --enable-stacktrace-via-backtrace is given. It is another fix for a + number of common issues people had on platforms with missing or broken + libunwind.
+ +* Anton Samokhvalov has made tcmalloc use mi_force_{un,}lock on OSX + instead of pthread_atfork. Which apparently fixes forking + issues tcmalloc had on OSX. + +* Milton Chiang has contributed support for building 32-bit gperftools + on arm8. + +* Patrick LoPresti has contributed support for specifying alternative + profiling signal via CPUPROFILE_TIMER_SIGNAL environment variable. + +* Paolo Bonzini has contributed support configuring filename for + sending malloc tracing output via TCMALLOC_TRACE_FILE environment + variable. + +* user spotrh has enabled use of futex on arm. + +* user mitchblank has contributed better declaration for arg-less + profiler functions. + +* Tom Conerly contributed proper freeing of memory allocated in + HeapProfileTable::FillOrderedProfile on error paths. + +* user fdeweerdt has contributed curl arguments handling fix in pprof + +* Frederik Mellbin fixed tcmalloc's idea of mangled new and delete + symbols on windows x64 + +* Dair Grant has contributed cacheline alignment for ThreadCache + objects + +* Fredrik Mellbin has contributed updated windows/config.h for Visual + Studio 2015 and other windows fixes. + +* we're not linking libpthread to libtcmalloc_minimal anymore. Instead + libtcmalloc_minimal links to pthread symbols weakly. As a result + single-threaded programs remain single-threaded when linking to or + preloading libtcmalloc_minimal.so. + +* Boris Sazonov has contributed mips compilation fix and printf misue + in pprof. + +* Adhemerval Zanella has contributed alignment fixes for statically + allocated variables. + +* Jens Rosenboom has contributed fixes for heap-profiler_unittest.sh + +* gshirishfree has contributed better description for GetStats method. + +* cyshi has contributed spinlock pause fix. + +* Chris Mayo has contributed --docdir argument support for configure. + +* Duncan Sands has contributed fix for function aliases. 
+ +* Simon Que contributed better include for malloc_hook_c.h + +* user wmamrak contributed struct timespec fix for Visual Studio 2015. + +* user ssubotin contributed typo in PrintAvailability code. + + +== 10 Jan 2015 == + +gperftools 2.4 is out! The code is exactly same as 2.4rc. + +== 28 Dec 2014 == + +gperftools 2.4rc is out! + +Here are changes since 2.3: + +* enabled aggressive decommit option by default. It was found to + significantly improve memory fragmentation with negligible impact on + performance. (Thanks to investigation work performed by Adhemerval + Zanella) + +* added ./configure flags for tcmalloc pagesize and tcmalloc + allocation alignment. Larger page sizes have been reported to + improve performance occasionally. (Patch by Raphael Moreira Zinsly) + +* sped-up hot-path of malloc/free. By about 5% on static library and + about 10% on shared library. Mainly due to more efficient checking + of malloc hooks. + +* improved stacktrace capturing in cpu profiler (due to issue found by + Arun Sharma). As part of that issue pprof's handling of cpu profiles + was also improved. + +== 7 Dec 2014 == + +gperftools 2.3 is out! + +Here are changes since 2.3rc: + +* (issue 658) correctly close socketpair fds on failure (patch by glider) + +* libunwind integration can be disabled at configure time (patch by + Raphael Moreira Zinsly) + +* libunwind integration is disabled by default for ppc64 (patch by + Raphael Moreira Zinsly) + +* libunwind integration is force-disabled for OSX. It was not used by + default anyways. Fixes compilation issue I saw. + +== 2 Nov 2014 == + +gperftools 2.3rc is out! + +Most small improvements in this release were made to pprof tool. + +New experimental Linux-only (for now) cpu profiling mode is a notable +big improvement. + +Here are notable changes since 2.2.1: + +* (issue-631) fixed debugallocation miscompilation on mmap-less + platforms (courtesy of user iamxujian) + +* (issue-630) reference to wrong PROFILE (vs. 
correct CPUPROFILE) + environment variable was fixed (courtesy of WenSheng He) + +* pprof now has option to display stack traces in output for heap + checker (courtesy of Michael Pasieka) + +* (issue-636) pprof web command now works on mingw + +* (issue-635) pprof now handles library paths that contain spaces + (courtesy of user mich...@sebesbefut.com) + +* (issue-637) pprof now has an option to not strip template arguments + (patch by jiakai) + +* (issue-644) possible out-of-bounds access in GetenvBeforeMain was + fixed (thanks to user abyss.7) + +* (issue-641) pprof now has an option --show_addresses (thanks to user + yurivict). New option prints instruction address in addition to + function name in stack traces + +* (issue-646) pprof now works around some issues of addr2line + reportedly when DWARF v4 format is used (patch by Adam McNeeney) + +* (issue-645) heap profiler exit message now includes remaining memory + allocated info (patch by user yurivict) + +* pprof code that finds location of /proc/<pid>/maps in cpu profile + files is now fixed (patch by Ricardo M. Correia) + +* (issue-654) pprof now handles "split text segments" feature of + Chromium for Android. (patch by simonb) + +* (issue-655) potential deadlock on windows caused by early call to + getenv in malloc initialization code was fixed (bug reported and fix + proposed by user zndmitry) + +* incorrect detection of arm 6zk instruction set support + (-mcpu=arm1176jzf-s) was fixed. (Reported by pedronavf on old + issue-493) + +* new cpu profiling mode on Linux is now implemented. It sets up + separate profiling timers for separate threads. Which improves + accuracy of profiling on Linux a lot. It is off by default. And is + enabled if both librt.f is loaded and CPUPROFILE_PER_THREAD_TIMERS + environment variable is set. But note that all threads need to be + registered via ProfilerRegisterThread. + +== 21 Jun 2014 == + +gperftools 2.2.1 is out! + +Here's list of fixes: + +* issue-626 was closed. 
Which fixes initialization of statically linked + tcmalloc.
+ +* there is now support for switching backtrace capturing method at + runtime (via TCMALLOC_STACKTRACE_METHOD and + TCMALLOC_STACKTRACE_METHOD_VERBOSE environment variables) + +* there is new backtrace capturing method using -finstrument-functions + prologues contributed by user xiaoyur347 + +* few cases of crashes/deadlocks in profiler were addressed. See + (famous) issue-66, issue-547 and issue-579. + +* issue-464 (memory corruption in debugalloc's realloc after + memallign) is now fixed + +* tcmalloc is now able to release memory back to OS on windows + (issue-489). The code was ported from chromium fork (by a number of + authors). + +* Together with issue-489 we ported chromium's "aggressive decommit" + mode. In this mode (settable via malloc extension and via + environment variable TCMALLOC_AGGRESSIVE_DECOMMIT), free pages are + returned back to OS immediately. + +* MallocExtension::instance() is now faster (based on patch by + Adhemerval Zanella) + +* issue-610 (hangs on windows in multibyte locales) is now fixed + +The following people helped with ideas or patches (based on git log, +some contributions purely in bugtracker might be missing): Andrew +C. Morrow, yurivict, Wang YanQing, Thomas Klausner, +davide.italiano@10gen.com, Dai MIKURUBE, Joon-Sung Um, Jovan +Zelincevic, Jean Lee, Petr Hosek, Ben Avison, drussel, Joonsoo Kim, +Hannes Weisbach, xiaoyur347, Riku Voipio, Adhemerval Zanella, Raphael +Moreira Zinsly + +== 30 July 2013 == + +gperftools 2.1 is out! + +Just few fixes where merged after rc. Most notably: + +* Some fixes for debug allocation on POWER/Linux + +== 20 July 2013 == + +gperftools 2.1rc is out! + +As a result of more than a year of contributions we're ready for 2.1 +release. + +But before making that step I'd like to create RC and make sure people +have chance to test it. + +Here are notable changes since 2.0: + +* fixes for building on newer platforms. 
Notably, there's now initial + support for x32 ABI (--enable-minimal only at this time)) + +* new getNumericProperty stats for cache sizes + +* added HEAP_PROFILER_TIME_INTERVAL variable (see documentation) + +* added environment variable to control heap size (TCMALLOC_HEAP_LIMIT_MB) + +* added environment variable to disable release of memory back to OS + (TCMALLOC_DISABLE_MEMORY_RELEASE) + +* cpu profiler can now be switched on and off by sending it a signal + (specified in CPUPROFILESIGNAL) + +* (issue 491) fixed race-ful spinlock wake-ups + +* (issue 496) added some support for fork-ing of process that is using + tcmalloc + +* (issue 368) improved memory fragmentation when large chunks of + memory are allocated/freed + +== 03 February 2012 == + +I've just released gperftools 2.0 + +The `google-perftools` project has been renamed to `gperftools`. I +(csilvers) am stepping down as maintainer, to be replaced by +David Chappelle. Welcome to the team, David! David has been an +an active contributor to perftools in the past -- in fact, he's the +only person other than me that already has commit status. I am +pleased to have him take over as maintainer. + +I have both renamed the project (the Google Code site renamed a few +weeks ago), and bumped the major version number up to 2, to reflect +the new community ownership of the project. Almost all the +[http://gperftools.googlecode.com/svn/tags/gperftools-2.0/ChangeLog changes] +are related to the renaming. + +The main functional change from google-perftools 1.10 is that +I've renamed the `google/` include-directory to be `gperftools/` +instead. New code should `#include <gperftools/tcmalloc.h>`/etc. +(Most users of perftools don't need any perftools-specific includes at +all, so this is mostly directed to "power users.") I've kept the old +names around as forwarding headers to the new, so `#include +<google/tcmalloc.h>` will continue to work. 
+ +(The other functional change which I snuck in is getting rid of some +bash-isms in one of the unittest driver scripts, so it could run on +Solaris.) + +Note that some internal names still contain the text `google`, such as +the `google_malloc` internal linker section. I think that's a +trickier transition, and can happen in a future release (if at all). + + +=== 31 January 2012 === + +I've just released perftools 1.10 + +There is an API-incompatible change: several of the methods in the +`MallocExtension` class have changed from taking a `void*` to taking a +`const void*`. You should not be affected by this API change +unless you've written your own custom malloc extension that derives +from `MallocExtension`, but since it is a user-visible change, I have +upped the `.so` version number for this release. + +This release focuses on improvements to linux-syscall-support.h, +including ARM and PPC fixups and general cleanups. I hope this will +magically fix an array of bugs people have been seeing. + +There is also exciting news on the porting front, with support for +patching win64 assembly contributed by IBM Canada! This is an +important step -- perhaps the most difficult -- to getting perftools +to work on 64-bit windows using the patching technique (it doesn't +affect the libc-modification technique). `premable_patcher_test` has +been added to help test these changes; it is meant to compile under +x86_64, and won't work under win32. + +For the full list of changes, including improved `HEAP_PROFILE_MMAP` +support, see the +[http://gperftools.googlecode.com/svn/tags/google-perftools-1.10/ChangeLog ChangeLog]. + + +=== 24 January 2011 === + +The `google-perftools` Google Code page has been renamed to +`gperftools`, in preparation for the project being renamed to +`gperftools`. 
In the coming weeks, I'll be stepping down as +maintainer for the perftools project, and as part of that Google is +relinquishing ownership of the project; it will now be entirely +community run. The name change reflects that shift. The 'g' in +'gperftools' stands for 'great'. :-) + +=== 23 December 2011 === + +I've just released perftools 1.9.1 + +I missed including a file in the tarball, that is needed to compile on +ARM. If you are not compiling on ARM, or have successfully compiled +perftools 1.9, there is no need to upgrade. + + +=== 22 December 2011 === + +I've just released perftools 1.9 + +This change has a slew of improvements, from better ARM and freebsd +support, to improved performance by moving some code outside of locks, +to better pprof reporting of code with overloaded functions. + +The full list of changes is in the +[http://google-perftools.googlecode.com/svn/tags/google-perftools-1.9/ChangeLog ChangeLog]. + + +=== 26 August 2011 === + +I've just released perftools 1.8.3 + +The star-crossed 1.8 series continues; in 1.8.1, I had accidentally +removed some code that was needed for FreeBSD. (Without this code +many apps would crash at startup.) This release re-adds that code. +If you are not on FreeBSD, or are using FreeBSD with perftools 1.8 or +earlier, there is no need to upgrade. + +=== 11 August 2011 === + +I've just released perftools 1.8.2 + +I was incorrectly calculating the patch-level in the configuration +step, meaning the TC_VERSION_PATCH #define in tcmalloc.h was wrong. +Since the testing framework checks for this, it was failing. Now it +should work again. This time, I was careful to re-run my tests after +upping the version number. :-) + +If you don't care about the TC_VERSION_PATCH #define, there's no +reason to upgrae. + +=== 26 July 2011 === + +I've just released perftools 1.8.1 + +I was missing an #include that caused the build to break under some +compilers, especially newer gcc's, that wanted it. 
This only affects +people who build from source, so only the .tar.gz file is updated from +perftools 1.8. If you didn't have any problems compiling perftools +1.8, there's no reason to upgrade. + +=== 15 July 2011 === + +I've just released perftools 1.8 + +Of the many changes in this release, a good number pertain to porting. +I've revamped OS X support to use the malloc-zone framework; it should +now Just Work to link in tcmalloc, without needing +`DYLD_FORCE_FLAT_NAMESPACE` or the like. (This is a pretty major +change, so please feel free to report feedback at +google-perftools@googlegroups.com.) 64-bit Windows support is also +improved, as is ARM support, and the hooks are in place to improve +FreeBSD support as well. + +On the other hand, I'm seeing hanging tests on Cygwin. I see the same +hanging even with (the old) perftools 1.7, so I'm guessing this is +either a problem specific to my Cygwin installation, or nobody is +trying to use perftools under Cygwin. If you can reproduce the +problem, and even better have a solution, you can report it at +google-perftools@googlegroups.com. + +Internal changes include several performance and space-saving tweaks. +One is user-visible (but in "stealth mode", and otherwise +undocumented): you can compile with `-DTCMALLOC_SMALL_BUT_SLOW`. In +this mode, tcmalloc will use less memory overhead, at the cost of +running (likely not noticeably) slower. + +There are many other changes as well, too numerous to recount here, +but present in the +[http://google-perftools.googlecode.com/svn/tags/google-perftools-1.8/ChangeLog ChangeLog]. + + +=== 7 February 2011 === + +Thanks to endlessr..., who +[http://code.google.com/p/google-perftools/issues/detail?id=307 identified] +why some tests were failing under MSVC 10 in release mode. 
It does not look +like these failures point toward any problem with tcmalloc itself; rather, the +problem is with the test, which made some assumptions that broke under the +some aggressive optimizations used in MSVC 10. I'll fix the test, but in +the meantime, feel free to use perftools even when compiled under MSVC +10. + +=== 4 February 2011 === + +I've just released perftools 1.7 + +I apologize for the delay since the last release; so many great new +patches and bugfixes kept coming in (and are still coming in; I also +apologize to those folks who have to slip until the next release). I +picked this arbitrary time to make a cut. + +Among the many new features in this release is a multi-megabyte +reduction in the amount of tcmalloc overhead uder x86_64, improved +performance in the case of contention, and many many bugfixes, +especially architecture-specific bugfixes. See the +[http://google-perftools.googlecode.com/svn/tags/google-perftools-1.7/ChangeLog ChangeLog] +for full details. + +One architecture-specific change of note is added comments in the +[http://google-perftools.googlecode.com/svn/tags/perftools-1.7/README README] +for using tcmalloc under OS X. I'm trying to get my head around the +exact behavior of the OS X linker, and hope to have more improvements +for the next release, but I hope these notes help folks who have been +having trouble with tcmalloc on OS X. + +*Windows users*: I've heard reports that some unittests fail on +Windows when compiled with MSVC 10 in Release mode. All tests pass in +Debug mode. I've not heard of any problems with earlier versions of +MSVC. I don't know if this is a problem with the runtime patching (so +the static patching discussed in README_windows.txt will still work), +a problem with perftools more generally, or a bug in MSVC 10. Anyone +with windows expertise that can debug this, I'd be glad to hear from! 
+ + +=== 5 August 2010 === + +I've just released perftools 1.6 + +This version also has a large number of minor changes, including +support for `malloc_usable_size()` as a glibc-compatible alias to +`malloc_size()`, the addition of SVG-based output to `pprof`, and +experimental support for tcmalloc large pages, which may speed up +tcmalloc at the cost of greater memory use. To use tcmalloc large +pages, see the +[http://google-perftools.googlecode.com/svn/tags/perftools-1.6/INSTALL +INSTALL file]; for all changes, see the +[http://google-perftools.googlecode.com/svn/tags/perftools-1.6/ChangeLog +ChangeLog]. + +OS X NOTE: improvements in the profiler unittest have turned up an OS +X issue: in multithreaded programs, it seems that OS X often delivers +the profiling signal (from sigitimer()) to the main thread, even when +it's sleeping, rather than spawned threads that are doing actual work. +If anyone knows details of how OS X handles SIGPROF events (from +setitimer) in threaded programs, and has insight into this problem, +please send mail to google-perftools@googlegroups.com. + +To see if you're affected by this, look for profiling time that pprof +attributes to `___semwait_signal`. This is work being done in other +threads, that is being attributed to sleeping-time in the main thread. + + +=== 20 January 2010 === + +I've just released perftools 1.5 + +This version has a slew of changes, leading to somewhat faster +performance and improvements in portability. It adds features like +`ITIMER_REAL` support to the cpu profiler, and `tc_set_new_mode` to +mimic the windows function of the same name. Full details are in the +[http://google-perftools.googlecode.com/svn/tags/perftools-1.5/ChangeLog +ChangeLog]. + + +=== 11 September 2009 === + +I've just released perftools 1.4 + +The major change this release is the addition of a debugging malloc +library! 
If you link with `libtcmalloc_debug.so` instead of +`libtcmalloc.so` (and likewise for the `minimal` variants) you'll get +a debugging malloc, which will catch double-frees, writes to freed +data, `free`/`delete` and `delete`/`delete[]` mismatches, and even +(optionally) writes past the end of an allocated block. + +We plan to do more with this library in the future, including +supporting it on Windows, and adding the ability to use the debugging +library with your default malloc in addition to using it with +tcmalloc. + +There are also the usual complement of bug fixes, documented in the +ChangeLog, and a few minor user-tunable knobs added to components like +the system allocator. + + +=== 9 June 2009 === + +I've just released perftools 1.3 + +Like 1.2, this has a variety of bug fixes, especially related to the +Windows build. One of my bugfixes is to undo the weird `ld -r` fix to +`.a` files that I introduced in perftools 1.2: it caused problems on +too many platforms. I've reverted back to normal `.a` files. To work +around the original problem that prompted the `ld -r` fix, I now +provide `libtcmalloc_and_profiler.a`, for folks who want to link in +both. + +The most interesting API change is that I now not only override +`malloc`/`free`/etc, I also expose them via a unique set of symbols: +`tc_malloc`/`tc_free`/etc. This enables clients to write their own +memory wrappers that use tcmalloc: +{{{ + void* malloc(size_t size) { void* r = tc_malloc(size); Log(r); return r; } +}}} + + +=== 17 April 2009 === + +I've just released perftools 1.2. + +This is mostly a bugfix release. The major change is internal: I have +a new system for creating packages, which allows me to create 64-bit +packages. (I still don't do that for perftools, because there is +still no great 64-bit solution, with libunwind still giving problems +and --disable-frame-pointers not practical in every environment.) 
+ +Another interesting change involves Windows: a +[http://code.google.com/p/google-perftools/issues/detail?id=126 new +patch] allows users to choose to override malloc/free/etc on Windows +rather than patching, as is done now. This can be used to create +custom CRTs. + +My fix for this +[http://groups.google.com/group/google-perftools/browse_thread/thread/1ff9b50043090d9d/a59210c4206f2060?lnk=gst&q=dynamic#a59210c4206f2060 +bug involving static linking] ended up being to make libtcmalloc.a and +libperftools.a a big .o file, rather than a true `ar` archive. This +should not yield any problems in practice -- in fact, it should be +better, since the heap profiler, leak checker, and cpu profiler will +now all work even with the static libraries -- but if you find it +does, please file a bug report. + +Finally, the profile_handler_unittest provided in the perftools +testsuite (new in this release) is failing on FreeBSD. The end-to-end +test that uses the profile-handler is passing, so I suspect the +problem may be with the test, not the perftools code itself. However, +I do not know enough about how itimers work on FreeBSD to be able to +debug it. If you can figure it out, please let me know! + +=== 11 March 2009 === + +I've just released perftools 1.1! + +It has many changes since perftools 1.0 including + + * Faster performance due to dynamically sized thread caches + * Better heap-sampling for more realistic profiles + * Improved support on Windows (MSVC 7.1 and cygwin) + * Better stacktraces in linux (using VDSO) + * Many bug fixes and feature requests + +Note: if you use the CPU-profiler with applications that fork without +doing an exec right afterwards, please see the README. Recent testing +has shown that profiles are unreliable in that case. The problem has +existed since the first release of perftools. We expect to have a fix +for perftools 1.2. For more details, see +[http://code.google.com/p/google-perftools/issues/detail?id=105 issue 105]. 
+ +Everyone who uses perftools 1.0 is encouraged to upgrade to perftools +1.1. If you see any problems with the new release, please file a bug +report at http://code.google.com/p/google-perftools/issues/list. + +Enjoy! diff --git a/src/third_party/gperftools-2.7/README b/src/third_party/gperftools-2.7/README new file mode 100644 index 00000000000..8f61410df53 --- /dev/null +++ b/src/third_party/gperftools-2.7/README @@ -0,0 +1,284 @@ +gperftools +---------- +(originally Google Performance Tools) + +The fastest malloc we’ve seen; works particularly well with threads +and STL. Also: thread-friendly heap-checker, heap-profiler, and +cpu-profiler. + + +OVERVIEW +--------- + +gperftools is a collection of a high-performance multi-threaded +malloc() implementation, plus some pretty nifty performance analysis +tools. + +gperftools is distributed under the terms of the BSD License. Join our +mailing list at gperftools@googlegroups.com for updates: +https://groups.google.com/forum/#!forum/gperftools + +gperftools was original home for pprof program. But do note that +original pprof (which is still included with gperftools) is now +deprecated in favor of golang version at https://github.com/google/pprof + + +TCMALLOC +-------- +Just link in -ltcmalloc or -ltcmalloc_minimal to get the advantages of +tcmalloc -- a replacement for malloc and new. See below for some +environment variables you can use with tcmalloc, as well. + +tcmalloc functionality is available on all systems we've tested; see +INSTALL for more details. See README_windows.txt for instructions on +using tcmalloc on Windows. + +NOTE: When compiling with programs with gcc, that you plan to link +with libtcmalloc, it's safest to pass in the flags + + -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free + +when compiling. gcc makes some optimizations assuming it is using its +own, built-in malloc; that assumption obviously isn't true with +tcmalloc. 
In practice, we haven't seen any problems with this, but +the expected risk is highest for users who register their own malloc +hooks with tcmalloc (using gperftools/malloc_hook.h). The risk is +lowest for folks who use tcmalloc_minimal (or, of course, who pass in +the above flags :-) ). + + +HEAP PROFILER +------------- +See docs/heapprofile.html for information about how to use tcmalloc's +heap profiler and analyze its output. + +As a quick-start, do the following after installing this package: + +1) Link your executable with -ltcmalloc +2) Run your executable with the HEAPPROFILE environment var set: + $ HEAPPROFILE=/tmp/heapprof <path/to/binary> [binary args] +3) Run pprof to analyze the heap usage + $ pprof <path/to/binary> /tmp/heapprof.0045.heap # run 'ls' to see options + $ pprof --gv <path/to/binary> /tmp/heapprof.0045.heap + +You can also use LD_PRELOAD to heap-profile an executable that you +didn't compile. + +There are other environment variables, besides HEAPPROFILE, you can +set to adjust the heap-profiler behavior; c.f. "ENVIRONMENT VARIABLES" +below. + +The heap profiler is available on all unix-based systems we've tested; +see INSTALL for more details. It is not currently available on Windows. + + +HEAP CHECKER +------------ +See docs/heap_checker.html for information about how to use tcmalloc's +heap checker. + +In order to catch all heap leaks, tcmalloc must be linked *last* into +your executable. The heap checker may mischaracterize some memory +accesses in libraries listed after it on the link line. For instance, +it may report these libraries as leaking memory when they're not. +(See the source code for more details.) 
+ +Here's a quick-start for how to use: + +As a quick-start, do the following after installing this package: + +1) Link your executable with -ltcmalloc +2) Run your executable with the HEAPCHECK environment var set: + $ HEAPCHECK=1 <path/to/binary> [binary args] + +Other values for HEAPCHECK: normal (equivalent to "1"), strict, draconian + +You can also use LD_PRELOAD to heap-check an executable that you +didn't compile. + +The heap checker is only available on Linux at this time; see INSTALL +for more details. + + +CPU PROFILER +------------ +See docs/cpuprofile.html for information about how to use the CPU +profiler and analyze its output. + +As a quick-start, do the following after installing this package: + +1) Link your executable with -lprofiler +2) Run your executable with the CPUPROFILE environment var set: + $ CPUPROFILE=/tmp/prof.out <path/to/binary> [binary args] +3) Run pprof to analyze the CPU usage + $ pprof <path/to/binary> /tmp/prof.out # -pg-like text output + $ pprof --gv <path/to/binary> /tmp/prof.out # really cool graphical output + +There are other environment variables, besides CPUPROFILE, you can set +to adjust the cpu-profiler behavior; cf "ENVIRONMENT VARIABLES" below. + +The CPU profiler is available on all unix-based systems we've tested; +see INSTALL for more details. It is not currently available on Windows. + +NOTE: CPU profiling doesn't work after fork (unless you immediately + do an exec()-like call afterwards). Furthermore, if you do + fork, and the child calls exit(), it may corrupt the profile + data. You can use _exit() to work around this. We hope to have + a fix for both problems in the next release of perftools + (hopefully perftools 1.2). + + +EVERYTHING IN ONE +----------------- +If you want the CPU profiler, heap profiler, and heap leak-checker to +all be available for your application, you can do: + gcc -o myapp ... 
-lprofiler -ltcmalloc + +However, if you have a reason to use the static versions of the +library, this two-library linking won't work: + gcc -o myapp ... /usr/lib/libprofiler.a /usr/lib/libtcmalloc.a # errors! + +Instead, use the special libtcmalloc_and_profiler library, which we +make for just this purpose: + gcc -o myapp ... /usr/lib/libtcmalloc_and_profiler.a + + +CONFIGURATION OPTIONS +--------------------- +For advanced users, there are several flags you can pass to +'./configure' that tweak tcmalloc performace. (These are in addition +to the environment variables you can set at runtime to affect +tcmalloc, described below.) See the INSTALL file for details. + + +ENVIRONMENT VARIABLES +--------------------- +The cpu profiler, heap checker, and heap profiler will lie dormant, +using no memory or CPU, until you turn them on. (Thus, there's no +harm in linking -lprofiler into every application, and also -ltcmalloc +assuming you're ok using the non-libc malloc library.) + +The easiest way to turn them on is by setting the appropriate +environment variables. We have several variables that let you +enable/disable features as well as tweak parameters. + +Here are some of the most important variables: + +HEAPPROFILE=<pre> -- turns on heap profiling and dumps data using this prefix +HEAPCHECK=<type> -- turns on heap checking with strictness 'type' +CPUPROFILE=<file> -- turns on cpu profiling and dumps data to this file. +PROFILESELECTED=1 -- if set, cpu-profiler will only profile regions of code + surrounded with ProfilerEnable()/ProfilerDisable(). +CPUPROFILE_FREQUENCY=x-- how many interrupts/second the cpu-profiler samples. 
+ +PERFTOOLS_VERBOSE=<level> -- the higher level, the more messages malloc emits +MALLOCSTATS=<level> -- prints memory-use stats at program-exit + +For a full list of variables, see the documentation pages: + docs/cpuprofile.html + docs/heapprofile.html + docs/heap_checker.html + + +COMPILING ON NON-LINUX SYSTEMS +------------------------------ + +Perftools was developed and tested on x86 Linux systems, and it works +in its full generality only on those systems. However, we've +successfully ported much of the tcmalloc library to FreeBSD, Solaris +x86, and Darwin (Mac OS X) x86 and ppc; and we've ported the basic +functionality in tcmalloc_minimal to Windows. See INSTALL for details. +See README_windows.txt for details on the Windows port. + + +PERFORMANCE +----------- + +If you're interested in some third-party comparisons of tcmalloc to +other malloc libraries, here are a few web pages that have been +brought to our attention. The first discusses the effect of using +various malloc libraries on OpenLDAP. The second compares tcmalloc to +win32's malloc. + http://www.highlandsun.com/hyc/malloc/ + http://gaiacrtn.free.fr/articles/win32perftools.html + +It's possible to build tcmalloc in a way that trades off faster +performance (particularly for deletes) at the cost of more memory +fragmentation (that is, more unusable memory on your system). See the +INSTALL file for details. + + +OLD SYSTEM ISSUES +----------------- + +When compiling perftools on some old systems, like RedHat 8, you may +get an error like this: + ___tls_get_addr: symbol not found + +This means that you have a system where some parts are updated enough +to support Thread Local Storage, but others are not. The perftools +configure script can't always detect this kind of case, leading to +that error. To fix it, just comment out (or delete) the line + #define HAVE_TLS 1 +in your config.h file before building. 
+ + +64-BIT ISSUES +------------- + +There are two issues that can cause program hangs or crashes on x86_64 +64-bit systems, which use the libunwind library to get stack-traces. +Neither issue should affect the core tcmalloc library; they both +affect the perftools tools such as cpu-profiler, heap-checker, and +heap-profiler. + +1) Some libc's -- at least glibc 2.4 on x86_64 -- have a bug where the +libc function dl_iterate_phdr() acquires its locks in the wrong +order. This bug should not affect tcmalloc, but may cause occasional +deadlock with the cpu-profiler, heap-profiler, and heap-checker. +Its likeliness increases the more dlopen() commands an executable has. +Most executables don't have any, though several library routines like +getgrgid() call dlopen() behind the scenes. + +2) On x86-64 64-bit systems, while tcmalloc itself works fine, the +cpu-profiler tool is unreliable: it will sometimes work, but sometimes +cause a segfault. I'll explain the problem first, and then some +workarounds. + +Note that this only affects the cpu-profiler, which is a +gperftools feature you must turn on manually by setting the +CPUPROFILE environment variable. If you do not turn on cpu-profiling, +you shouldn't see any crashes due to perftools. + +The gory details: The underlying problem is in the backtrace() +function, which is a built-in function in libc. +Backtracing is fairly straightforward in the normal case, but can run +into problems when having to backtrace across a signal frame. +Unfortunately, the cpu-profiler uses signals in order to register a +profiling event, so every backtrace that the profiler does crosses a +signal frame. + +In our experience, the only time there is trouble is when the signal +fires in the middle of pthread_mutex_lock. pthread_mutex_lock is +called quite a bit from system libraries, particularly at program +startup and when creating a new thread. 
+ +The solution: The dwarf debugging format has support for 'cfi +annotations', which make it easy to recognize a signal frame. Some OS +distributions, such as Fedora and gentoo 2007.0, already have added +cfi annotations to their libc. A future version of libunwind should +recognize these annotations; these systems should not see any +crashses. + +Workarounds: If you see problems with crashes when running the +cpu-profiler, consider inserting ProfilerStart()/ProfilerStop() into +your code, rather than setting CPUPROFILE. This will profile only +those sections of the codebase. Though we haven't done much testing, +in theory this should reduce the chance of crashes by limiting the +signal generation to only a small part of the codebase. Ideally, you +would not use ProfilerStart()/ProfilerStop() around code that spawns +new threads, or is otherwise likely to cause a call to +pthread_mutex_lock! + +--- +17 May 2011 diff --git a/src/third_party/gperftools-2.7/README_windows.txt b/src/third_party/gperftools-2.7/README_windows.txt new file mode 100644 index 00000000000..7bba12201e0 --- /dev/null +++ b/src/third_party/gperftools-2.7/README_windows.txt @@ -0,0 +1,120 @@ +--- COMPILING
+
+This project has begun being ported to Windows, only tcmalloc_minimal
+is supported at this time. A working solution file exists in this
+directory:
+ gperftools.sln
+
+You can load this solution file into VC++ 7.1 (Visual Studio 2003) or
+later -- in the latter case, it will automatically convert the files
+to the latest format for you.
+
+When you build the solution, it will create a number of unittests,
+which you can run by hand (or, more easily, under the Visual Studio
+debugger) to make sure everything is working properly on your system.
+The binaries will end up in a directory called "debug" or "release" in
+the top-level directory (next to the .sln file). It will also create
+two binaries, nm-pdb and addr2line-pdb, which you should install in
+the same directory you install the 'pprof' perl script.
+
+I don't know very much about how to install DLLs on Windows, so you'll
+have to figure out that part for yourself. If you choose to just
+re-use the existing .sln, make sure you set the IncludeDir's
+appropriately! Look at the properties for libtcmalloc_minimal.dll.
+
+Note that these systems are set to build in Debug mode by default.
+You may want to change them to Release mode.
+
+To use tcmalloc_minimal in your own projects, you should only need to
+build the dll and install it someplace, so you can link it into
+further binaries. To use the dll, you need to add the following to
+the linker line of your executable:
+ "libtcmalloc_minimal.lib" /INCLUDE:"__tcmalloc"
+
+Here is how to accomplish this in Visual Studio 2005 (VC8):
+
+1) Have your executable depend on the tcmalloc library by selecting
+ "Project Dependencies..." from the "Project" menu. Your executable
+ should depend on "libtcmalloc_minimal".
+
+2) Have your executable depend on a tcmalloc symbol -- this is
+ necessary so the linker doesn't "optimize out" the libtcmalloc
+ dependency -- by right-clicking on your executable's project (in
+ the solution explorer), selecting Properties from the pull-down
+ menu, then selecting "Configuration Properties" -> "Linker" ->
+ "Input". Then, in the "Force Symbol References" field, enter the
+ text "__tcmalloc" (without the quotes). Be sure to do this for both
+ debug and release modes!
+
+You can also link tcmalloc code in statically -- see the example
+project tcmalloc_minimal_unittest-static, which does this. For this
+to work, you'll need to add "/D PERFTOOLS_DLL_DECL=" to the compile
+line of every perftools .cc file. You do not need to depend on the
+tcmalloc symbol in this case (that is, you don't need to do either
+step 1 or step 2 from above).
+
+An alternative to all the above is to statically link your application
+with libc, and then replace its malloc with tcmalloc. This allows you
+to just build and link your program normally; the tcmalloc support
+comes in a post-processing step. This is more reliable than the above
+technique (which depends on run-time patching, which is inherently
+fragile), though more work to set up. For details, see
+ https://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b
+
+
+--- THE HEAP-PROFILER
+
+The heap-profiler has had a preliminary port to Windows but does not
+build on Windows by default. It has not been well tested, and
+probably does not work at all when Frame Pointer Optimization (FPO) is
+enabled -- that is, in release mode. The other features of perftools,
+such as the cpu-profiler and leak-checker, have not yet been ported to
+Windows at all.
+
+
+--- WIN64
+
+The function-patcher has to disassemble code, and is very
+x86-specific. However, the rest of perftools should work fine for
+both x86 and x64. In particular, if you use the 'statically link with
+libc, and replace its malloc with tcmalloc' approach, mentioned above,
+it should be possible to use tcmalloc with 64-bit windows.
+
+As of perftools 1.10, there is some support for disassembling x86_64
+instructions, for work with win64. This work is preliminary, but the
+test file preamble_patcher_test.cc is provided to play around with
+that a bit. preamble_patcher_test will not compile on win32.
+
+
+--- ISSUES
+
+NOTE FOR WIN2K USERS: According to reports
+(http://code.google.com/p/gperftools/issues/detail?id=127)
+the stack-tracing necessary for the heap-profiler does not work on
+Win2K. The best workaround, if you are building on a Win2k system,
+is to add "/D NO_TCMALLOC_SAMPLES=" to your build, to turn off the
+stack-tracing. You will not be able to use the heap-profiler if you
+do this.
+
+NOTE ON _MSIZE and _RECALLOC: The tcmalloc version of _msize returns
+the size of the region tcmalloc allocated for you -- which is at least
+as many bytes as you asked for, but may be more. (btw, these *are* bytes
+you own, even if you didn't ask for all of them, so it's correct code
+to access all of them if you want.) Unfortunately, the Windows CRT
+_recalloc() routine assumes that _msize returns exactly as many bytes
+as were requested. As a result, _recalloc() may not zero out new
+bytes correctly. IT'S SAFEST NOT TO USE _RECALLOC WITH TCMALLOC.
+_recalloc() is a tricky routine to use in any case (it's not safe to
+use with realloc, for instance).
+
+
+I have little experience with Windows programming, so there may be
+better ways to set this up than I've done! If you run across any
+problems, please post to the google-perftools Google Group, or report
+them on the gperftools Google Code site:
+ http://groups.google.com/group/google-perftools
+ http://code.google.com/p/gperftools/issues/list
+
+-- craig
+
+Last modified: 2 February 2012
diff --git a/src/third_party/gperftools-2.7/SConscript b/src/third_party/gperftools-2.7/SConscript new file mode 100644 index 00000000000..43ec25c4473 --- /dev/null +++ b/src/third_party/gperftools-2.7/SConscript @@ -0,0 +1,118 @@ +# -*- mode: python -*- + +Import("env") +Import("has_option") +Import("debugBuild") + +env = env.Clone() + +files = [ + 'src/base/dynamic_annotations.c', + 'src/base/elf_mem_image.cc', + 'src/base/logging.cc', + 'src/base/spinlock.cc', + 'src/base/spinlock_internal.cc', + 'src/base/sysinfo.cc', + 'src/base/vdso_support.cc', + 'src/central_freelist.cc', + 'src/common.cc', + 'src/internal_logging.cc', + 'src/malloc_extension.cc', + 'src/malloc_hook.cc', + 'src/memfs_malloc.cc', + 'src/page_heap.cc', + 'src/sampler.cc', + 'src/span.cc', + 'src/stack_trace_table.cc', + 'src/stacktrace.cc', + 'src/static_vars.cc', + 'src/symbolize.cc', + 'src/thread_cache.cc', + ] + +if env.TargetOSIs('windows'): + files += [ + 'src/tcmalloc.cc', + 'src/windows/port.cc', + 'src/windows/system-alloc.cc', + 'src/fake_stacktrace_scope.cc', + ] + + # warning C4141: 'inline': used more than once + # warning C4305: 'argument': truncation from 'ssize_t' to 'double' + env.Append(CXXFLAGS=["/wd4141", "/wd4305"]) + +else: + files += [ + 'src/emergency_malloc_for_stacktrace.cc', + 'src/maybe_threads.cc', + 'src/system-alloc.cc', + ] + + if not debugBuild: + files += ['src/tcmalloc.cc'], + else: + files += ['src/debugallocation.cc'] + +if has_option( 'use-cpu-profiler' ): + files += [ + 'src/profile-handler.cc', + 'src/profiledata.cc', + 'src/profiler.cc', + ] + + +conf = Configure(env.Clone()) + +if has_option('use-cpu-profiler'): + if not conf.CheckLib('unwind', autoadd=False): + env.ConfError("Compiling with --use-cpu-profiler requires having libunwind installed") + conf.env.Append( + CPPDEFINES=["NO_FRAME_POINTER", ("HAVE_LIBUNWIND_H", "1"), 'HAVE_UCONTEXT_H'], + SYSLIBDEPS=['unwind'] + ) + +env = conf.Finish() + +env.Append(CPPPATH=["build_" + env["TARGET_OS"] + "_" 
+ env["TARGET_ARCH"]]) + +env.Append( + CPPDEFINES=["NO_HEAP_CHECK"], +) + +# The build system doesn't define NDEBUG globally for historical reasons, however, TCMalloc +# expects that NDEBUG is used to select between preferring the mmap or the sbrk allocator. For +# non-debug builds, we want to prefer the sbrk allocator since this is TCMallocs preferred +# production deployment configuration. See the use of NDEBUG and kDebugMode in +# src/system-alloc.cc for more details. +if not debugBuild: + env.Append( CPPDEFINES=["NDEBUG"] ) + +# For debug builds we want to capture stacks during (de)allocations, +# but we don't want to pay that cost for release builds. For non-debug +# builds we use NO_TCMALLOC_SAMPLES to disable the stack trace +# collection. For debug builds we enable stack capture, but only on +# intel targets, since tcmalloc's unwinder is very slow on other +# platforms (see SERVER-28502). +if (not debugBuild) or (not env['TARGET_ARCH'] in ['x86_64', 'i386']): + env.Append(CPPDEFINES=["NO_TCMALLOC_SAMPLES"]) + +env.Prepend( CPPPATH=["src/"] ) + +def removeIfPresent(lst, item): + try: + lst.remove(item) + except ValueError: + pass + +for to_remove in ['-Werror', "-Wsign-compare","-Wall","-Werror=unused-result"]: + removeIfPresent(env['CCFLAGS'], to_remove) + +# GCC on PowerPC under C++11 mode does not define __linux which gperftools depends on +if env['TARGET_ARCH'] == 'ppc64le': + env.Append( CPPDEFINES=[ "__linux"] ) + +env.Library( + target='tcmalloc_minimal', + source=files, +) diff --git a/src/third_party/gperftools-2.7/TODO b/src/third_party/gperftools-2.7/TODO new file mode 100644 index 00000000000..550f7e09b9d --- /dev/null +++ b/src/third_party/gperftools-2.7/TODO @@ -0,0 +1,47 @@ +HEAP PROFILER + +1) Fix heap profiling under all STLs + * Find out how to force non-glibc STL libraries to call new() and + delete() for every allocation / deallocation. 
+ * Make heap profiler ignore STL-internal allocations for those + libraries under which we cannot profile accurately, so we only + see object-level leaks. +2) Remove dependency on tcmalloc? +3) Port to non-linux O/Ses (right now code uses /proc for library info) +4) Port to non-x86 architectures (locking code in spinlock is x86-specific) +5) Port to C? +6) Figure out how to get setenv() to work properly before main() in + shared libaries, and get rid of the profile-naming hack once we + do. (See HeapProfiler::Init().) + + +HEAP CHECKER + +1) Remove requirement that the heap-checker must be linked last into + an application (hard! -- it needs its global constructor to run + first) + +TCMALLOC + +1) Implement mallinfo/mallopt +2) Have tcmalloc work correctly when libpthread is not linked in + (currently working for glibc, could use other libc's too) +3) Return memory to the system when requirements drop +4) Explore coloring allocated objects to avoid cache conflicts +5) Explore biasing reclamation to larger addresses +6) Add contention stats to a synchronization.cc (can do spinlocks, + but threads? -- may have to provide our own thread implementation) + +CPU PROFILER + +1) Figure out how to get setenv() to work properly before main() in + shared libaries(), and get rid of the profile-naming hack once we + do. (See Profiler::GetUniquePathFromEnv().) +2) Resolve crashing problems on x86_64 (see README) + +STACKTRACE + +1) Remove dependency on linux/x86 + +--- +11 March 2008 diff --git a/src/third_party/gperftools-2.7/build_linux_aarch64/config.h b/src/third_party/gperftools-2.7/build_linux_aarch64/config.h new file mode 100644 index 00000000000..90be290bcf9 --- /dev/null +++ b/src/third_party/gperftools-2.7/build_linux_aarch64/config.h @@ -0,0 +1,311 @@ +/* src/config.h. Generated from config.h.in by configure. */ +/* src/config.h.in. Generated from configure.ac by autoheader. 
*/ + + +#ifndef GPERFTOOLS_CONFIG_H_ +#define GPERFTOOLS_CONFIG_H_ + + +/* Build new/delete operators for overaligned types */ +/* #undef ENABLE_ALIGNED_NEW_DELETE */ + +/* Build runtime detection for sized delete */ +/* #undef ENABLE_DYNAMIC_SIZED_DELETE */ + +/* Build sized deletion operators */ +/* #undef ENABLE_SIZED_DELETE */ + +/* Define to 1 if compiler supports __builtin_expect */ +#define HAVE_BUILTIN_EXPECT 1 + +/* Define to 1 if compiler supports __builtin_stack_pointer */ +/* #undef HAVE_BUILTIN_STACK_POINTER */ + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +/* #undef HAVE_CONFLICT_SIGNAL_H */ + +/* Define to 1 if you have the <cygwin/signal.h> header file. */ +/* #undef HAVE_CYGWIN_SIGNAL_H */ + +/* Define to 1 if you have the declaration of `backtrace', and to 0 if you + don't. */ +/* #undef HAVE_DECL_BACKTRACE */ + +/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. + */ +#define HAVE_DECL_CFREE 1 + +/* Define to 1 if you have the declaration of `memalign', and to 0 if you + don't. */ +#define HAVE_DECL_MEMALIGN 1 + +/* Define to 1 if you have the declaration of `nanosleep', and to 0 if you + don't. */ +/* #undef HAVE_DECL_NANOSLEEP */ + +/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if + you don't. */ +#define HAVE_DECL_POSIX_MEMALIGN 1 + +/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you + don't. */ +#define HAVE_DECL_PVALLOC 1 + +/* Define to 1 if you have the declaration of `sleep', and to 0 if you don't. + */ +/* #undef HAVE_DECL_SLEEP */ + +/* Define to 1 if you have the declaration of `uname', and to 0 if you don't. + */ +#define HAVE_DECL_UNAME 1 + +/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't. + */ +#define HAVE_DECL_VALLOC 1 + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if the system has the type `Elf32_Versym'. 
*/ +#define HAVE_ELF32_VERSYM 1 + +/* Define to 1 if you have the <execinfo.h> header file. */ +#define HAVE_EXECINFO_H 1 + +/* Define to 1 if you have the <fcntl.h> header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the <features.h> header file. */ +#define HAVE_FEATURES_H 1 + +/* Define to 1 if you have the `fork' function. */ +#define HAVE_FORK 1 + +/* Define to 1 if you have the `geteuid' function. */ +#define HAVE_GETEUID 1 + +/* Define to 1 if you have the `getpagesize' function. */ +#define HAVE_GETPAGESIZE 1 + +/* Define to 1 if you have the <glob.h> header file. */ +#define HAVE_GLOB_H 1 + +/* Define to 1 if you have the <grp.h> header file. */ +#define HAVE_GRP_H 1 + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the <libunwind.h> header file. */ +/* #undef HAVE_LIBUNWIND_H */ + +/* Define to 1 if you have the <linux/ptrace.h> header file. */ +#define HAVE_LINUX_PTRACE_H 1 + +/* Define if this is Linux that has SIGEV_THREAD_ID */ +#define HAVE_LINUX_SIGEV_THREAD_ID 1 + +/* Define to 1 if you have the <malloc.h> header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have a working `mmap' system call. */ +#define HAVE_MMAP 1 + +/* define if the compiler implements namespaces */ +#define HAVE_NAMESPACES 1 + +/* Define to 1 if you have the <poll.h> header file. */ +#define HAVE_POLL_H 1 + +/* define if libc has program_invocation_name */ +#define HAVE_PROGRAM_INVOCATION_NAME 1 + +/* Define if you have POSIX threads libraries and header files. */ +#define HAVE_PTHREAD 1 + +/* defined to 1 if pthread symbols are exposed even without include pthread.h + */ +/* #undef HAVE_PTHREAD_DESPITE_ASKING_FOR */ + +/* Define to 1 if you have the <pwd.h> header file. */ +#define HAVE_PWD_H 1 + +/* Define to 1 if you have the `sbrk' function. 
*/ +#define HAVE_SBRK 1 + +/* Define to 1 if you have the <sched.h> header file. */ +#define HAVE_SCHED_H 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if the system has the type `struct mallinfo'. */ +#define HAVE_STRUCT_MALLINFO 1 + +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +#define HAVE_SYS_CDEFS_H 1 + +/* Define to 1 if you have the <sys/param.h> header file. */ +#define HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the <sys/prctl.h> header file. */ +#define HAVE_SYS_PRCTL_H 1 + +/* Define to 1 if you have the <sys/resource.h> header file. */ +#define HAVE_SYS_RESOURCE_H 1 + +/* Define to 1 if you have the <sys/socket.h> header file. */ +#define HAVE_SYS_SOCKET_H 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/syscall.h> header file. */ +#define HAVE_SYS_SYSCALL_H 1 + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <sys/ucontext.h> header file. */ +#define HAVE_SYS_UCONTEXT_H 1 + +/* Define to 1 if you have the <sys/wait.h> header file. */ +#define HAVE_SYS_WAIT_H 1 + +/* Define to 1 if compiler supports __thread */ +#define HAVE_TLS 1 + +/* Define to 1 if you have the <ucontext.h> header file. */ +#define HAVE_UCONTEXT_H 1 + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Whether <unwind.h> contains _Unwind_Backtrace */ +#define HAVE_UNWIND_BACKTRACE 1 + +/* Define to 1 if you have the <unwind.h> header file. */ +#define HAVE_UNWIND_H 1 + +/* Define to 1 if you have the <valgrind.h> header file. 
*/ +/* #undef HAVE_VALGRIND_H */ + +/* define if your compiler has __attribute__ */ +#define HAVE___ATTRIBUTE__ 1 + +/* define if your compiler supports alignment of functions */ +#define HAVE___ATTRIBUTE__ALIGNED_FN 1 + +/* Define to 1 if compiler supports __environ */ +#define HAVE___ENVIRON 1 + +/* Define to 1 if the system has the type `__int64'. */ +/* #undef HAVE___INT64 */ + +/* prefix where we look for installed files */ +#define INSTALL_PREFIX "/usr/local" + +/* Define to 1 if int32_t is equivalent to intptr_t */ +/* #undef INT32_EQUALS_INTPTR */ + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "gperftools" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "gperftools@googlegroups.com" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "gperftools" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "gperftools 2.7" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "gperftools" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.7" + +/* How to access the PC from a struct ucontext */ +#define PC_FROM_UCONTEXT uc_mcontext.pc + +/* Always the empty-string on non-windows systems. On windows, should be + "__declspec(dllexport)". This way, when we compile the dll, we export our + functions/classes. It's safe to define this here because config.h is only + used internally, to compile the DLL, and every DLL source file #includes + "config.h" before anything else. 
*/ +#define PERFTOOLS_DLL_DECL /**/ + +/* printf format code for printing a size_t and ssize_t */ +#define PRIdS "ld" + +/* printf format code for printing a size_t and ssize_t */ +#define PRIuS "lu" + +/* printf format code for printing a size_t and ssize_t */ +#define PRIxS "lx" + +/* Mark the systems where we know it's bad if pthreads runs too + early before main (before threads are initialized, presumably). */ +#ifdef __FreeBSD__ +#define PTHREADS_CRASHES_IF_RUN_TOO_EARLY 1 +#endif + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +/* #undef PTHREAD_CREATE_JOINABLE */ + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* the namespace where STL code like vector<> is defined */ +#define STL_NAMESPACE std + +/* Define 32K of internal pages size for tcmalloc */ +/* #undef TCMALLOC_32K_PAGES */ + +/* Define 64K of internal pages size for tcmalloc */ +/* #undef TCMALLOC_64K_PAGES */ + +/* Define 8 bytes of allocation alignment for tcmalloc */ +/* #undef TCMALLOC_ALIGN_8BYTES */ + +/* Version number of package */ +#define VERSION "2.7" + +/* C99 says: define this to get the PRI... macros from stdint.h */ +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS 1 +#endif + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + + +#ifdef __MINGW32__ +#include "windows/mingw.h" +#endif + +#endif /* #ifndef GPERFTOOLS_CONFIG_H_ */ + diff --git a/src/third_party/gperftools-2.7/build_linux_ppc64le/config.h b/src/third_party/gperftools-2.7/build_linux_ppc64le/config.h new file mode 100644 index 00000000000..ea31bf10fd1 --- /dev/null +++ b/src/third_party/gperftools-2.7/build_linux_ppc64le/config.h @@ -0,0 +1,311 @@ +/* src/config.h. Generated from config.h.in by configure. */ +/* src/config.h.in. Generated from configure.ac by autoheader. 
*/ + + +#ifndef GPERFTOOLS_CONFIG_H_ +#define GPERFTOOLS_CONFIG_H_ + + +/* Build new/delete operators for overaligned types */ +/* #undef ENABLE_ALIGNED_NEW_DELETE */ + +/* Build runtime detection for sized delete */ +/* #undef ENABLE_DYNAMIC_SIZED_DELETE */ + +/* Build sized deletion operators */ +/* #undef ENABLE_SIZED_DELETE */ + +/* Define to 1 if compiler supports __builtin_expect */ +#define HAVE_BUILTIN_EXPECT 1 + +/* Define to 1 if compiler supports __builtin_stack_pointer */ +/* #undef HAVE_BUILTIN_STACK_POINTER */ + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +/* #undef HAVE_CONFLICT_SIGNAL_H */ + +/* Define to 1 if you have the <cygwin/signal.h> header file. */ +/* #undef HAVE_CYGWIN_SIGNAL_H */ + +/* Define to 1 if you have the declaration of `backtrace', and to 0 if you + don't. */ +/* #undef HAVE_DECL_BACKTRACE */ + +/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. + */ +#define HAVE_DECL_CFREE 1 + +/* Define to 1 if you have the declaration of `memalign', and to 0 if you + don't. */ +#define HAVE_DECL_MEMALIGN 1 + +/* Define to 1 if you have the declaration of `nanosleep', and to 0 if you + don't. */ +/* #undef HAVE_DECL_NANOSLEEP */ + +/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if + you don't. */ +#define HAVE_DECL_POSIX_MEMALIGN 1 + +/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you + don't. */ +#define HAVE_DECL_PVALLOC 1 + +/* Define to 1 if you have the declaration of `sleep', and to 0 if you don't. + */ +/* #undef HAVE_DECL_SLEEP */ + +/* Define to 1 if you have the declaration of `uname', and to 0 if you don't. + */ +#define HAVE_DECL_UNAME 1 + +/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't. + */ +#define HAVE_DECL_VALLOC 1 + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if the system has the type `Elf32_Versym'. 
*/ +#define HAVE_ELF32_VERSYM 1 + +/* Define to 1 if you have the <execinfo.h> header file. */ +#define HAVE_EXECINFO_H 1 + +/* Define to 1 if you have the <fcntl.h> header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the <features.h> header file. */ +#define HAVE_FEATURES_H 1 + +/* Define to 1 if you have the `fork' function. */ +#define HAVE_FORK 1 + +/* Define to 1 if you have the `geteuid' function. */ +#define HAVE_GETEUID 1 + +/* Define to 1 if you have the `getpagesize' function. */ +#define HAVE_GETPAGESIZE 1 + +/* Define to 1 if you have the <glob.h> header file. */ +#define HAVE_GLOB_H 1 + +/* Define to 1 if you have the <grp.h> header file. */ +#define HAVE_GRP_H 1 + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the <libunwind.h> header file. */ +/* #undef HAVE_LIBUNWIND_H */ + +/* Define to 1 if you have the <linux/ptrace.h> header file. */ +#define HAVE_LINUX_PTRACE_H 1 + +/* Define if this is Linux that has SIGEV_THREAD_ID */ +#define HAVE_LINUX_SIGEV_THREAD_ID 1 + +/* Define to 1 if you have the <malloc.h> header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have a working `mmap' system call. */ +#define HAVE_MMAP 1 + +/* define if the compiler implements namespaces */ +#define HAVE_NAMESPACES 1 + +/* Define to 1 if you have the <poll.h> header file. */ +#define HAVE_POLL_H 1 + +/* define if libc has program_invocation_name */ +#define HAVE_PROGRAM_INVOCATION_NAME 1 + +/* Define if you have POSIX threads libraries and header files. */ +#define HAVE_PTHREAD 1 + +/* defined to 1 if pthread symbols are exposed even without include pthread.h + */ +/* #undef HAVE_PTHREAD_DESPITE_ASKING_FOR */ + +/* Define to 1 if you have the <pwd.h> header file. */ +#define HAVE_PWD_H 1 + +/* Define to 1 if you have the `sbrk' function. 
*/ +#define HAVE_SBRK 1 + +/* Define to 1 if you have the <sched.h> header file. */ +#define HAVE_SCHED_H 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if the system has the type `struct mallinfo'. */ +#define HAVE_STRUCT_MALLINFO 1 + +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +#define HAVE_SYS_CDEFS_H 1 + +/* Define to 1 if you have the <sys/param.h> header file. */ +#define HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the <sys/prctl.h> header file. */ +#define HAVE_SYS_PRCTL_H 1 + +/* Define to 1 if you have the <sys/resource.h> header file. */ +#define HAVE_SYS_RESOURCE_H 1 + +/* Define to 1 if you have the <sys/socket.h> header file. */ +#define HAVE_SYS_SOCKET_H 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/syscall.h> header file. */ +#define HAVE_SYS_SYSCALL_H 1 + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <sys/ucontext.h> header file. */ +#define HAVE_SYS_UCONTEXT_H 1 + +/* Define to 1 if you have the <sys/wait.h> header file. */ +#define HAVE_SYS_WAIT_H 1 + +/* Define to 1 if compiler supports __thread */ +#define HAVE_TLS 1 + +/* Define to 1 if you have the <ucontext.h> header file. */ +#define HAVE_UCONTEXT_H 1 + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Whether <unwind.h> contains _Unwind_Backtrace */ +#define HAVE_UNWIND_BACKTRACE 1 + +/* Define to 1 if you have the <unwind.h> header file. */ +#define HAVE_UNWIND_H 1 + +/* Define to 1 if you have the <valgrind.h> header file. 
*/ +/* #undef HAVE_VALGRIND_H */ + +/* define if your compiler has __attribute__ */ +#define HAVE___ATTRIBUTE__ 1 + +/* define if your compiler supports alignment of functions */ +#define HAVE___ATTRIBUTE__ALIGNED_FN 1 + +/* Define to 1 if compiler supports __environ */ +#define HAVE___ENVIRON 1 + +/* Define to 1 if the system has the type `__int64'. */ +/* #undef HAVE___INT64 */ + +/* prefix where we look for installed files */ +#define INSTALL_PREFIX "/usr/local" + +/* Define to 1 if int32_t is equivalent to intptr_t */ +/* #undef INT32_EQUALS_INTPTR */ + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "gperftools" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "gperftools@googlegroups.com" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "gperftools" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "gperftools 2.7" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "gperftools" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.7" + +/* How to access the PC from a struct ucontext */ +#define PC_FROM_UCONTEXT uc_mcontext.gp_regs[PT_NIP] + +/* Always the empty-string on non-windows systems. On windows, should be + "__declspec(dllexport)". This way, when we compile the dll, we export our + functions/classes. It's safe to define this here because config.h is only + used internally, to compile the DLL, and every DLL source file #includes + "config.h" before anything else. 
*/ +#define PERFTOOLS_DLL_DECL /**/ + +/* printf format code for printing a size_t and ssize_t */ +#define PRIdS "ld" + +/* printf format code for printing a size_t and ssize_t */ +#define PRIuS "lu" + +/* printf format code for printing a size_t and ssize_t */ +#define PRIxS "lx" + +/* Mark the systems where we know it's bad if pthreads runs too + early before main (before threads are initialized, presumably). */ +#ifdef __FreeBSD__ +#define PTHREADS_CRASHES_IF_RUN_TOO_EARLY 1 +#endif + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +/* #undef PTHREAD_CREATE_JOINABLE */ + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* the namespace where STL code like vector<> is defined */ +#define STL_NAMESPACE std + +/* Define 32K of internal pages size for tcmalloc */ +/* #undef TCMALLOC_32K_PAGES */ + +/* Define 64K of internal pages size for tcmalloc */ +#define TCMALLOC_64K_PAGES 1 + +/* Define 8 bytes of allocation alignment for tcmalloc */ +/* #undef TCMALLOC_ALIGN_8BYTES */ + +/* Version number of package */ +#define VERSION "2.7" + +/* C99 says: define this to get the PRI... macros from stdint.h */ +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS 1 +#endif + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + + +#ifdef __MINGW32__ +#include "windows/mingw.h" +#endif + +#endif /* #ifndef GPERFTOOLS_CONFIG_H_ */ + diff --git a/src/third_party/gperftools-2.7/build_linux_s390x/config.h b/src/third_party/gperftools-2.7/build_linux_s390x/config.h new file mode 100644 index 00000000000..5b3c9c684c6 --- /dev/null +++ b/src/third_party/gperftools-2.7/build_linux_s390x/config.h @@ -0,0 +1,311 @@ +/* src/config.h. Generated from config.h.in by configure. */ +/* src/config.h.in. Generated from configure.ac by autoheader. 
*/ + + +#ifndef GPERFTOOLS_CONFIG_H_ +#define GPERFTOOLS_CONFIG_H_ + + +/* Build new/delete operators for overaligned types */ +/* #undef ENABLE_ALIGNED_NEW_DELETE */ + +/* Build runtime detection for sized delete */ +/* #undef ENABLE_DYNAMIC_SIZED_DELETE */ + +/* Build sized deletion operators */ +/* #undef ENABLE_SIZED_DELETE */ + +/* Define to 1 if compiler supports __builtin_expect */ +#define HAVE_BUILTIN_EXPECT 1 + +/* Define to 1 if compiler supports __builtin_stack_pointer */ +/* #undef HAVE_BUILTIN_STACK_POINTER */ + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +/* #undef HAVE_CONFLICT_SIGNAL_H */ + +/* Define to 1 if you have the <cygwin/signal.h> header file. */ +/* #undef HAVE_CYGWIN_SIGNAL_H */ + +/* Define to 1 if you have the declaration of `backtrace', and to 0 if you + don't. */ +#define HAVE_DECL_BACKTRACE 1 + +/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. + */ +#define HAVE_DECL_CFREE 1 + +/* Define to 1 if you have the declaration of `memalign', and to 0 if you + don't. */ +#define HAVE_DECL_MEMALIGN 1 + +/* Define to 1 if you have the declaration of `nanosleep', and to 0 if you + don't. */ +/* #undef HAVE_DECL_NANOSLEEP */ + +/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if + you don't. */ +#define HAVE_DECL_POSIX_MEMALIGN 1 + +/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you + don't. */ +#define HAVE_DECL_PVALLOC 1 + +/* Define to 1 if you have the declaration of `sleep', and to 0 if you don't. + */ +/* #undef HAVE_DECL_SLEEP */ + +/* Define to 1 if you have the declaration of `uname', and to 0 if you don't. + */ +#define HAVE_DECL_UNAME 1 + +/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't. + */ +#define HAVE_DECL_VALLOC 1 + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if the system has the type `Elf32_Versym'. 
*/ +#define HAVE_ELF32_VERSYM 1 + +/* Define to 1 if you have the <execinfo.h> header file. */ +#define HAVE_EXECINFO_H 1 + +/* Define to 1 if you have the <fcntl.h> header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the <features.h> header file. */ +#define HAVE_FEATURES_H 1 + +/* Define to 1 if you have the `fork' function. */ +#define HAVE_FORK 1 + +/* Define to 1 if you have the `geteuid' function. */ +#define HAVE_GETEUID 1 + +/* Define to 1 if you have the `getpagesize' function. */ +#define HAVE_GETPAGESIZE 1 + +/* Define to 1 if you have the <glob.h> header file. */ +#define HAVE_GLOB_H 1 + +/* Define to 1 if you have the <grp.h> header file. */ +#define HAVE_GRP_H 1 + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the <libunwind.h> header file. */ +/* #undef HAVE_LIBUNWIND_H */ + +/* Define to 1 if you have the <linux/ptrace.h> header file. */ +#define HAVE_LINUX_PTRACE_H 1 + +/* Define if this is Linux that has SIGEV_THREAD_ID */ +#define HAVE_LINUX_SIGEV_THREAD_ID 1 + +/* Define to 1 if you have the <malloc.h> header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have a working `mmap' system call. */ +#define HAVE_MMAP 1 + +/* define if the compiler implements namespaces */ +#define HAVE_NAMESPACES 1 + +/* Define to 1 if you have the <poll.h> header file. */ +#define HAVE_POLL_H 1 + +/* define if libc has program_invocation_name */ +#define HAVE_PROGRAM_INVOCATION_NAME 1 + +/* Define if you have POSIX threads libraries and header files. */ +#define HAVE_PTHREAD 1 + +/* defined to 1 if pthread symbols are exposed even without include pthread.h + */ +/* #undef HAVE_PTHREAD_DESPITE_ASKING_FOR */ + +/* Define to 1 if you have the <pwd.h> header file. */ +#define HAVE_PWD_H 1 + +/* Define to 1 if you have the `sbrk' function. 
*/ +#define HAVE_SBRK 1 + +/* Define to 1 if you have the <sched.h> header file. */ +#define HAVE_SCHED_H 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if the system has the type `struct mallinfo'. */ +#define HAVE_STRUCT_MALLINFO 1 + +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +#define HAVE_SYS_CDEFS_H 1 + +/* Define to 1 if you have the <sys/param.h> header file. */ +#define HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the <sys/prctl.h> header file. */ +#define HAVE_SYS_PRCTL_H 1 + +/* Define to 1 if you have the <sys/resource.h> header file. */ +#define HAVE_SYS_RESOURCE_H 1 + +/* Define to 1 if you have the <sys/socket.h> header file. */ +#define HAVE_SYS_SOCKET_H 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/syscall.h> header file. */ +#define HAVE_SYS_SYSCALL_H 1 + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <sys/ucontext.h> header file. */ +#define HAVE_SYS_UCONTEXT_H 1 + +/* Define to 1 if you have the <sys/wait.h> header file. */ +#define HAVE_SYS_WAIT_H 1 + +/* Define to 1 if compiler supports __thread */ +#define HAVE_TLS 1 + +/* Define to 1 if you have the <ucontext.h> header file. */ +#define HAVE_UCONTEXT_H 1 + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Whether <unwind.h> contains _Unwind_Backtrace */ +#define HAVE_UNWIND_BACKTRACE 1 + +/* Define to 1 if you have the <unwind.h> header file. */ +#define HAVE_UNWIND_H 1 + +/* Define to 1 if you have the <valgrind.h> header file. 
*/ +/* #undef HAVE_VALGRIND_H */ + +/* define if your compiler has __attribute__ */ +#define HAVE___ATTRIBUTE__ 1 + +/* define if your compiler supports alignment of functions */ +#define HAVE___ATTRIBUTE__ALIGNED_FN 1 + +/* Define to 1 if compiler supports __environ */ +#define HAVE___ENVIRON 1 + +/* Define to 1 if the system has the type `__int64'. */ +/* #undef HAVE___INT64 */ + +/* prefix where we look for installed files */ +#define INSTALL_PREFIX "/usr/local" + +/* Define to 1 if int32_t is equivalent to intptr_t */ +/* #undef INT32_EQUALS_INTPTR */ + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "gperftools" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "gperftools@googlegroups.com" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "gperftools" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "gperftools 2.7" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "gperftools" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.7" + +/* How to access the PC from a struct ucontext */ +#define PC_FROM_UCONTEXT uc_mcontext.psw.addr + +/* Always the empty-string on non-windows systems. On windows, should be + "__declspec(dllexport)". This way, when we compile the dll, we export our + functions/classes. It's safe to define this here because config.h is only + used internally, to compile the DLL, and every DLL source file #includes + "config.h" before anything else. 
*/ +#define PERFTOOLS_DLL_DECL /**/ + +/* printf format code for printing a size_t and ssize_t */ +#define PRIdS "ld" + +/* printf format code for printing a size_t and ssize_t */ +#define PRIuS "lu" + +/* printf format code for printing a size_t and ssize_t */ +#define PRIxS "lx" + +/* Mark the systems where we know it's bad if pthreads runs too + early before main (before threads are initialized, presumably). */ +#ifdef __FreeBSD__ +#define PTHREADS_CRASHES_IF_RUN_TOO_EARLY 1 +#endif + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +/* #undef PTHREAD_CREATE_JOINABLE */ + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* the namespace where STL code like vector<> is defined */ +#define STL_NAMESPACE std + +/* Define 32K of internal pages size for tcmalloc */ +/* #undef TCMALLOC_32K_PAGES */ + +/* Define 64K of internal pages size for tcmalloc */ +/* #undef TCMALLOC_64K_PAGES */ + +/* Define 8 bytes of allocation alignment for tcmalloc */ +/* #undef TCMALLOC_ALIGN_8BYTES */ + +/* Version number of package */ +#define VERSION "2.7" + +/* C99 says: define this to get the PRI... macros from stdint.h */ +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS 1 +#endif + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + + +#ifdef __MINGW32__ +#include "windows/mingw.h" +#endif + +#endif /* #ifndef GPERFTOOLS_CONFIG_H_ */ + diff --git a/src/third_party/gperftools-2.7/build_linux_x86_64/config.h b/src/third_party/gperftools-2.7/build_linux_x86_64/config.h new file mode 100644 index 00000000000..30d58dda653 --- /dev/null +++ b/src/third_party/gperftools-2.7/build_linux_x86_64/config.h @@ -0,0 +1,311 @@ +/* src/config.h. Generated from config.h.in by configure. */ +/* src/config.h.in. Generated from configure.ac by autoheader. 
*/ + + +#ifndef GPERFTOOLS_CONFIG_H_ +#define GPERFTOOLS_CONFIG_H_ + + +/* Build new/delete operators for overaligned types */ +/* #undef ENABLE_ALIGNED_NEW_DELETE */ + +/* Build runtime detection for sized delete */ +/* #undef ENABLE_DYNAMIC_SIZED_DELETE */ + +/* Build sized deletion operators */ +/* #undef ENABLE_SIZED_DELETE */ + +/* Define to 1 if compiler supports __builtin_expect */ +#define HAVE_BUILTIN_EXPECT 1 + +/* Define to 1 if compiler supports __builtin_stack_pointer */ +/* #undef HAVE_BUILTIN_STACK_POINTER */ + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +/* #undef HAVE_CONFLICT_SIGNAL_H */ + +/* Define to 1 if you have the <cygwin/signal.h> header file. */ +/* #undef HAVE_CYGWIN_SIGNAL_H */ + +/* Define to 1 if you have the declaration of `backtrace', and to 0 if you + don't. */ +/* #undef HAVE_DECL_BACKTRACE */ + +/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. + */ +#define HAVE_DECL_CFREE 1 + +/* Define to 1 if you have the declaration of `memalign', and to 0 if you + don't. */ +#define HAVE_DECL_MEMALIGN 1 + +/* Define to 1 if you have the declaration of `nanosleep', and to 0 if you + don't. */ +/* #undef HAVE_DECL_NANOSLEEP */ + +/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if + you don't. */ +#define HAVE_DECL_POSIX_MEMALIGN 1 + +/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you + don't. */ +#define HAVE_DECL_PVALLOC 1 + +/* Define to 1 if you have the declaration of `sleep', and to 0 if you don't. + */ +/* #undef HAVE_DECL_SLEEP */ + +/* Define to 1 if you have the declaration of `uname', and to 0 if you don't. + */ +#define HAVE_DECL_UNAME 1 + +/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't. + */ +#define HAVE_DECL_VALLOC 1 + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if the system has the type `Elf32_Versym'. 
*/ +#define HAVE_ELF32_VERSYM 1 + +/* Define to 1 if you have the <execinfo.h> header file. */ +#define HAVE_EXECINFO_H 1 + +/* Define to 1 if you have the <fcntl.h> header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the <features.h> header file. */ +#define HAVE_FEATURES_H 1 + +/* Define to 1 if you have the `fork' function. */ +#define HAVE_FORK 1 + +/* Define to 1 if you have the `geteuid' function. */ +#define HAVE_GETEUID 1 + +/* Define to 1 if you have the `getpagesize' function. */ +#define HAVE_GETPAGESIZE 1 + +/* Define to 1 if you have the <glob.h> header file. */ +#define HAVE_GLOB_H 1 + +/* Define to 1 if you have the <grp.h> header file. */ +#define HAVE_GRP_H 1 + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the <libunwind.h> header file. */ +/* #undef HAVE_LIBUNWIND_H */ + +/* Define to 1 if you have the <linux/ptrace.h> header file. */ +#define HAVE_LINUX_PTRACE_H 1 + +/* Define if this is Linux that has SIGEV_THREAD_ID */ +#define HAVE_LINUX_SIGEV_THREAD_ID 1 + +/* Define to 1 if you have the <malloc.h> header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have a working `mmap' system call. */ +#define HAVE_MMAP 1 + +/* define if the compiler implements namespaces */ +#define HAVE_NAMESPACES 1 + +/* Define to 1 if you have the <poll.h> header file. */ +#define HAVE_POLL_H 1 + +/* define if libc has program_invocation_name */ +#define HAVE_PROGRAM_INVOCATION_NAME 1 + +/* Define if you have POSIX threads libraries and header files. */ +#define HAVE_PTHREAD 1 + +/* defined to 1 if pthread symbols are exposed even without include pthread.h + */ +/* #undef HAVE_PTHREAD_DESPITE_ASKING_FOR */ + +/* Define to 1 if you have the <pwd.h> header file. */ +#define HAVE_PWD_H 1 + +/* Define to 1 if you have the `sbrk' function. 
*/ +#define HAVE_SBRK 1 + +/* Define to 1 if you have the <sched.h> header file. */ +#define HAVE_SCHED_H 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if the system has the type `struct mallinfo'. */ +#define HAVE_STRUCT_MALLINFO 1 + +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +#define HAVE_SYS_CDEFS_H 1 + +/* Define to 1 if you have the <sys/param.h> header file. */ +#define HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the <sys/prctl.h> header file. */ +#define HAVE_SYS_PRCTL_H 1 + +/* Define to 1 if you have the <sys/resource.h> header file. */ +#define HAVE_SYS_RESOURCE_H 1 + +/* Define to 1 if you have the <sys/socket.h> header file. */ +#define HAVE_SYS_SOCKET_H 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/syscall.h> header file. */ +#define HAVE_SYS_SYSCALL_H 1 + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <sys/ucontext.h> header file. */ +#define HAVE_SYS_UCONTEXT_H 1 + +/* Define to 1 if you have the <sys/wait.h> header file. */ +#define HAVE_SYS_WAIT_H 1 + +/* Define to 1 if compiler supports __thread */ +#define HAVE_TLS 1 + +/* Define to 1 if you have the <ucontext.h> header file. */ +#define HAVE_UCONTEXT_H 1 + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Whether <unwind.h> contains _Unwind_Backtrace */ +#define HAVE_UNWIND_BACKTRACE 1 + +/* Define to 1 if you have the <unwind.h> header file. */ +#define HAVE_UNWIND_H 1 + +/* Define to 1 if you have the <valgrind.h> header file. 
*/ +/* #undef HAVE_VALGRIND_H */ + +/* define if your compiler has __attribute__ */ +#define HAVE___ATTRIBUTE__ 1 + +/* define if your compiler supports alignment of functions */ +#define HAVE___ATTRIBUTE__ALIGNED_FN 1 + +/* Define to 1 if compiler supports __environ */ +#define HAVE___ENVIRON 1 + +/* Define to 1 if the system has the type `__int64'. */ +/* #undef HAVE___INT64 */ + +/* prefix where we look for installed files */ +#define INSTALL_PREFIX "/usr/local" + +/* Define to 1 if int32_t is equivalent to intptr_t */ +/* #undef INT32_EQUALS_INTPTR */ + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "gperftools" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "gperftools@googlegroups.com" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "gperftools" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "gperftools 2.7" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "gperftools" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.7" + +/* How to access the PC from a struct ucontext */ +#define PC_FROM_UCONTEXT uc_mcontext.gregs[REG_RIP] + +/* Always the empty-string on non-windows systems. On windows, should be + "__declspec(dllexport)". This way, when we compile the dll, we export our + functions/classes. It's safe to define this here because config.h is only + used internally, to compile the DLL, and every DLL source file #includes + "config.h" before anything else. 
*/ +#define PERFTOOLS_DLL_DECL /**/ + +/* printf format code for printing a size_t and ssize_t */ +#define PRIdS "ld" + +/* printf format code for printing a size_t and ssize_t */ +#define PRIuS "lu" + +/* printf format code for printing a size_t and ssize_t */ +#define PRIxS "lx" + +/* Mark the systems where we know it's bad if pthreads runs too + early before main (before threads are initialized, presumably). */ +#ifdef __FreeBSD__ +#define PTHREADS_CRASHES_IF_RUN_TOO_EARLY 1 +#endif + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +/* #undef PTHREAD_CREATE_JOINABLE */ + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* the namespace where STL code like vector<> is defined */ +#define STL_NAMESPACE std + +/* Define 32K of internal pages size for tcmalloc */ +/* #undef TCMALLOC_32K_PAGES */ + +/* Define 64K of internal pages size for tcmalloc */ +/* #undef TCMALLOC_64K_PAGES */ + +/* Define 8 bytes of allocation alignment for tcmalloc */ +/* #undef TCMALLOC_ALIGN_8BYTES */ + +/* Version number of package */ +#define VERSION "2.7" + +/* C99 says: define this to get the PRI... macros from stdint.h */ +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS 1 +#endif + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + + +#ifdef __MINGW32__ +#include "windows/mingw.h" +#endif + +#endif /* #ifndef GPERFTOOLS_CONFIG_H_ */ + diff --git a/src/third_party/gperftools-2.7/build_windows_x86_64/config.h b/src/third_party/gperftools-2.7/build_windows_x86_64/config.h new file mode 100644 index 00000000000..349d70a315d --- /dev/null +++ b/src/third_party/gperftools-2.7/build_windows_x86_64/config.h @@ -0,0 +1,376 @@ +/* A manual version of config.h fit for windows machines. 
+ * + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +/* Sometimes we accidentally #include this config.h instead of the one + in .. -- this is particularly true for msys/mingw, which uses the + unix config.h but also runs code in the windows directory. + */ +#ifdef __MINGW32__ +#include "../config.h" +#define GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +#endif + +#ifndef GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +#define GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +/* used by tcmalloc.h */ +#define GPERFTOOLS_CONFIG_H_ + +/* define this if you are linking tcmalloc statically and overriding the + * default allocators. + * For instructions on how to use this mode, see + * http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b + */ +#undef WIN32_OVERRIDE_ALLOCATORS + +/* Build new/delete operators for overaligned types */ +/* #undef ENABLE_ALIGNED_NEW_DELETE */ + +/* Build runtime detection for sized delete */ +/* #undef ENABLE_DYNAMIC_SIZED_DELETE */ + +/* Build sized deletion operators */ +/* #undef ENABLE_SIZED_DELETE */ + +/* Define to 1 if compiler supports __builtin_expect */ +/* #undef HAVE_BUILTIN_EXPECT */ + +/* Define to 1 if compiler supports __builtin_stack_pointer */ +/* #undef HAVE_BUILTIN_STACK_POINTER */ + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +/* #undef HAVE_CONFLICT_SIGNAL_H */ + +/* Define to 1 if you have the <cygwin/signal.h> header file. */ +/* #undef HAVE_CYGWIN_SIGNAL_H */ + +/* Define to 1 if you have the declaration of `backtrace', and to 0 if you + don't. */ +/* #undef HAVE_DECL_BACKTRACE */ + +/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. + */ +#define HAVE_DECL_CFREE 0 + +/* Define to 1 if you have the declaration of `memalign', and to 0 if you + don't. */ +#define HAVE_DECL_MEMALIGN 0 + +/* Define to 1 if you have the declaration of `nanosleep', and to 0 if you + don't. 
*/ +#define HAVE_DECL_NANOSLEEP 0 + +/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if + you don't. */ +#define HAVE_DECL_POSIX_MEMALIGN 0 + +/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you + don't. */ +#define HAVE_DECL_PVALLOC 0 + +/* Define to 1 if you have the declaration of `sleep', and to 0 if you don't. + */ +#define HAVE_DECL_SLEEP 0 + +/* Define to 1 if you have the declaration of `uname', and to 0 if you don't. + */ +#define HAVE_DECL_UNAME 0 + +/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't. + */ +#define HAVE_DECL_VALLOC 0 + +/* Define to 1 if you have the <dlfcn.h> header file. */ +/* #undef HAVE_DLFCN_H */ + +/* Define to 1 if the system has the type `Elf32_Versym'. */ +/* #undef HAVE_ELF32_VERSYM */ + +/* Define to 1 if you have the <execinfo.h> header file. */ +/* #undef HAVE_EXECINFO_H */ + +/* Define to 1 if you have the <fcntl.h> header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the <features.h> header file. */ +/* #undef HAVE_FEATURES_H */ + +/* Define to 1 if you have the `fork' function. */ +/* #undef HAVE_FORK */ + +/* Define to 1 if you have the `geteuid' function. */ +/* #undef HAVE_GETEUID */ + +/* Define to 1 if you have the `getpagesize' function. */ +#define HAVE_GETPAGESIZE 1 /* we define it in windows/port.cc */ + +/* Define to 1 if you have the <glob.h> header file. */ +/* #undef HAVE_GLOB_H */ + +/* Define to 1 if you have the <grp.h> header file. */ +/* #undef HAVE_GRP_H */ + +/* Define to 1 if you have the <inttypes.h> header file. */ +#if defined(_MSC_VER) && _MSC_VER >= 1900 +#define HAVE_INTTYPES_H 1 +#endif + +/* Define to 1 if you have the <libunwind.h> header file. */ +/* #undef HAVE_LIBUNWIND_H */ + +/* Define to 1 if you have the <linux/ptrace.h> header file. 
*/ +/* #undef HAVE_LINUX_PTRACE_H */ + +/* Define if this is Linux that has SIGEV_THREAD_ID */ +/* #undef HAVE_LINUX_SIGEV_THREAD_ID */ + +/* Define to 1 if you have the <malloc.h> header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have a working `mmap' system call. */ +/* #undef HAVE_MMAP */ + +/* define if the compiler implements namespaces */ +#define HAVE_NAMESPACES 1 + +/* Define to 1 if you have the <poll.h> header file. */ +/* #undef HAVE_POLL_H */ + +/* define if libc has program_invocation_name */ +/* #undef HAVE_PROGRAM_INVOCATION_NAME */ + +/* Define if you have POSIX threads libraries and header files. */ +/* #undef HAVE_PTHREAD */ + +/* defined to 1 if pthread symbols are exposed even without include pthread.h + */ +/* #undef HAVE_PTHREAD_DESPITE_ASKING_FOR */ + +/* Define to 1 if you have the <pwd.h> header file. */ +/* #undef HAVE_PWD_H */ + +/* Define to 1 if you have the `sbrk' function. */ +/* #undef HAVE_SBRK */ + +/* Define to 1 if you have the <sched.h> header file. */ +/* #undef HAVE_SCHED_H */ + +/* Define to 1 if you have the <stdint.h> header file. */ +#if defined(_MSC_VER) && _MSC_VER >= 1900 +#define HAVE_STDINT_H 1 +#endif + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +/* #undef HAVE_STRINGS_H */ + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if the system has the type `struct mallinfo'. */ +/* #undef HAVE_STRUCT_MALLINFO */ + +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +/* #undef HAVE_SYS_CDEFS_H */ + +/* Define to 1 if you have the <sys/param.h> header file. */ +/* #undef HAVE_SYS_PARAM_H */ + +/* Define to 1 if you have the <sys/prctl.h> header file. */ +/* #undef HAVE_SYS_PRCTL_H */ + +/* Define to 1 if you have the <sys/resource.h> header file. 
*/ +/* #undef HAVE_SYS_RESOURCE_H */ + +/* Define to 1 if you have the <sys/socket.h> header file. */ +/* #undef HAVE_SYS_SOCKET_H */ + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/syscall.h> header file. */ +/* #undef HAVE_SYS_SYSCALL_H */ + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <sys/ucontext.h> header file. */ +/* #undef HAVE_SYS_UCONTEXT_H */ + +/* Define to 1 if you have the <sys/wait.h> header file. */ +/* #undef HAVE_SYS_WAIT_H */ + +/* Define to 1 if compiler supports __thread */ +#define HAVE_TLS 1 + +/* Define to 1 if you have the <ucontext.h> header file. */ +/* #undef HAVE_UCONTEXT_H */ + +/* Define to 1 if you have the <unistd.h> header file. */ +/* #undef HAVE_UNISTD_H */ + +/* Whether <unwind.h> contains _Unwind_Backtrace */ +/* #undef HAVE_UNWIND_BACKTRACE */ + +/* Define to 1 if you have the <unwind.h> header file. */ +/* #undef HAVE_UNWIND_H */ + +/* Define to 1 if you have the <valgrind.h> header file. */ +/* #undef HAVE_VALGRIND_H */ + +/* define if your compiler has __attribute__ */ +/* #undef HAVE___ATTRIBUTE__ */ + +/* define if your compiler supports alignment of functions */ +/* #undef HAVE___ATTRIBUTE__ALIGNED_FN */ + +/* Define to 1 if compiler supports __environ */ +/* #undef HAVE___ENVIRON */ + +/* Define to 1 if the system has the type `__int64'. */ +#define HAVE___INT64 1 + +/* prefix where we look for installed files */ +/* #undef INSTALL_PREFIX */ + +/* Define to 1 if int32_t is equivalent to intptr_t */ +#ifndef _WIN64 +#define INT32_EQUALS_INTPTR 1 +#endif + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +/* #undef LT_OBJDIR */ + +/* Name of package */ +#define PACKAGE "gperftools" + +/* Define to the address where bug reports for this package should be sent. 
*/ +#define PACKAGE_BUGREPORT "gperftools@googlegroups.com" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "gperftools" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "gperftools 2.7" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "gperftools" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.7" + +/* How to access the PC from a struct ucontext */ +/* #undef PC_FROM_UCONTEXT */ + +/* Always the empty-string on non-windows systems. On windows, should be + "__declspec(dllexport)". This way, when we compile the dll, we export our + functions/classes. It's safe to define this here because config.h is only + used internally, to compile the DLL, and every DLL source file #includes + "config.h" before anything else. */ +#ifndef PERFTOOLS_DLL_DECL +# define PERFTOOLS_IS_A_DLL 1 /* not set if you're statically linking */ +# define PERFTOOLS_DLL_DECL __declspec(dllexport) +# define PERFTOOLS_DLL_DECL_FOR_UNITTESTS __declspec(dllimport) +#endif + +/* printf format code for printing a size_t and ssize_t */ +#ifdef _WIN64 +#define PRIdS "lld" +#else +#define PRIdS "d" +#endif + +/* printf format code for printing a size_t and ssize_t */ +#ifdef _WIN64 +#define PRIuS "llu" +#else +#define PRIuS "u" +#endif + +/* printf format code for printing a size_t and ssize_t */ +#ifdef _WIN64 +#define PRIxS "llx" +#else +#define PRIxS "x" +#endif + +/* Mark the systems where we know it's bad if pthreads runs too + early before main (before threads are initialized, presumably). */ +#ifdef __FreeBSD__ +#define PTHREADS_CRASHES_IF_RUN_TOO_EARLY 1 +#endif + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +/* #undef PTHREAD_CREATE_JOINABLE */ + +/* Define to 1 if you have the ANSI C header files. 
*/ +#define STDC_HEADERS 1 + +/* the namespace where STL code like vector<> is defined */ +#define STL_NAMESPACE std + +/* Define 32K of internal pages size for tcmalloc */ +/* #undef TCMALLOC_32K_PAGES */ + +/* Define 64K of internal pages size for tcmalloc */ +/* #undef TCMALLOC_64K_PAGES */ + +/* Define 8 bytes of allocation alignment for tcmalloc */ +/* #undef TCMALLOC_ALIGN_8BYTES */ + +/* Version number of package */ +#define VERSION "2.7" + +/* C99 says: define this to get the PRI... macros from stdint.h */ +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS 1 +#endif + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + +// --------------------------------------------------------------------- +// Extra stuff not found in config.h.in + +// This must be defined before the windows.h is included. We need at +// least 0x0400 for mutex.h to have access to TryLock, and at least +// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx. +// (This latter is an optimization we could take out if need be.) +#ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0501 +#endif + +#if defined(_MSC_VER) && _MSC_VER >= 1900 +#define HAVE_SNPRINTF 1 +#endif + +// We want to make sure not to ever try to #include heap-checker.h +#define NO_HEAP_CHECK 1 + +// TODO(csilvers): include windows/port.h in every relevant source file instead? +#include "windows/port.h" + +// MONGODB MODIFCATION - disable DLL hook +// gperftools/windows/patch_functions.cc normally defines this function, +// but we do not link this file since it would dynamically patch our functions. 
+// We override the behavior of this function to no-patch functions, but instead +// simply to do nothing +// TCMalloc calls this via a static initializer +static void PatchWindowsFunctions() { + // Intentionally left empty +} +#endif /* GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ */ diff --git a/src/third_party/gperftools-2.7/docs/cpuprofile-fileformat.html b/src/third_party/gperftools-2.7/docs/cpuprofile-fileformat.html new file mode 100644 index 00000000000..3f90e6bc78e --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/cpuprofile-fileformat.html @@ -0,0 +1,264 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<HTML> + +<HEAD> + <link rel="stylesheet" href="designstyle.css"> + <title>Google CPU Profiler Binary Data File Format</title> +</HEAD> + +<BODY> + +<h1>Google CPU Profiler Binary Data File Format</h1> + +<p align=right> + <i>Last modified + <script type=text/javascript> + var lm = new Date(document.lastModified); + document.write(lm.toDateString()); + </script></i> +</p> + +<p>This file documents the binary data file format produced by the +Google CPU Profiler. For information about using the CPU Profiler, +see <a href="cpuprofile.html">its user guide</a>. + +<p>The profiler source code, which generates files using this format, is at +<code>src/profiler.cc</code></a>. + + +<h2>CPU Profile Data File Structure</h2> + +<p>CPU profile data files each consist of four parts, in order: + +<ul> + <li> Binary header + <li> Binary profile records + <li> Binary trailer + <li> Text list of mapped objects +</ul> + +<p>The binary data is expressed in terms of "slots." These are words +large enough to hold the program's pointer type, i.e., for 32-bit +programs they are 4 bytes in size, and for 64-bit programs they are 8 +bytes. They are stored in the profile data file in the native byte +order (i.e., little-endian for x86 and x86_64). + + +<h2>Binary Header</h2> + +<p>The binary header format is show below. 
Values written by the +profiler, along with requirements currently enforced by the analysis +tools, are shown in parentheses. + +<p> +<table summary="Header Format" + frame="box" rules="sides" cellpadding="5" width="50%"> + <tr> + <th width="30%">slot</th> + <th width="70%">data</th> + </tr> + + <tr> + <td>0</td> + <td>header count (0; must be 0)</td> + </tr> + + <tr> + <td>1</td> + <td>header slots after this one (3; must be >= 3)</td> + </tr> + + <tr> + <td>2</td> + <td>format version (0; must be 0)</td> + </tr> + + <tr> + <td>3</td> + <td>sampling period, in microseconds</td> + </tr> + + <tr> + <td>4</td> + <td>padding (0)</td> + </tr> +</table> + +<p>The headers currently generated for 32-bit and 64-bit little-endian +(x86 and x86_64) profiles are shown below, for comparison. + +<p> +<table summary="Header Example" frame="box" rules="sides" cellpadding="5"> + <tr> + <th></th> + <th>hdr count</th> + <th>hdr words</th> + <th>version</th> + <th>sampling period</th> + <th>pad</th> + </tr> + <tr> + <td>32-bit or 64-bit (slots)</td> + <td>0</td> + <td>3</td> + <td>0</td> + <td>10000</td> + <td>0</td> + </tr> + <tr> + <td>32-bit (4-byte words in file)</td> + <td><tt>0x00000</tt></td> + <td><tt>0x00003</tt></td> + <td><tt>0x00000</tt></td> + <td><tt>0x02710</tt></td> + <td><tt>0x00000</tt></td> + </tr> + <tr> + <td>64-bit LE (4-byte words in file)</td> + <td><tt>0x00000 0x00000</tt></td> + <td><tt>0x00003 0x00000</tt></td> + <td><tt>0x00000 0x00000</tt></td> + <td><tt>0x02710 0x00000</tt></td> + <td><tt>0x00000 0x00000</tt></td> + </tr> +</table> + +<p>The contents are shown in terms of slots, and in terms of 4-byte +words in the profile data file. The slot contents for 32-bit and +64-bit headers are identical. For 32-bit profiles, the 4-byte word +view matches the slot view. For 64-bit profiles, each (8-byte) slot +is shown as two 4-byte words, ordered as they would appear in the +file. 
+ +<p>The profiling tools examine the contents of the file and use the +expected locations and values of the header words field to detect +whether the file is 32-bit or 64-bit. + + +<h2>Binary Profile Records</h2> + +<p>The binary profile record format is shown below. + +<p> +<table summary="Profile Record Format" + frame="box" rules="sides" cellpadding="5" width="50%"> + <tr> + <th width="30%">slot</th> + <th width="70%">data</th> + </tr> + + <tr> + <td>0</td> + <td>sample count, must be >= 1</td> + </tr> + + <tr> + <td>1</td> + <td>number of call chain PCs (num_pcs), must be >= 1</td> + </tr> + + <tr> + <td>2 .. (num_pcs + 1)</td> + <td>call chain PCs, most-recently-called function first. + </tr> +</table> + +<p>The total length of a given record is 2 + num_pcs. + +<p>Note that multiple profile records can be emitted by the profiler +having an identical call chain. In that case, analysis tools should +sum the counts of all records having identical call chains. + +<p><b>Note:</b> Some profile analysis tools terminate if they see +<em>any</em> profile record with a call chain with its first entry +having the address 0. (This is similar to the binary trailer.) + +<h3>Example</h3> + +This example shows the slots contained in a sample profile record. + +<p> +<table summary="Profile Record Example" + frame="box" rules="sides" cellpadding="5"> + <tr> + <td>5</td> + <td>3</td> + <td>0xa0000</td> + <td>0xc0000</td> + <td>0xe0000</td> + </tr> +</table> + +<p>In this example, 5 ticks were received at PC 0xa0000, whose +function had been called by the function containing 0xc0000, which had +been called from the function containing 0xe0000. + + +<h2>Binary Trailer</h2> + +<p>The binary trailer consists of three slots of data with fixed +values, shown below. 
+ +<p> +<table summary="Trailer Format" + frame="box" rules="sides" cellpadding="5" width="50%"> + <tr> + <th width="30%">slot</th> + <th width="70%">value</th> + </tr> + + <tr> + <td>0</td> + <td>0</td> + </tr> + + <tr> + <td>1</td> + <td>1</td> + </tr> + + <tr> + <td>2</td> + <td>0</td> + </tr> +</table> + +<p>Note that this is the same data that would contained in a profile +record with sample count = 0, num_pcs = 1, and a one-element call +chain containing the address 0. + + +<h2>Text List of Mapped Objects</h2> + +<p>The binary data in the file is followed immediately by a list of +mapped objects. This list consists of lines of text separated by +newline characters. + +<p>Each line is one of the following types: + +<ul> + <li>Build specifier, starting with "<tt>build=</tt>". For example: + <pre> build=/path/to/binary</pre> + Leading spaces on the line are ignored. + + <li>Mapping line from ProcMapsIterator::FormatLine. For example: + <pre> 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so</pre> + The first address must start at the beginning of the line. +</ul> + +<p>Unrecognized lines should be ignored by analysis tools. + +<p>When processing the paths see in mapping lines, occurrences of +<tt>$build</tt> followed by a non-word character (i.e., characters +other than underscore or alphanumeric characters), should be replaced +by the path given on the last build specifier line. 
+ +<hr> +<address>Chris Demetriou<br> +<!-- Created: Mon Aug 27 12:18:26 PDT 2007 --> +<!-- hhmts start --> +Last modified: Mon Aug 27 12:18:26 PDT 2007 (cgd) +<!-- hhmts end --> +</address> +</BODY> +</HTML> diff --git a/src/third_party/gperftools-2.7/docs/cpuprofile.html b/src/third_party/gperftools-2.7/docs/cpuprofile.html new file mode 100644 index 00000000000..c81feb6ae1f --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/cpuprofile.html @@ -0,0 +1,536 @@ +<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN"> +<HTML> + +<HEAD> + <link rel="stylesheet" href="designstyle.css"> + <title>Gperftools CPU Profiler</title> +</HEAD> + +<BODY> + +<p align=right> + <i>Last modified + <script type=text/javascript> + var lm = new Date(document.lastModified); + document.write(lm.toDateString()); + </script></i> +</p> + +<p>This is the CPU profiler we use at Google. There are three parts +to using it: linking the library into an application, running the +code, and analyzing the output.</p> + +<p>On the off-chance that you should need to understand it, the CPU +profiler data file format is documented separately, +<a href="cpuprofile-fileformat.html">here</a>. + + +<H1>Linking in the Library</H1> + +<p>To install the CPU profiler into your executable, add +<code>-lprofiler</code> to the link-time step for your executable. +(It's also probably possible to add in the profiler at run-time using +<code>LD_PRELOAD</code>, e.g. +<code>% env LD_PRELOAD="/usr/lib/libprofiler.so" <binary></code>, +but this isn't necessarily recommended.)</p> + +<p>This does <i>not</i> turn on CPU profiling; it just inserts the +code. For that reason, it's practical to just always link +<code>-lprofiler</code> into a binary while developing; that's what we +do at Google. 
(However, since any user can turn on the profiler by +setting an environment variable, it's not necessarily recommended to +install profiler-linked binaries into a production, running +system.)</p> + + +<H1>Running the Code</H1> + +<p>There are several alternatives to actually turn on CPU profiling +for a given run of an executable:</p> + +<ol> + <li> <p>Define the environment variable CPUPROFILE to the filename + to dump the profile to. For instance, if you had a version of + <code>/bin/ls</code> that had been linked against libprofiler, + you could run:</p> + <pre>% env CPUPROFILE=ls.prof /bin/ls</pre> + </li> + <li> <p>In addition to defining the environment variable CPUPROFILE + you can also define CPUPROFILESIGNAL. This allows profiling to be + controlled via the signal number that you specify. The signal number + must be unused by the program under normal operation. Internally it + acts as a switch, triggered by the signal, which is off by default. + For instance, if you had a copy of <code>/bin/chrome</code> that had been + been linked against libprofiler, you could run:</p> + <pre>% env CPUPROFILE=chrome.prof CPUPROFILESIGNAL=12 /bin/chrome &</pre> + <p>You can then trigger profiling to start:</p> + <pre>% killall -12 chrome</pre> + <p>Then after a period of time you can tell it to stop which will + generate the profile:</p> + <pre>% killall -12 chrome</pre> + </li> + <li> <p>In your code, bracket the code you want profiled in calls to + <code>ProfilerStart()</code> and <code>ProfilerStop()</code>. + (These functions are declared in <code><gperftools/profiler.h></code>.) + <code>ProfilerStart()</code> will take + the profile-filename as an argument.</p> + </li> +</ol> + +<p>In Linux 2.6 and above, profiling works correctly with threads, +automatically profiling all threads. In Linux 2.4, profiling only +profiles the main thread (due to a kernel bug involving itimers and +threads). 
Profiling works correctly with sub-processes: each child +process gets its own profile with its own name (generated by combining +CPUPROFILE with the child's process id).</p> + +<p>For security reasons, CPU profiling will not write to a file -- and +is thus not usable -- for setuid programs.</p> + +<p>See the include-file <code>gperftools/profiler.h</code> for +advanced-use functions, including <code>ProfilerFlush()</code> and +<code>ProfilerStartWithOptions()</code>.</p> + + +<H2>Modifying Runtime Behavior</H2> + +<p>You can more finely control the behavior of the CPU profiler via +environment variables.</p> + +<table frame=box rules=sides cellpadding=5 width=100%> + +<tr valign=top> + <td><code>CPUPROFILE_FREQUENCY=<i>x</i></code></td> + <td>default: 100</td> + <td> + How many interrupts/second the cpu-profiler samples. + </td> +</tr> + +<tr valign=top> + <td><code>CPUPROFILE_REALTIME=1</code></td> + <td>default: [not set]</td> + <td> + If set to any value (including 0 or the empty string), use + ITIMER_REAL instead of ITIMER_PROF to gather profiles. In + general, ITIMER_REAL is not as accurate as ITIMER_PROF, and also + interacts badly with use of alarm(), so prefer ITIMER_PROF unless + you have a reason prefer ITIMER_REAL. + </td> +</tr> + +</table> + + +<h1><a name="pprof">Analyzing the Output</a></h1> + +<p><code>pprof</code> is the script used to analyze a profile. It has +many output modes, both textual and graphical. Some give just raw +numbers, much like the <code>-pg</code> output of <code>gcc</code>, +and others show the data in the form of a dependency graph.</p> + +<p>pprof <b>requires</b> <code>perl5</code> to be installed to run. +It also requires <code>dot</code> to be installed for any of the +graphical output routines, and <code>gv</code> to be installed for +<code>--gv</code> mode (described below). +</p> + +<p>Here are some ways to call pprof. 
These are described in more +detail below.</p> + +<pre> +% pprof /bin/ls ls.prof + Enters "interactive" mode +% pprof --text /bin/ls ls.prof + Outputs one line per procedure +% pprof --gv /bin/ls ls.prof + Displays annotated call-graph via 'gv' +% pprof --gv --focus=Mutex /bin/ls ls.prof + Restricts to code paths including a .*Mutex.* entry +% pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof + Code paths including Mutex but not string +% pprof --list=getdir /bin/ls ls.prof + (Per-line) annotated source listing for getdir() +% pprof --disasm=getdir /bin/ls ls.prof + (Per-PC) annotated disassembly for getdir() +% pprof --text localhost:1234 + Outputs one line per procedure for localhost:1234 +% pprof --callgrind /bin/ls ls.prof + Outputs the call information in callgrind format +</pre> + + +<h3>Analyzing Text Output</h3> + +<p>Text mode has lines of output that look like this:</p> +<pre> + 14 2.1% 17.2% 58 8.7% std::_Rb_tree::find +</pre> + +<p>Here is how to interpret the columns:</p> +<ol> + <li> Number of profiling samples in this function + <li> Percentage of profiling samples in this function + <li> Percentage of profiling samples in the functions printed so far + <li> Number of profiling samples in this function and its callees + <li> Percentage of profiling samples in this function and its callees + <li> Function name +</ol> + +<h3>Analyzing Callgrind Output</h3> + +<p>Use <a href="http://kcachegrind.sourceforge.net">kcachegrind</a> to +analyze your callgrind output:</p> +<pre> +% pprof --callgrind /bin/ls ls.prof > ls.callgrind +% kcachegrind ls.callgrind +</pre> + +<p>The cost is specified in 'hits', i.e. how many times a function +appears in the recorded call stack information. 
The 'calls' from +function a to b record how many times function b was found in the +stack traces directly below function a.</p> + +<p>Tip: if you use a debug build the output will include file and line +number information and kcachegrind will show an annotated source +code view.</p> + +<h3>Node Information</h3> + +<p>In the various graphical modes of pprof, the output is a call graph +annotated with timing information, like so:</p> + +<A HREF="pprof-test-big.gif"> +<center><table><tr><td> + <img src="pprof-test.gif"> +</td></tr></table></center> +</A> + +<p>Each node represents a procedure. The directed edges indicate +caller to callee relations. Each node is formatted as follows:</p> + +<center><pre> +Class Name +Method Name +local (percentage) +<b>of</b> cumulative (percentage) +</pre></center> + +<p>The last one or two lines contains the timing information. (The +profiling is done via a sampling method, where by default we take 100 +samples a second. Therefor one unit of time in the output corresponds +to about 10 milliseconds of execution time.) The "local" time is the +time spent executing the instructions directly contained in the +procedure (and in any other procedures that were inlined into the +procedure). The "cumulative" time is the sum of the "local" time and +the time spent in any callees. If the cumulative time is the same as +the local time, it is not printed.</p> + +<p>For instance, the timing information for test_main_thread() +indicates that 155 units (about 1.55 seconds) were spent executing the +code in <code>test_main_thread()</code> and 200 units were spent while +executing <code>test_main_thread()</code> and its callees such as +<code>snprintf()</code>.</p> + +<p>The size of the node is proportional to the local count. 
The +percentage displayed in the node corresponds to the count divided by +the total run time of the program (that is, the cumulative count for +<code>main()</code>).</p> + +<h3>Edge Information</h3> + +<p>An edge from one node to another indicates a caller to callee +relationship. Each edge is labelled with the time spent by the callee +on behalf of the caller. E.g, the edge from +<code>test_main_thread()</code> to <code>snprintf()</code> indicates +that of the 200 samples in <code>test_main_thread()</code>, 37 are +because of calls to <code>snprintf()</code>.</p> + +<p>Note that <code>test_main_thread()</code> has an edge to +<code>vsnprintf()</code>, even though <code>test_main_thread()</code> +doesn't call that function directly. This is because the code was +compiled with <code>-O2</code>; the profile reflects the optimized +control flow.</p> + +<h3>Meta Information</h3> + +<p>The top of the display should contain some meta information +like:</p> +<pre> + /tmp/profiler2_unittest + Total samples: 202 + Focusing on: 202 + Dropped nodes with <= 1 abs(samples) + Dropped edges with <= 0 samples +</pre> + +<p>This section contains the name of the program, and the total +samples collected during the profiling run. If the +<code>--focus</code> option is on (see the <a href="#focus">Focus</a> +section below), the legend also contains the number of samples being +shown in the focused display. Furthermore, some unimportant nodes and +edges are dropped to reduce clutter. The characteristics of the +dropped nodes and edges are also displayed in the legend.</p> + +<h3><a name=focus>Focus and Ignore</a></h3> + +<p>You can ask pprof to generate a display focused on a particular +piece of the program. You specify a regular expression. Any portion +of the call-graph that is on a path which contains at least one node +matching the regular expression is preserved. The rest of the +call-graph is dropped on the floor. 
For example, you can focus on the +<code>vsnprintf()</code> libc call in <code>profiler2_unittest</code> +as follows:</p> + +<pre> +% pprof --gv --focus=vsnprintf /tmp/profiler2_unittest test.prof +</pre> +<A HREF="pprof-vsnprintf-big.gif"> +<center><table><tr><td> + <img src="pprof-vsnprintf.gif"> +</td></tr></table></center> +</A> + +<p>Similarly, you can supply the <code>--ignore</code> option to +ignore samples that match a specified regular expression. E.g., if +you are interested in everything except calls to +<code>snprintf()</code>, you can say:</p> +<pre> +% pprof --gv --ignore=snprintf /tmp/profiler2_unittest test.prof +</pre> + + +<h3>Interactive mode</a></h3> + +<p>By default -- if you don't specify any flags to the contrary -- +pprof runs in interactive mode. At the <code>(pprof)</code> prompt, +you can run many of the commands described above. You can type +<code>help</code> for a list of what commands are available in +interactive mode.</p> + +<h3><a name=options>pprof Options</a></h3> + +For a complete list of pprof options, you can run <code>pprof +--help</code>. + +<h4>Output Type</h4> + +<p> +<center> +<table frame=box rules=sides cellpadding=5 width=100%> +<tr valign=top> + <td><code>--text</code></td> + <td> + Produces a textual listing. (Note: If you have an X display, and + <code>dot</code> and <code>gv</code> installed, you will probably + be happier with the <code>--gv</code> output.) + </td> +</tr> +<tr valign=top> + <td><code>--gv</code></td> + <td> + Generates annotated call-graph, converts to postscript, and + displays via gv (requres <code>dot</code> and <code>gv</code> be + installed). + </td> +</tr> +<tr valign=top> + <td><code>--dot</code></td> + <td> + Generates the annotated call-graph in dot format and + emits to stdout (requres <code>dot</code> be installed). 
+ </td> +</tr> +<tr valign=top> + <td><code>--ps</code></td> + <td> + Generates the annotated call-graph in Postscript format and + emits to stdout (requres <code>dot</code> be installed). + </td> +</tr> +<tr valign=top> + <td><code>--pdf</code></td> + <td> + Generates the annotated call-graph in PDF format and emits to + stdout (requires <code>dot</code> and <code>ps2pdf</code> be + installed). + </td> +</tr> +<tr valign=top> + <td><code>--gif</code></td> + <td> + Generates the annotated call-graph in GIF format and + emits to stdout (requres <code>dot</code> be installed). + </td> +</tr> +<tr valign=top> + <td><code>--list=<<i>regexp</i>></code></td> + <td> + <p>Outputs source-code listing of routines whose + name matches <regexp>. Each line + in the listing is annotated with flat and cumulative + sample counts.</p> + + <p>In the presence of inlined calls, the samples + associated with inlined code tend to get assigned + to a line that follows the location of the + inlined call. A more precise accounting can be + obtained by disassembling the routine using the + --disasm flag.</p> + </td> +</tr> +<tr valign=top> + <td><code>--disasm=<<i>regexp</i>></code></td> + <td> + Generates disassembly of routines that match + <regexp>, annotated with flat and + cumulative sample counts and emits to stdout. + </td> +</tr> +</table> +</center> + +<h4>Reporting Granularity</h4> + +<p>By default, pprof produces one entry per procedure. However you can +use one of the following options to change the granularity of the +output. The <code>--files</code> option seems to be particularly +useless, and may be removed eventually.</p> + +<center> +<table frame=box rules=sides cellpadding=5 width=100%> +<tr valign=top> + <td><code>--addresses</code></td> + <td> + Produce one node per program address. + </td> +</tr> + <td><code>--lines</code></td> + <td> + Produce one node per source line. 
+ </td> +</tr> + <td><code>--functions</code></td> + <td> + Produce one node per function (this is the default). + </td> +</tr> + <td><code>--files</code></td> + <td> + Produce one node per source file. + </td> +</tr> +</table> +</center> + +<h4>Controlling the Call Graph Display</h4> + +<p>Some nodes and edges are dropped to reduce clutter in the output +display. The following options control this effect:</p> + +<center> +<table frame=box rules=sides cellpadding=5 width=100%> +<tr valign=top> + <td><code>--nodecount=<n></code></td> + <td> + This option controls the number of displayed nodes. The nodes + are first sorted by decreasing cumulative count, and then only + the top N nodes are kept. The default value is 80. + </td> +</tr> +<tr valign=top> + <td><code>--nodefraction=<f></code></td> + <td> + This option provides another mechanism for discarding nodes + from the display. If the cumulative count for a node is + less than this option's value multiplied by the total count + for the profile, the node is dropped. The default value + is 0.005; i.e. nodes that account for less than + half a percent of the total time are dropped. A node + is dropped if either this condition is satisfied, or the + --nodecount condition is satisfied. + </td> +</tr> +<tr valign=top> + <td><code>--edgefraction=<f></code></td> + <td> + This option controls the number of displayed edges. First of all, + an edge is dropped if either its source or destination node is + dropped. Otherwise, the edge is dropped if the sample + count along the edge is less than this option's value multiplied + by the total count for the profile. The default value is + 0.001; i.e., edges that account for less than + 0.1% of the total time are dropped. + </td> +</tr> +<tr valign=top> + <td><code>--focus=<re></code></td> + <td> + This option controls what region of the graph is displayed + based on the regular expression supplied with the option. 
+ For any path in the callgraph, we check all nodes in the path + against the supplied regular expression. If none of the nodes + match, the path is dropped from the output. + </td> +</tr> +<tr valign=top> + <td><code>--ignore=<re></code></td> + <td> + This option controls what region of the graph is displayed + based on the regular expression supplied with the option. + For any path in the callgraph, we check all nodes in the path + against the supplied regular expression. If any of the nodes + match, the path is dropped from the output. + </td> +</tr> +</table> +</center> + +<p>The dropped edges and nodes account for some count mismatches in +the display. For example, the cumulative count for +<code>snprintf()</code> in the first diagram above was 41. However +the local count (1) and the count along the outgoing edges (12+1+20+6) +add up to only 40.</p> + + +<h1>Caveats</h1> + +<ul> + <li> If the program exits because of a signal, the generated profile + will be <font color=red>incomplete, and may perhaps be + completely empty</font>. + <li> The displayed graph may have disconnected regions because + of the edge-dropping heuristics described above. + <li> If the program linked in a library that was not compiled + with enough symbolic information, all samples associated + with the library may be charged to the last symbol found + in the program before the library. This will artificially + inflate the count for that symbol. + <li> If you run the program on one machine, and profile it on + another, and the shared libraries are different on the two + machines, the profiling output may be confusing: samples that + fall within shared libaries may be assigned to arbitrary + procedures. + <li> If your program forks, the children will also be profiled + (since they inherit the same CPUPROFILE setting). 
Each process + is profiled separately; to distinguish the child profiles from + the parent profile and from each other, all children will have + their process-id appended to the CPUPROFILE name. + <li> Due to a hack we make to work around a possible gcc bug, your + profiles may end up named strangely if the first character of + your CPUPROFILE variable has ascii value greater than 127. + This should be exceedingly rare, but if you need to use such a + name, just set prepend <code>./</code> to your filename: + <code>CPUPROFILE=./Ägypten</code>. +</ul> + + +<hr> +<address>Sanjay Ghemawat<br> +<!-- Created: Tue Dec 19 10:43:14 PST 2000 --> +<!-- hhmts start --> +Last modified: Fri May 9 14:41:29 PDT 2008 +<!-- hhmts end --> +</address> +</BODY> +</HTML> diff --git a/src/third_party/gperftools-2.7/docs/designstyle.css b/src/third_party/gperftools-2.7/docs/designstyle.css new file mode 100644 index 00000000000..29299af1f49 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/designstyle.css @@ -0,0 +1,109 @@ +body { + background-color: #ffffff; + color: black; + margin-right: 1in; + margin-left: 1in; +} + + +h1, h2, h3, h4, h5, h6 { + color: #3366ff; + font-family: sans-serif; +} +@media print { + /* Darker version for printing */ + h1, h2, h3, h4, h5, h6 { + color: #000080; + font-family: helvetica, sans-serif; + } +} + +h1 { + text-align: center; + font-size: 18pt; +} +h2 { + margin-left: -0.5in; +} +h3 { + margin-left: -0.25in; +} +h4 { + margin-left: -0.125in; +} +hr { + margin-left: -1in; +} + +/* Definition lists: definition term bold */ +dt { + font-weight: bold; +} + +address { + text-align: right; +} +/* Use the <code> tag for bits of code and <var> for variables and objects. */ +code,pre,samp,var { + color: #006000; +} +/* Use the <file> tag for file and directory paths and names. */ +file { + color: #905050; + font-family: monospace; +} +/* Use the <kbd> tag for stuff the user should type. 
*/ +kbd { + color: #600000; +} +div.note p { + float: right; + width: 3in; + margin-right: 0%; + padding: 1px; + border: 2px solid #6060a0; + background-color: #fffff0; +} + +UL.nobullets { + list-style-type: none; + list-style-image: none; + margin-left: -1em; +} + +/* pretty printing styles. See prettify.js */ +.str { color: #080; } +.kwd { color: #008; } +.com { color: #800; } +.typ { color: #606; } +.lit { color: #066; } +.pun { color: #660; } +.pln { color: #000; } +.tag { color: #008; } +.atn { color: #606; } +.atv { color: #080; } +pre.prettyprint { padding: 2px; border: 1px solid #888; } + +.embsrc { background: #eee; } + +@media print { + .str { color: #060; } + .kwd { color: #006; font-weight: bold; } + .com { color: #600; font-style: italic; } + .typ { color: #404; font-weight: bold; } + .lit { color: #044; } + .pun { color: #440; } + .pln { color: #000; } + .tag { color: #006; font-weight: bold; } + .atn { color: #404; } + .atv { color: #060; } +} + +/* Table Column Headers */ +.hdr { + color: #006; + font-weight: bold; + background-color: #dddddd; } +.hdr2 { + color: #006; + background-color: #eeeeee; }
\ No newline at end of file diff --git a/src/third_party/gperftools-2.7/docs/heap-example1.png b/src/third_party/gperftools-2.7/docs/heap-example1.png Binary files differnew file mode 100644 index 00000000000..9a14b6fb89e --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/heap-example1.png diff --git a/src/third_party/gperftools-2.7/docs/heap_checker.html b/src/third_party/gperftools-2.7/docs/heap_checker.html new file mode 100644 index 00000000000..ca05b500389 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/heap_checker.html @@ -0,0 +1,534 @@ +<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN"> +<HTML> + +<HEAD> + <link rel="stylesheet" href="designstyle.css"> + <title>Gperftools Heap Leak Checker</title> +</HEAD> + +<BODY> + +<p align=right> + <i>Last modified + <script type=text/javascript> + var lm = new Date(document.lastModified); + document.write(lm.toDateString()); + </script></i> +</p> + +<p>This is the heap checker we use at Google to detect memory leaks in +C++ programs. There are three parts to using it: linking the library +into an application, running the code, and analyzing the output.</p> + + +<H1>Linking in the Library</H1> + +<p>The heap-checker is part of tcmalloc, so to install the heap +checker into your executable, add <code>-ltcmalloc</code> to the +link-time step for your executable. Also, while we don't necessarily +recommend this form of usage, it's possible to add in the profiler at +run-time using <code>LD_PRELOAD</code>:</p> +<pre>% env LD_PRELOAD="/usr/lib/libtcmalloc.so" <binary></pre> + +<p>This does <i>not</i> turn on heap checking; it just inserts the +code. For that reason, it's practical to just always link +<code>-ltcmalloc</code> into a binary while developing; that's what we +do at Google. (However, since any user can turn on the profiler by +setting an environment variable, it's not necessarily recommended to +install heapchecker-linked binaries into a production, running +system.) 
Note that if you wish to use the heap checker, you must +also use the tcmalloc memory-allocation library. There is no way +currently to use the heap checker separate from tcmalloc.</p> + + +<h1>Running the Code</h1> + +<p>Note: For security reasons, heap profiling will not write to a file +-- and is thus not usable -- for setuid programs.</p> + +<h2><a name="whole_program">Whole-program Heap Leak Checking</a></h2> + +<p>The recommended way to use the heap checker is in "whole program" +mode. In this case, the heap-checker starts tracking memory +allocations before the start of <code>main()</code>, and checks again +at program-exit. If it finds any memory leaks -- that is, any memory +not pointed to by objects that are still "live" at program-exit -- it +aborts the program (via <code>exit(1)</code>) and prints a message +describing how to track down the memory leak (using <A +HREF="heapprofile.html#pprof">pprof</A>).</p> + +<p>The heap-checker records the stack trace for each allocation while +it is active. This causes a significant increase in memory usage, in +addition to slowing your program down.</p> + +<p>Here's how to run a program with whole-program heap checking:</p> + +<ol> + <li> <p>Define the environment variable HEAPCHECK to the <A + HREF="#types">type of heap-checking</A> to do. 
For instance, + to heap-check + <code>/usr/local/bin/my_binary_compiled_with_tcmalloc</code>:</p> + <pre>% env HEAPCHECK=normal /usr/local/bin/my_binary_compiled_with_tcmalloc</pre> +</ol> + +<p>No other action is required.</p> + +<p>Note that since the heap-checker uses the heap-profiling framework +internally, it is not possible to run both the heap-checker and <A +HREF="heapprofile.html">heap profiler</A> at the same time.</p> + + +<h3><a name="types">Flavors of Heap Checking</a></h3> + +<p>These are the legal values when running a whole-program heap +check:</p> +<ol> + <li> <code>minimal</code> + <li> <code>normal</code> + <li> <code>strict</code> + <li> <code>draconian</code> +</ol> + +<p>"Minimal" heap-checking starts as late as possible in the +initialization, meaning you can leak some memory in your +initialization routines (that run before <code>main()</code>, say), +and not trigger a leak message. If you frequently (and purposefully) +leak data in one-time global initializers, "minimal" mode is useful +for you. Otherwise, you should avoid it for stricter modes.</p> + +<p>"Normal" heap-checking tracks <A HREF="#live">live objects</A> and +reports a leak for any data that is not reachable via a live object +when the program exits.</p> + +<p>"Strict" heap-checking is much like "normal" but has a few extra +checks that memory isn't lost in global destructors. In particular, +if you have a global variable that allocates memory during program +execution, and then "forgets" about the memory in the global +destructor (say, by setting the pointer to it to NULL) without freeing +it, that will prompt a leak message in "strict" mode, though not in +"normal" mode.</p> + +<p>"Draconian" heap-checking is appropriate for those who like to be +very precise about their memory management, and want the heap-checker +to help them enforce it. 
In "draconian" mode, the heap-checker does +not do "live object" checking at all, so it reports a leak unless +<i>all</i> allocated memory is freed before program exit. (However, +you can use <A HREF="#disable">IgnoreObject()</A> to re-enable +liveness-checking on an object-by-object basis.)</p> + +<p>"Normal" mode, as the name implies, is the one used most often at +Google. It's appropriate for everyday heap-checking use.</p> + +<p>In addition, there are two other possible modes:</p> +<ul> + <li> <code>as-is</code> + <li> <code>local</code> +</ul> +<p><code>as-is</code> is the most flexible mode; it allows you to +specify the various <A HREF="#options">knobs</A> of the heap checker +explicitly. <code>local</code> activates the <A +HREF="#explicit">explicit heap-check instrumentation</A>, but does not +turn on any whole-program leak checking.</p> + + +<h3><A NAME="tweaking">Tweaking whole-program checking</A></h3> + +<p>In some cases you want to check the whole program for memory leaks, +but waiting for after <code>main()</code> exits to do the first +whole-program leak check is waiting too long: e.g. in a long-running +server one might wish to simply periodically check for leaks while the +server is running. In this case, you can call the static method +<code>HeapLeakChecker::NoGlobalLeaks()</code>, to verify no global leaks have happened +as of that point in the program.</p> + +<p>Alternately, doing the check after <code>main()</code> exits might +be too late. Perhaps you have some objects that are known not to +clean up properly at exit. You'd like to do the "at exit" check +before those objects are destroyed (since while they're live, any +memory they point to will not be considered a leak). 
In that case, +you can call <code>HeapLeakChecker::NoGlobalLeaks()</code> manually, near the end of +<code>main()</code>, and then call <code>HeapLeakChecker::CancelGlobalCheck()</code> to +turn off the automatic post-<code>main()</code> check.</p> + +<p>Finally, there's a helper macro for "strict" and "draconian" modes, +which require all global memory to be freed before program exit. This +freeing can be time-consuming and is often unnecessary, since libc +cleans up all memory at program-exit for you. If you want the +benefits of "strict"/"draconian" modes without the cost of all that +freeing, look at <code>REGISTER_HEAPCHECK_CLEANUP</code> (in +<code>heap-checker.h</code>). This macro allows you to mark specific +cleanup code as active only when the heap-checker is turned on.</p> + + +<h2><a name="explicit">Explicit (Partial-program) Heap Leak Checking</h2> + +<p>Instead of whole-program checking, you can check certain parts of your +code to verify they do not have memory leaks. This check verifies that +between two parts of a program, no memory is allocated without being freed.</p> +<p>To use this kind of checking code, bracket the code you want +checked by creating a <code>HeapLeakChecker</code> object at the +beginning of the code segment, and call +<code>NoLeaks()</code> at the end. These functions, and all others +referred to in this file, are declared in +<code><gperftools/heap-checker.h></code>. +</p> + +<p>Here's an example:</p> +<pre> + HeapLeakChecker heap_checker("test_foo"); + { + code that exercises some foo functionality; + this code should not leak memory; + } + if (!heap_checker.NoLeaks()) assert(NULL == "heap memory leak"); +</pre> + +<p>Note that adding in the <code>HeapLeakChecker</code> object merely +instruments the code for leak-checking. 
To actually turn on this +leak-checking on a particular run of the executable, you must still +run with the heap-checker turned on:</p> +<pre>% env HEAPCHECK=local /usr/local/bin/my_binary_compiled_with_tcmalloc</pre> +<p>If you want to do whole-program leak checking in addition to this +manual leak checking, you can run in <code>normal</code> or some other +mode instead: they'll run the "local" checks in addition to the +whole-program check.</p> + + +<h2><a name="disable">Disabling Heap-checking of Known Leaks</a></h2> + +<p>Sometimes your code has leaks that you know about and are willing +to accept. You would like the heap checker to ignore them when +checking your program. You can do this by bracketing the code in +question with an appropriate heap-checking construct:</p> +<pre> + ... + { + HeapLeakChecker::Disabler disabler; + <leaky code> + } + ... +</pre> +Any objects allocated by <code>leaky code</code> (including inside any +routines called by <code>leaky code</code>) and any objects reachable +from such objects are not reported as leaks. + +<p>Alternately, you can use <code>IgnoreObject()</code>, which takes a +pointer to an object to ignore. That memory, and everything reachable +from it (by following pointers), is ignored for the purposes of leak +checking. You can call <code>UnIgnoreObject()</code> to undo the +effects of <code>IgnoreObject()</code>.</p> + + +<h2><a name="options">Tuning the Heap Checker</h2> + +<p>The heap leak checker has many options, some that trade off running +time and accuracy, and others that increase the sensitivity at the +risk of returning false positives. For most uses, the range covered +by the <A HREF="#types">heap-check flavors</A> is enough, but in +specialized cases more control can be helpful.</p> + +<p> +These options are specified via environment variables. +</p> + +<p>This first set of options controls sensitivity and accuracy. 
These +options are ignored unless you run the heap checker in <A +HREF="#types">as-is</A> mode. + +<table frame=box rules=sides cellpadding=5 width=100%> + +<tr valign=top> + <td><code>HEAP_CHECK_AFTER_DESTRUCTORS</code></td> + <td>Default: false</td> + <td> + When true, do the final leak check after all other global + destructors have run. When false, do it after all + <code>REGISTER_HEAPCHECK_CLEANUP</code>, typically much earlier in + the global-destructor process. + </td> +</tr> + +<tr valign=top> + <td><code>HEAP_CHECK_IGNORE_THREAD_LIVE</code></td> + <td>Default: true</td> + <td> + If true, ignore objects reachable from thread stacks and registers + (that is, do not report them as leaks). + </td> +</tr> + +<tr valign=top> + <td><code>HEAP_CHECK_IGNORE_GLOBAL_LIVE</code></td> + <td>Default: true</td> + <td> + If true, ignore objects reachable from global variables and data + (that is, do not report them as leaks). + </td> +</tr> + +</table> + +<p>These options modify the behavior of whole-program leak +checking.</p> + +<table frame=box rules=sides cellpadding=5 width=100%> + +<tr valign=top> + <td><code>HEAP_CHECK_MAX_LEAKS</code></td> + <td>Default: 20</td> + <td> + The maximum number of leaks to be printed to stderr (all leaks are still + emitted to file output for pprof to visualize). If negative or zero, + print all the leaks found. + </td> +</tr> + + +</table> + +<p>These options apply to all types of leak checking.</p> + +<table frame=box rules=sides cellpadding=5 width=100%> + +<tr valign=top> + <td><code>HEAP_CHECK_IDENTIFY_LEAKS</code></td> + <td>Default: false</td> + <td> + If true, generate the addresses of the leaked objects in the + generated memory leak profile files. + </td> +</tr> + +<tr valign=top> + <td><code>HEAP_CHECK_TEST_POINTER_ALIGNMENT</code></td> + <td>Default: false</td> + <td> + If true, check all leaks to see if they might be due to the use + of unaligned pointers. 
+ </td> +</tr> + +<tr valign=top> + <td><code>HEAP_CHECK_POINTER_SOURCE_ALIGNMENT</code></td> + <td>Default: sizeof(void*)</td> + <td> + Alignment at which all pointers in memory are supposed to be located. + Use 1 if any alignment is ok. + </td> +</tr> + +<tr valign=top> + <td><code>PPROF_PATH</code></td> + <td>Default: pprof</td> +<td> + The location of the <code>pprof</code> executable. + </td> +</tr> + +<tr valign=top> + <td><code>HEAP_CHECK_DUMP_DIRECTORY</code></td> + <td>Default: /tmp</td> + <td> + Where the heap-profile files are kept while the program is running. + </td> +</tr> + +</table> + + +<h2>Tips for Handling Detected Leaks</h2> + +<p>What do you do when the heap leak checker detects a memory leak? +First, you should run the reported <code>pprof</code> command; +hopefully, that is enough to track down the location where the leak +occurs.</p> + +<p>If the leak is a real leak, you should fix it!</p> + +<p>If you are sure that the reported leaks are not dangerous and there +is no good way to fix them, then you can use +<code>HeapLeakChecker::Disabler</code> and/or +<code>HeapLeakChecker::IgnoreObject()</code> to disable heap-checking +for certain parts of the codebase.</p> + +<p>In "strict" or "draconian" mode, leaks may be due to incomplete +cleanup in the destructors of global variables. If you don't wish to +augment the cleanup routines, but still want to run in "strict" or +"draconian" mode, consider using <A +HREF="#tweaking"><code>REGISTER_HEAPCHECK_CLEANUP</code></A>.</p> + +<h2>Hints for Debugging Detected Leaks</h2> + +<p>Sometimes it can be useful to not only know the exact code that +allocates the leaked objects, but also the addresses of the leaked objects. +Combining this e.g. with additional logging in the program +one can then track which subset of the allocations +made at a certain spot in the code are leaked. 
+<br/> +To get the addresses of all leaked objects + define the environment variable <code>HEAP_CHECK_IDENTIFY_LEAKS</code> + to be <code>1</code>. +The object addresses will be reported in the form of addresses +of fake immediate callers of the memory allocation routines. +Note that the performance of doing leak-checking in this mode +can be noticeably worse than the default mode. +</p> + +<p>One relatively common class of leaks that don't look real +is the case of multiple initialization. +In such cases the reported leaks are typically things that are +linked from some global objects, +which are initialized and say never modified again. +The non-obvious cause of the leak is frequently the fact that +the initialization code for these objects executes more than once. +<br/> +E.g. if the code of some <code>.cc</code> file is made to be included twice +into the binary, then the constructors for global objects defined in that file +will execute twice thus leaking the things allocated on the first run. +<br/> +Similar problems can occur if object initialization is done more explicitly +e.g. on demand by a slightly buggy code +that does not always ensure only-once initialization. +</p> + +<p> +A more rare but even more puzzling problem can be use of not properly +aligned pointers (maybe inside of not properly aligned objects). +Normally such pointers are not followed by the leak checker, +hence the objects reachable only via such pointers are reported as leaks. +If you suspect this case + define the environment variable <code>HEAP_CHECK_TEST_POINTER_ALIGNMENT</code> + to be <code>1</code> +and then look closely at the generated leak report messages. +</p> + +<h1>How It Works</h1> + +<p>When a <code>HeapLeakChecker</code> object is constructed, it dumps +a memory-usage profile named +<code><prefix>.<name>-beg.heap</code> to a temporary +directory. 
When <code>NoLeaks()</code> +is called (for whole-program checking, this happens automatically at +program-exit), it dumps another profile, named +<code><prefix>.<name>-end.heap</code>. +(<code><prefix></code> is typically determined automatically, +and <code><name></code> is typically <code>argv[0]</code>.) It +then compares the two profiles. If the second profile shows +more memory use than the first, the +<code>NoLeaks()</code> function will +return false. For "whole program" profiling, this will cause the +executable to abort (via <code>exit(1)</code>). In all cases, it will +print a message on how to process the dumped profiles to locate +leaks.</p> + +<h3><A name=live>Detecting Live Objects</A></h3> + +<p>At any point during a program's execution, all memory that is +accessible at that time is considered "live." This includes global +variables, and also any memory that is reachable by following pointers +from a global variable. It also includes all memory reachable from +the current stack frame and from current CPU registers (this captures +local variables). Finally, it includes the thread equivalents of +these: thread-local storage and thread heaps, memory reachable from +thread-local storage and thread heaps, and memory reachable from +thread CPU registers.</p> + +<p>In all modes except "draconian," live memory is not +considered to be a leak. We detect this by doing a liveness flood, +traversing pointers to heap objects starting from some initial memory +regions we know to potentially contain live pointer data. Note that +this flood might potentially not find some (global) live data region +to start the flood from. If you find such, please file a bug.</p> + +<p>The liveness flood attempts to treat any properly aligned byte +sequences as pointers to heap objects and thinks that it found a good +pointer whenever the current heap memory map contains an object with +the address whose byte representation we found. 
Some pointers into +not-at-start of object will also work here.</p> + +<p>As a result of this simple approach, it's possible (though +unlikely) for the flood to be inexact and occasionally result in +leaked objects being erroneously determined to be live. For instance, +random bit patterns can happen to look like pointers to leaked heap +objects. More likely, stale pointer data not corresponding to any +live program variables can be still present in memory regions, +especially in thread stacks. For instance, depending on how the local +<code>malloc</code> is implemented, it may reuse a heap object +address:</p> +<pre> + char* p = new char[1]; // new might return 0x80000000, say. + delete p; + new char[1]; // new might return 0x80000000 again + // This last new is a leak, but doesn't seem it: p looks like it points to it +</pre> + +<p>In other words, imprecisions in the liveness flood mean that for +any heap leak check we might miss some memory leaks. This means that +for local leak checks, we might report a memory leak in the local +area, even though the leak actually happened before the +<code>HeapLeakChecker</code> object was constructed. Note that for +whole-program checks, a leak report <i>does</i> always correspond to a +real leak (since there's no "before" to have created a false-live +object).</p> + +<p>While this liveness flood approach is not very portable and not +100% accurate, it works in most cases and saves us from writing a lot +of explicit clean up code and other hassles when dealing with thread +data.</p> + + +<h3>Visualizing Leak with <code>pprof</code></h3> + +<p> +The heap checker automatically prints basic leak info with stack traces of +leaked objects' allocation sites, as well as a pprof command line that can be +used to visualize the call-graph involved in these allocations. +The latter can be much more useful for a human +to see where/why the leaks happened, especially if the leaks are numerous. 
+</p> + +<h3>Leak-checking and Threads</h3> + +<p>At the time of HeapLeakChecker's construction and during +<code>NoLeaks()</code> calls, we grab a lock +and then pause all other threads so other threads do not interfere +with recording or analyzing the state of the heap.</p> + +<p>In general, leak checking works correctly in the presence of +threads. However, thread stack data liveness determination (via +<code>base/thread_lister.h</code>) does not work when the program is +running under GDB, because the ptrace functionality needed for finding +threads is already hooked to by GDB. Conversely, leak checker's +ptrace attempts might also interfere with GDB. As a result, GDB can +result in potentially false leak reports. For this reason, the +heap-checker turns itself off when running under GDB.</p> + +<p>Also, <code>thread_lister</code> only works for Linux pthreads; +leak checking is unlikely to handle other thread implementations +correctly.</p> + +<p>As mentioned in the discussion of liveness flooding, thread-stack +liveness determination might mis-classify as reachable objects that +very recently became unreachable (leaked). This can happen when the +pointers to now-logically-unreachable objects are present in the +active thread stack frame. In other words, trivial code like the +following might not produce the expected leak checking outcome +depending on how the compiled code works with the stack:</p> +<pre> + int* foo = new int [20]; + HeapLeakChecker check("a_check"); + foo = NULL; + // May fail to trigger. 
+ if (!check.NoLeaks()) assert(NULL == "heap memory leak"); +</pre> + + +<hr> +<address>Maxim Lifantsev<br> +<!-- Created: Tue Dec 19 10:43:14 PST 2000 --> +<!-- hhmts start --> +Last modified: Fri Jul 13 13:14:33 PDT 2007 +<!-- hhmts end --> +</address> +</body> +</html> diff --git a/src/third_party/gperftools-2.7/docs/heapprofile.html b/src/third_party/gperftools-2.7/docs/heapprofile.html new file mode 100644 index 00000000000..6f508699eeb --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/heapprofile.html @@ -0,0 +1,391 @@ +<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN"> +<HTML> + +<HEAD> + <link rel="stylesheet" href="designstyle.css"> + <title>Gperftools Heap Profiler</title> +</HEAD> + +<BODY> + +<p align=right> + <i>Last modified + <script type=text/javascript> + var lm = new Date(document.lastModified); + document.write(lm.toDateString()); + </script></i> +</p> + +<p>This is the heap profiler we use at Google, to explore how C++ +programs manage memory. This facility can be useful for</p> +<ul> + <li> Figuring out what is in the program heap at any given time + <li> Locating memory leaks + <li> Finding places that do a lot of allocation +</ul> + +<p>The profiling system instruments all allocations and frees. It +keeps track of various pieces of information per allocation site. An +allocation site is defined as the active stack trace at the call to +<code>malloc</code>, <code>calloc</code>, <code>realloc</code>, or, +<code>new</code>.</p> + +<p>There are three parts to using it: linking the library into an +application, running the code, and analyzing the output.</p> + + +<h1>Linking in the Library</h1> + +<p>To install the heap profiler into your executable, add +<code>-ltcmalloc</code> to the link-time step for your executable. 
+Also, while we don't necessarily recommend this form of usage, it's +possible to add in the profiler at run-time using +<code>LD_PRELOAD</code>: +<pre>% env LD_PRELOAD="/usr/lib/libtcmalloc.so" <binary></pre> + +<p>This does <i>not</i> turn on heap profiling; it just inserts the +code. For that reason, it's practical to just always link +<code>-ltcmalloc</code> into a binary while developing; that's what we +do at Google. (However, since any user can turn on the profiler by +setting an environment variable, it's not necessarily recommended to +install profiler-linked binaries into a production, running +system.) Note that if you wish to use the heap profiler, you must +also use the tcmalloc memory-allocation library. There is no way +currently to use the heap profiler separate from tcmalloc.</p> + + +<h1>Running the Code</h1> + +<p>There are several alternatives to actually turn on heap profiling +for a given run of an executable:</p> + +<ol> + <li> <p>Define the environment variable HEAPPROFILE to the filename + to dump the profile to. For instance, to profile + <code>/usr/local/bin/my_binary_compiled_with_tcmalloc</code>:</p> + <pre>% env HEAPPROFILE=/tmp/mybin.hprof /usr/local/bin/my_binary_compiled_with_tcmalloc</pre> + <li> <p>In your code, bracket the code you want profiled in calls to + <code>HeapProfilerStart()</code> and <code>HeapProfilerStop()</code>. + (These functions are declared in <code><gperftools/heap-profiler.h></code>.) + <code>HeapProfilerStart()</code> will take the + profile-filename-prefix as an argument. Then, as often as + you'd like before calling <code>HeapProfilerStop()</code>, you + can use <code>HeapProfilerDump()</code> or + <code>GetHeapProfile()</code> to examine the profile. 
In case + it's useful, <code>IsHeapProfilerRunning()</code> will tell you + whether you've already called HeapProfilerStart() or not.</p> +</ol> + + +<p>For security reasons, heap profiling will not write to a file -- +and is thus not usable -- for setuid programs.</p> + +<H2>Modifying Runtime Behavior</H2> + +<p>You can more finely control the behavior of the heap profiler via +environment variables.</p> + +<table frame=box rules=sides cellpadding=5 width=100%> + +<tr valign=top> + <td><code>HEAP_PROFILE_ALLOCATION_INTERVAL</code></td> + <td>default: 1073741824 (1 Gb)</td> + <td> + Dump heap profiling information each time the specified number of + bytes has been allocated by the program. + </td> +</tr> + +<tr valign=top> + <td><code>HEAP_PROFILE_INUSE_INTERVAL</code></td> + <td>default: 104857600 (100 Mb)</td> + <td> + Dump heap profiling information whenever the high-water memory + usage mark increases by the specified number of bytes. + </td> +</tr> + +<tr valign=top> + <td><code>HEAP_PROFILE_TIME_INTERVAL</code></td> + <td>default: 0</td> + <td> + Dump heap profiling information each time the specified + number of seconds has elapsed. + </td> +</tr> + +<tr valign=top> + <td><code>HEAPPROFILESIGNAL</code></td> + <td>default: disabled</td> + <td> + Dump heap profiling information whenever the specified signal is sent to the + process. + </td> +</tr> + +<tr valign=top> + <td><code>HEAP_PROFILE_MMAP</code></td> + <td>default: false</td> + <td> + Profile <code>mmap</code>, <code>mremap</code> and <code>sbrk</code> + calls in addition + to <code>malloc</code>, <code>calloc</code>, <code>realloc</code>, + and <code>new</code>. <b>NOTE:</b> this causes the profiler to + profile calls internal to tcmalloc, since tcmalloc and friends use + mmap and sbrk internally for allocations. 
One partial solution is + to filter these allocations out when running <code>pprof</code>, + with something like + <code>pprof --ignore='DoAllocWithArena|SbrkSysAllocator::Alloc|MmapSysAllocator::Alloc</code>. + </td> +</tr> + +<tr valign=top> + <td><code>HEAP_PROFILE_ONLY_MMAP</code></td> + <td>default: false</td> + <td> + Only profile <code>mmap</code>, <code>mremap</code>, and <code>sbrk</code> + calls; do not profile + <code>malloc</code>, <code>calloc</code>, <code>realloc</code>, + or <code>new</code>. + </td> +</tr> + +<tr valign=top> + <td><code>HEAP_PROFILE_MMAP_LOG</code></td> + <td>default: false</td> + <td> + Log <code>mmap</code>/<code>munmap</code> calls. + </td> +</tr> + +</table> + +<H2>Checking for Leaks</H2> + +<p>You can use the heap profiler to manually check for leaks, for +instance by reading the profiler output and looking for large +allocations. However, for that task, it's easier to use the <A +HREF="heap_checker.html">automatic heap-checking facility</A> built +into tcmalloc.</p> + + +<h1><a name="pprof">Analyzing the Output</a></h1> + +<p>If heap-profiling is turned on in a program, the program will +periodically write profiles to the filesystem. The sequence of +profiles will be named:</p> +<pre> + <prefix>.0000.heap + <prefix>.0001.heap + <prefix>.0002.heap + ... +</pre> +<p>where <code><prefix></code> is the filename-prefix supplied +when running the code (e.g. via the <code>HEAPPROFILE</code> +environment variable). Note that if the supplied prefix +does not start with a <code>/</code>, the profile files will be +written to the program's working directory.</p> + +<p>The profile output can be viewed by passing it to the +<code>pprof</code> tool -- the same tool that's used to analyze <A +HREF="cpuprofile.html">CPU profiles</A>. + +<p>Here are some examples. 
These examples assume the binary is named +<code>gfs_master</code>, and a sequence of heap profile files can be +found in files named:</p> +<pre> + /tmp/profile.0001.heap + /tmp/profile.0002.heap + ... + /tmp/profile.0100.heap +</pre> + +<h3>Why is a process so big</h3> + +<pre> + % pprof --gv gfs_master /tmp/profile.0100.heap +</pre> + +<p>This command will pop-up a <code>gv</code> window that displays +the profile information as a directed graph. Here is a portion +of the resulting output:</p> + +<p><center> +<img src="heap-example1.png"> +</center></p> + +A few explanations: +<ul> +<li> <code>GFS_MasterChunk::AddServer</code> accounts for 255.6 MB + of the live memory, which is 25% of the total live memory. +<li> <code>GFS_MasterChunkTable::UpdateState</code> is directly + accountable for 176.2 MB of the live memory (i.e., it directly + allocated 176.2 MB that has not been freed yet). Furthermore, + it and its callees are responsible for 729.9 MB. The + labels on the outgoing edges give a good indication of the + amount allocated by each callee. +</ul> + +<h3>Comparing Profiles</h3> + +<p>You often want to skip allocations during the initialization phase +of a program so you can find gradual memory leaks. One simple way to +do this is to compare two profiles -- both collected after the program +has been running for a while. Specify the name of the first profile +using the <code>--base</code> option. 
For example:</p> +<pre> + % pprof --base=/tmp/profile.0004.heap gfs_master /tmp/profile.0100.heap +</pre> + +<p>The memory-usage in <code>/tmp/profile.0004.heap</code> will be +subtracted from the memory-usage in +<code>/tmp/profile.0100.heap</code> and the result will be +displayed.</p> + +<h3>Text display</h3> + +<pre> +% pprof --text gfs_master /tmp/profile.0100.heap + 255.6 24.7% 24.7% 255.6 24.7% GFS_MasterChunk::AddServer + 184.6 17.8% 42.5% 298.8 28.8% GFS_MasterChunkTable::Create + 176.2 17.0% 59.5% 729.9 70.5% GFS_MasterChunkTable::UpdateState + 169.8 16.4% 75.9% 169.8 16.4% PendingClone::PendingClone + 76.3 7.4% 83.3% 76.3 7.4% __default_alloc_template::_S_chunk_alloc + 49.5 4.8% 88.0% 49.5 4.8% hashtable::resize + ... +</pre> + +<p> +<ul> + <li> The first column contains the direct memory use in MB. + <li> The fourth column contains memory use by the procedure + and all of its callees. + <li> The second and fifth columns are just percentage + representations of the numbers in the first and fourth columns. + <li> The third column is a cumulative sum of the second column + (i.e., the <code>k</code>th entry in the third column is the + sum of the first <code>k</code> entries in the second column.) +</ul> + +<h3>Ignoring or focusing on specific regions</h3> + +<p>The following command will give a graphical display of a subset of +the call-graph. Only paths in the call-graph that match the regular +expression <code>DataBuffer</code> are included:</p> +<pre> +% pprof --gv --focus=DataBuffer gfs_master /tmp/profile.0100.heap +</pre> + +<p>Similarly, the following command will omit all paths subset of the +call-graph. All paths in the call-graph that match the regular +expression <code>DataBuffer</code> are discarded:</p> +<pre> +% pprof --gv --ignore=DataBuffer gfs_master /tmp/profile.0100.heap +</pre> + +<h3>Total allocations + object-level information</h3> + +<p>All of the previous examples have displayed the amount of in-use +space. 
I.e., the number of bytes that have been allocated but not +freed. You can also get other types of information by supplying a +flag to <code>pprof</code>:</p> + +<center> +<table frame=box rules=sides cellpadding=5 width=100%> + +<tr valign=top> + <td><code>--inuse_space</code></td> + <td> + Display the number of in-use megabytes (i.e. space that has + been allocated but not freed). This is the default. + </td> +</tr> + +<tr valign=top> + <td><code>--inuse_objects</code></td> + <td> + Display the number of in-use objects (i.e. number of + objects that have been allocated but not freed). + </td> +</tr> + +<tr valign=top> + <td><code>--alloc_space</code></td> + <td> + Display the number of allocated megabytes. This includes + the space that has since been de-allocated. Use this + if you want to find the main allocation sites in the + program. + </td> +</tr> + +<tr valign=top> + <td><code>--alloc_objects</code></td> + <td> + Display the number of allocated objects. This includes + the objects that have since been de-allocated. Use this + if you want to find the main allocation sites in the + program. + </td> + +</table> +</center> + + +<h3>Interactive mode</a></h3> + +<p>By default -- if you don't specify any flags to the contrary -- +pprof runs in interactive mode. At the <code>(pprof)</code> prompt, +you can run many of the commands described above. You can type +<code>help</code> for a list of what commands are available in +interactive mode.</p> + + +<h1>Caveats</h1> + +<ul> + <li> Heap profiling requires the use of libtcmalloc. This + requirement may be removed in a future version of the heap + profiler, and the heap profiler separated out into its own + library. + + <li> If the program linked in a library that was not compiled + with enough symbolic information, all samples associated + with the library may be charged to the last symbol found + in the program before the library. This will artificially + inflate the count for that symbol. 
+ + <li> If you run the program on one machine, and profile it on + another, and the shared libraries are different on the two + machines, the profiling output may be confusing: samples that + fall within the shared libraries may be assigned to arbitrary + procedures. + + <li> Several libraries, such as some STL implementations, do their + own memory management. This may cause strange profiling + results. We have code in libtcmalloc to cause STL to use + tcmalloc for memory management (which in our tests is better + than STL's internal management), though it only works for some + STL implementations. + + <li> If your program forks, the children will also be profiled + (since they inherit the same HEAPPROFILE setting). Each + process is profiled separately; to distinguish the child + profiles from the parent profile and from each other, all + children will have their process-id attached to the HEAPPROFILE + name. + + <li> Due to a hack we make to work around a possible gcc bug, your + profiles may end up named strangely if the first character of + your HEAPPROFILE variable has ascii value greater than 127. + This should be exceedingly rare, but if you need to use such a + name, just prepend <code>./</code> to your filename: + <code>HEAPPROFILE=./Ägypten</code>. 
+</ul> + +<hr> +<address>Sanjay Ghemawat +<!-- Created: Tue Dec 19 10:43:14 PST 2000 --> +</address> +</body> +</html> diff --git a/src/third_party/gperftools-2.7/docs/index.html b/src/third_party/gperftools-2.7/docs/index.html new file mode 100644 index 00000000000..7b93ed3965c --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/index.html @@ -0,0 +1,20 @@ +<HTML> + +<HEAD> +<title>Gperftools</title> +</HEAD> + +<BODY> +<ul> + <li> <A HREF="tcmalloc.html">thread-caching malloc</A> + <li> <A HREF="heap_checker.html">heap-checking using tcmalloc</A> + <li> <A HREF="heapprofile.html">heap-profiling using tcmalloc</A> + <li> <A HREF="cpuprofile.html">CPU profiler</A> +</ul> + +<hr> +Last modified: Thu Feb 2 14:40:47 PST 2012 + +</BODY> + +</HTML> diff --git a/src/third_party/gperftools-2.7/docs/overview.dot b/src/third_party/gperftools-2.7/docs/overview.dot new file mode 100644 index 00000000000..9966f56cfc8 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/overview.dot @@ -0,0 +1,15 @@ +digraph Overview { +node [shape = box] + +{rank=same +T1 [label="Thread Cache"] +Tsep [label="...", shape=plaintext] +Tn [label="Thread Cache"] +T1 -> Tsep -> Tn [style=invis] +} + +C [label="Central\nHeap"] +T1 -> C [dir=both] +Tn -> C [dir=both] + +} diff --git a/src/third_party/gperftools-2.7/docs/overview.gif b/src/third_party/gperftools-2.7/docs/overview.gif Binary files differnew file mode 100644 index 00000000000..43828dadec8 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/overview.gif diff --git a/src/third_party/gperftools-2.7/docs/pageheap.dot b/src/third_party/gperftools-2.7/docs/pageheap.dot new file mode 100644 index 00000000000..5e9aec87ef7 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/pageheap.dot @@ -0,0 +1,25 @@ +digraph PageHeap { +rankdir=LR +node [shape=box, width=0.3, height=0.3] +nodesep=.05 + +heap [shape=record, height=3, label="<f0>1 page|<f1>2 pages|<f2>3 pages|...|<f128>128 pages"] +O0 [shape=record, label=""] +O1 [shape=record, 
label=""] +O2 [shape=record, label="{|}"] +O3 [shape=record, label="{|}"] +O4 [shape=record, label="{||}"] +O5 [shape=record, label="{||}"] +O6 [shape=record, label="{|...|}"] +O7 [shape=record, label="{|...|}"] +sep1 [shape=plaintext, label="..."] +sep2 [shape=plaintext, label="..."] +sep3 [shape=plaintext, label="..."] +sep4 [shape=plaintext, label="..."] + +heap:f0 -> O0 -> O1 -> sep1 +heap:f1 -> O2 -> O3 -> sep2 +heap:f2 -> O4 -> O5 -> sep3 +heap:f128 -> O6 -> O7 -> sep4 + +} diff --git a/src/third_party/gperftools-2.7/docs/pageheap.gif b/src/third_party/gperftools-2.7/docs/pageheap.gif Binary files differnew file mode 100644 index 00000000000..5cf00bd9cdb --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/pageheap.gif diff --git a/src/third_party/gperftools-2.7/docs/pprof-test-big.gif b/src/third_party/gperftools-2.7/docs/pprof-test-big.gif Binary files differnew file mode 100644 index 00000000000..67a1240fc10 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/pprof-test-big.gif diff --git a/src/third_party/gperftools-2.7/docs/pprof-test.gif b/src/third_party/gperftools-2.7/docs/pprof-test.gif Binary files differnew file mode 100644 index 00000000000..9eeab8ad230 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/pprof-test.gif diff --git a/src/third_party/gperftools-2.7/docs/pprof-vsnprintf-big.gif b/src/third_party/gperftools-2.7/docs/pprof-vsnprintf-big.gif Binary files differnew file mode 100644 index 00000000000..2ab292abac5 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/pprof-vsnprintf-big.gif diff --git a/src/third_party/gperftools-2.7/docs/pprof-vsnprintf.gif b/src/third_party/gperftools-2.7/docs/pprof-vsnprintf.gif Binary files differnew file mode 100644 index 00000000000..42a85472cae --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/pprof-vsnprintf.gif diff --git a/src/third_party/gperftools-2.7/docs/pprof.1 b/src/third_party/gperftools-2.7/docs/pprof.1 new file mode 100644 index 00000000000..f0f6cafc1ad --- /dev/null 
+++ b/src/third_party/gperftools-2.7/docs/pprof.1 @@ -0,0 +1,131 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.23. +.TH PPROF "1" "February 2005" "pprof (part of gperftools)" Google +.SH NAME +pprof \- manual page for pprof (part of gperftools) +.SH SYNOPSIS +.B pprof +[\fIoptions\fR] \fI<program> <profile>\fR +.SH DESCRIPTION +.IP +Prints specified cpu- or heap-profile +.SH OPTIONS +.TP +\fB\-\-cum\fR +Sort by cumulative data +.TP +\fB\-\-base=\fR<base> +Subtract <base> from <profile> before display +.SS "Reporting Granularity:" +.TP +\fB\-\-addresses\fR +Report at address level +.TP +\fB\-\-lines\fR +Report at source line level +.TP +\fB\-\-functions\fR +Report at function level [default] +.TP +\fB\-\-files\fR +Report at source file level +.SS "Output type:" +.TP +\fB\-\-text\fR +Generate text report [default] +.TP +\fB\-\-gv\fR +Generate Postscript and display +.TP +\fB\-\-list=\fR<regexp> +Generate source listing of matching routines +.TP +\fB\-\-disasm=\fR<regexp> +Generate disassembly of matching routines +.TP +\fB\-\-dot\fR +Generate DOT file to stdout +.TP +\fB\-\-ps\fR +Generate Postscript to stdout +.TP +\fB\-\-pdf\fR +Generate PDF to stdout +.TP +\fB\-\-gif\fR +Generate GIF to stdout +.SS "Heap-Profile Options:" +.TP +\fB\-\-inuse_space\fR +Display in-use (mega)bytes [default] +.TP +\fB\-\-inuse_objects\fR +Display in-use objects +.TP +\fB\-\-alloc_space\fR +Display allocated (mega)bytes +.TP +\fB\-\-alloc_objects\fR +Display allocated objects +.TP +\fB\-\-show_bytes\fR +Display space in bytes +.TP +\fB\-\-drop_negative\fR +Ignore negaive differences +.SS "Call-graph Options:" +.TP +\fB\-\-nodecount=\fR<n> +Show at most so many nodes [default=80] +.TP +\fB\-\-nodefraction=\fR<f> +Hide nodes below <f>*total [default=.005] +.TP +\fB\-\-edgefraction=\fR<f> +Hide edges below <f>*total [default=.001] +.TP +\fB\-\-focus=\fR<regexp> +Focus on nodes matching <regexp> +.TP +\fB\-\-ignore=\fR<regexp> +Ignore nodes matching <regexp> +.TP 
+\fB\-\-scale=\fR<n> +Set GV scaling [default=0] +.SH EXAMPLES + +pprof /bin/ls ls.prof +.IP +Outputs one line per procedure +.PP +pprof \fB\-\-gv\fR /bin/ls ls.prof +.IP +Displays annotated call-graph via 'gv' +.PP +pprof \fB\-\-gv\fR \fB\-\-focus\fR=\fIMutex\fR /bin/ls ls.prof +.IP +Restricts to code paths including a .*Mutex.* entry +.PP +pprof \fB\-\-gv\fR \fB\-\-focus\fR=\fIMutex\fR \fB\-\-ignore\fR=\fIstring\fR /bin/ls ls.prof +.IP +Code paths including Mutex but not string +.PP +pprof \fB\-\-list\fR=\fIgetdir\fR /bin/ls ls.prof +.IP +Dissassembly (with per-line annotations) for getdir() +.PP +pprof \fB\-\-disasm\fR=\fIgetdir\fR /bin/ls ls.prof +.IP +Dissassembly (with per-PC annotations) for getdir() +.SH COPYRIGHT +Copyright \(co 2005 Google Inc. +.SH "SEE ALSO" +Further documentation for +.B pprof +is maintained as a web page called +.B cpu_profiler.html +and is likely installed at one of the following locations: +.IP +.B /usr/share/gperftools/cpu_profiler.html +.br +.B /usr/local/share/gperftools/cpu_profiler.html +.PP diff --git a/src/third_party/gperftools-2.7/docs/pprof.see_also b/src/third_party/gperftools-2.7/docs/pprof.see_also new file mode 100644 index 00000000000..f2caf521258 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/pprof.see_also @@ -0,0 +1,11 @@ +[see also] +Further documentation for +.B pprof +is maintained as a web page called +.B cpu_profiler.html +and is likely installed at one of the following locations: +.IP +.B /usr/share/gperftools/cpu_profiler.html +.br +.B /usr/local/share/gperftools/cpu_profiler.html +.PP diff --git a/src/third_party/gperftools-2.7/docs/pprof_remote_servers.html b/src/third_party/gperftools-2.7/docs/pprof_remote_servers.html new file mode 100644 index 00000000000..e30e6129c54 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/pprof_remote_servers.html @@ -0,0 +1,260 @@ +<HTML> + +<HEAD> +<title>pprof and Remote Servers</title> +</HEAD> + +<BODY> + +<h1><code>pprof</code> and Remote Servers</h1> + 
+<p>In mid-2006, we added an experimental facility to <A +HREF="cpu_profiler.html">pprof</A>, the tool that analyzes CPU and +heap profiles. This facility allows you to collect profile +information from running applications. It makes it easy to collect +profile information without having to stop the program first, and +without having to log into the machine where the application is +running. This is meant to be used on webservers, but will work on any +application that can be modified to accept TCP connections on a port +of its choosing, and to respond to HTTP requests on that port.</p> + +<p>We do not currently have infrastructure, such as apache modules, +that you can pop into a webserver or other application to get the +necessary functionality "for free." However, it's easy to generate +the necessary data, which should allow the interested developer to add +the necessary support into his or her applications.</p> + +<p>To use <code>pprof</code> in this experimental "server" mode, you +give the script a host and port it should query, replacing the normal +commandline arguments of application + profile file:</p> +<pre> + % pprof internalweb.mycompany.com:80 +</pre> + +<p>The host must be listening on that port, and be able to accept HTTP/1.0 +requests -- sent via <code>wget</code> and <code>curl</code> -- for +several urls. The following sections list the urls that +<code>pprof</code> can send, and the responses it expects in +return.</p> + +<p>Here are examples that pprof will recognize, when you give them +on the commandline, are urls. 
In general, you +specify the host and a port (the port-number is required), and put +the service-name at the end of the url:</p> +<blockquote><pre> +http://myhost:80/pprof/heap # retrieves a heap profile +http://myhost:8008/pprof/profile # retrieves a CPU profile +http://myhost:80 # retrieves a CPU profile (the default) +http://myhost:8080/ # retrieves a CPU profile (the default) +myhost:8088/pprof/growth # "http://" is optional, but port is not +http://myhost:80/myservice/pprof/heap # /pprof/heap just has to come at the end +http://myhost:80/pprof/pmuprofile # CPU profile using performance counters +</pre></blockquote> + +<h2> <code><b>/pprof/heap</b></code> </h2> + +<p><code>pprof</code> asks for the url <code>/pprof/heap</code> to +get heap information. The actual url is controlled via the variable +<code>HEAP_PAGE</code> in the <code>pprof</code> script, so you +can change it if you'd like.</p> + +<p>There are two ways to get this data. The first is to call</p> +<pre> + MallocExtension::instance()->GetHeapSample(&output); +</pre> +<p>and have the server send <code>output</code> back as an HTTP +response to <code>pprof</code>. <code>MallocExtension</code> is +defined in the header file <code>gperftools/malloc_extension.h</code>.</p> + +<p>Note this will only work if the binary is being run with +sampling turned on (which is not the default). To do this, set the +environment variable <code>TCMALLOC_SAMPLE_PARAMETER</code> to a +positive value, such as 524288, before running.</p> + +<p>The other way is to call <code>HeapProfilerStart(filename)</code> +(from <code>heap-profiler.h</code>), continue to do work, and then, +some number of seconds later, call <code>GetHeapProfile()</code> +(followed by <code>HeapProfilerStop()</code>). The server can send +the output of <code>GetHeapProfile</code> back as the HTTP response to +pprof. (Note you must <code>free()</code> this data after using it.) 
+This is similar to how <A HREF="#profile">profile requests</A> are +handled, below. This technique does not require the application to +run with sampling turned on.</p> + +<p>Here's an example of what the output should look like:</p> +<pre> +heap profile: 1923: 127923432 [ 1923: 127923432] @ heap_v2/524288 + 1: 312 [ 1: 312] @ 0x2aaaabaf5ccc 0x2aaaaba4cd2c 0x2aaaac08c09a + 928: 122586016 [ 928: 122586016] @ 0x2aaaabaf682c 0x400680 0x400bdd 0x2aaaab1c368a 0x2aaaab1c8f77 0x2aaaab1c0396 0x2aaaab1c86ed 0x4007ff 0x2aaaaca62afa + 1: 16 [ 1: 16] @ 0x2aaaabaf5ccc 0x2aaaabb04bac 0x2aaaabc1b262 0x2aaaabc21496 0x2aaaabc214bb +[...] +</pre> + + +<p> Older code may produce "version 1" heap profiles which look like this:<p/> +<pre> +heap profile: 14933: 791700132 [ 14933: 791700132] @ heap + 1: 848688 [ 1: 848688] @ 0xa4b142 0x7f5bfc 0x87065e 0x4056e9 0x4125f8 0x42b4f1 0x45b1ba 0x463248 0x460871 0x45cb7c 0x5f1744 0x607cee 0x5f4a5e 0x40080f 0x2aaaabad7afa + 1: 1048576 [ 1: 1048576] @ 0xa4a9b2 0x7fd025 0x4ca6d8 0x4ca814 0x4caa88 0x2aaaab104cf0 0x404e20 0x4125f8 0x42b4f1 0x45b1ba 0x463248 0x460871 0x45cb7c 0x5f1744 0x607cee 0x5f4a5e 0x40080f 0x2aaaabad7afa + 2942: 388629374 [ 2942: 388629374] @ 0xa4b142 0x4006a0 0x400bed 0x5f0cfa 0x5f1744 0x607cee 0x5f4a5e 0x40080f 0x2aaaabad7afa +[...] +</pre> +<p>pprof accepts both old and new heap profiles and automatically +detects which one you are using.</p> + +<h2> <code><b>/pprof/growth</b></code> </h2> + +<p><code>pprof</code> asks for the url <code>/pprof/growth</code> to +get heap-profiling delta (growth) information. The actual url is +controlled via the variable <code>GROWTH_PAGE</code> in the +<code>pprof</code> script, so you can change it if you'd like.</p> + +<p>The server should respond by calling</p> +<pre> + MallocExtension::instance()->GetHeapGrowthStacks(&output); +</pre> +<p>and sending <code>output</code> back as an HTTP response to +<code>pprof</code>. 
<code>MallocExtension</code> is defined in the +header file <code>gperftools/malloc_extension.h</code>.</p> + +<p>Here's an example, from an actual Google webserver, of what the +output should look like:</p> +<pre> +heap profile: 741: 812122112 [ 741: 812122112] @ growth + 1: 1572864 [ 1: 1572864] @ 0x87da564 0x87db8a3 0x84787a4 0x846e851 0x836d12f 0x834cd1c 0x8349ba5 0x10a3177 0x8349961 + 1: 1048576 [ 1: 1048576] @ 0x87d92e8 0x87d9213 0x87d9178 0x87d94d3 0x87da9da 0x8a364ff 0x8a437e7 0x8ab7d23 0x8ab7da9 0x8ac7454 0x8348465 0x10a3161 0x8349961 +[...] +</pre> + + +<h2> <A NAME="profile"><code><b>/pprof/profile</b></code></A> </h2> + +<p><code>pprof</code> asks for the url +<code>/pprof/profile?seconds=XX</code> to get cpu-profiling +information. The actual url is controlled via the variable +<code>PROFILE_PAGE</code> in the <code>pprof</code> script, so you can +change it if you'd like.</p> + +<p>The server should respond by calling +<code>ProfilerStart(filename)</code>, continuing to do its work, and +then, XX seconds later, calling <code>ProfilerStop()</code>. (These +functions are declared in <code>gperftools/profiler.h</code>.) The +application is responsible for picking a unique filename for +<code>ProfilerStart()</code>. After calling +<code>ProfilerStop()</code>, the server should read the contents of +<code>filename</code> and send them back as an HTTP response to +<code>pprof</code>.</p> + +<p>Obviously, to get useful profile information the application must +continue to run in the XX seconds that the profiler is running. 
+Thus, +the profile start-stop calls should be done in a separate thread, or +be otherwise non-blocking.</p> + +<p>The profiler output file is binary, but near the end of it, it +should have lines of text somewhat like this:</p> +<pre> +01016000-01017000 rw-p 00015000 03:01 59314 /lib/ld-2.2.2.so +</pre> + +<h2> <code><b>/pprof/pmuprofile</b></code> </h2> + +<code>pprof</code> asks for a url of the form +<code>/pprof/pmuprofile?event=hw_event:unit_mask&period=nnn&seconds=xxx</code> +to get cpu-profiling information. The actual url is controlled via the variable +<code>PMUPROFILE_PAGE</code> in the <code>pprof</code> script, so you can +change it if you'd like.</p> + +<p> +This is similar to pprof, but is meant to be used with your CPU's hardware +performance counters. The server could be implemented on top of a library +such as <a href="http://perfmon2.sourceforge.net/"> +<code>libpfm</code></a>. It should collect a sample every nnn occurrences +of the event and stop the sampling after xxx seconds. Much of the code +for <code>/pprof/profile</code> can be reused for this purpose. +</p> + +<p>The server side routines (the equivalent of +ProfilerStart/ProfilerStop) are not available as part of perftools, +so this URL is unlikely to be that useful.</p> + +<h2> <code><b>/pprof/contention</b></code> </h2> + +<p>This is intended to be able to profile (thread) lock contention in +addition to CPU and memory use. It's not yet usable.</p> + + +<h2> <code><b>/pprof/cmdline</b></code> </h2> + +<p><code>pprof</code> asks for the url <code>/pprof/cmdline</code> to +figure out what application it's profiling. 
The actual url is +controlled via the variable <code>PROGRAM_NAME_PAGE</code> in the +<code>pprof</code> script, so you can change it if you'd like.</p> + +<p>The server should respond by reading the contents of +<code>/proc/self/cmdline</code>, converting all internal NUL (\0) +characters to newlines, and sending the result back as an HTTP +response to <code>pprof</code>.</p> + +<p>Here's an example return value:<p> +<pre> +/root/server/custom_webserver +80 +--configfile=/root/server/ws.config +</pre> + + +<h2> <code><b>/pprof/symbol</b></code> </h2> + +<p><code>pprof</code> asks for the url <code>/pprof/symbol</code> to +map from hex addresses to variable names. The actual url is +controlled via the variable <code>SYMBOL_PAGE</code> in the +<code>pprof</code> script, so you can change it if you'd like.</p> + +<p>When the server receives a GET request for +<code>/pprof/symbol</code>, it should return a line formatted like +so:</p> +<pre> + num_symbols: ### +</pre> +<p>where <code>###</code> is the number of symbols found in the +binary. (For now, the only important distinction is whether the value +is 0, which it is for executables that lack debug information, or +not-0).</p> + +<p>This is perhaps the hardest request to write code for, because in +addition to the GET request for this url, the server must accept POST +requests. 
This means that after the HTTP headers, pprof will pass in +a list of hex addresses connected by <code>+</code>, like so:</p> +<pre> + curl -d '0x0824d061+0x0824d1cf' http://remote_host:80/pprof/symbol +</pre> + +<p>The server should read the POST data, which will be in one line, +and for each hex value, should write one line of output to the output +stream, like so:</p> +<pre> +<hex address><tab><function name> +</pre> +<p>For instance:</p> +<pre> +0x08b2dabd _Update +</pre> + +<p>The other reason this is the most difficult request to implement, +is that the application will have to figure out for itself how to map +from address to function name. One possibility is to run <code>nm -C +-n <program name></code> to get the mappings at +program-compile-time. Another, at least on Linux, is to call out to +addr2line for every <code>pprof/symbol</code> call, for instance +<code>addr2line -Cfse /proc/<getpid>/exe 0x12345678 0x876543210</code> +(presumably with some caching!)</p> + +<p><code>pprof</code> itself does just this for local profiles (not +ones that talk to remote servers); look at the subroutine +<code>GetProcedureBoundaries</code>.</p> + + +<hr> +Last modified: Mon Jun 12 21:30:14 PDT 2006 +</body> +</html> diff --git a/src/third_party/gperftools-2.7/docs/spanmap.dot b/src/third_party/gperftools-2.7/docs/spanmap.dot new file mode 100644 index 00000000000..3cb42abe5b2 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/spanmap.dot @@ -0,0 +1,22 @@ +digraph SpanMap { +node [shape=box, width=0.3, height=0.3] +nodesep=.05 + +map [shape=record, width=6, label="<f0>|<f1>|<f2>|<f3>|<f4>|<f5>|<f6>|<f7>|<f8>|<f9>|<f10>"] +S0 [label="a"] +S1 [label="b"] +S2 [label="c"] +S3 [label="d"] +map:f0 -> S0 +map:f1 -> S0 +map:f2 -> S1 +map:f3 -> S2 +map:f4 -> S2 +map:f5 -> S2 +map:f6 -> S2 +map:f7 -> S2 +map:f8 -> S3 +map:f9 -> S3 +map:f10 -> S3 + +} diff --git a/src/third_party/gperftools-2.7/docs/spanmap.gif b/src/third_party/gperftools-2.7/docs/spanmap.gif Binary files 
differnew file mode 100644 index 00000000000..a0627f6a71a --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/spanmap.gif diff --git a/src/third_party/gperftools-2.7/docs/t-test1.times.txt b/src/third_party/gperftools-2.7/docs/t-test1.times.txt new file mode 100644 index 00000000000..016369385b2 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/t-test1.times.txt @@ -0,0 +1,480 @@ +time.1.ptmalloc.64:0.56 user 0.02 system 0.57 elapsed 100% CPU +time.1.tcmalloc.64:0.38 user 0.02 system 0.40 elapsed 98% CPU +time.1.ptmalloc.128:0.61 user 0.01 system 0.61 elapsed 101% CPU +time.1.tcmalloc.128:0.35 user 0.00 system 0.35 elapsed 99% CPU +time.1.ptmalloc.256:0.59 user 0.01 system 0.60 elapsed 100% CPU +time.1.tcmalloc.256:0.27 user 0.02 system 0.28 elapsed 102% CPU +time.1.ptmalloc.512:0.57 user 0.00 system 0.57 elapsed 100% CPU +time.1.tcmalloc.512:0.25 user 0.01 system 0.25 elapsed 101% CPU +time.1.ptmalloc.1024:0.52 user 0.00 system 0.52 elapsed 99% CPU +time.1.tcmalloc.1024:0.22 user 0.02 system 0.24 elapsed 97% CPU +time.1.ptmalloc.2048:0.47 user 0.00 system 0.47 elapsed 99% CPU +time.1.tcmalloc.2048:0.22 user 0.02 system 0.25 elapsed 95% CPU +time.1.ptmalloc.4096:0.48 user 0.01 system 0.48 elapsed 100% CPU +time.1.tcmalloc.4096:0.25 user 0.01 system 0.25 elapsed 100% CPU +time.1.ptmalloc.8192:0.49 user 0.02 system 0.49 elapsed 102% CPU +time.1.tcmalloc.8192:0.27 user 0.02 system 0.28 elapsed 101% CPU +time.1.ptmalloc.16384:0.51 user 0.04 system 0.55 elapsed 99% CPU +time.1.tcmalloc.16384:0.35 user 0.02 system 0.37 elapsed 100% CPU +time.1.ptmalloc.32768:0.53 user 0.14 system 0.66 elapsed 100% CPU +time.1.tcmalloc.32768:0.67 user 0.02 system 0.69 elapsed 99% CPU +time.1.ptmalloc.65536:0.68 user 0.31 system 0.98 elapsed 100% CPU +time.1.tcmalloc.65536:0.71 user 0.01 system 0.72 elapsed 99% CPU +time.1.ptmalloc.131072:0.90 user 0.72 system 1.62 elapsed 99% CPU +time.1.tcmalloc.131072:0.94 user 0.03 system 0.97 elapsed 99% CPU +time.2.ptmalloc.64:1.05 user 
0.00 system 0.53 elapsed 196% CPU +time.2.tcmalloc.64:0.66 user 0.03 system 0.37 elapsed 185% CPU +time.2.ptmalloc.128:1.77 user 0.01 system 0.89 elapsed 198% CPU +time.2.tcmalloc.128:0.53 user 0.01 system 0.29 elapsed 184% CPU +time.2.ptmalloc.256:1.14 user 0.01 system 0.62 elapsed 182% CPU +time.2.tcmalloc.256:0.45 user 0.02 system 0.26 elapsed 180% CPU +time.2.ptmalloc.512:1.26 user 0.40 system 1.79 elapsed 92% CPU +time.2.tcmalloc.512:0.43 user 0.02 system 0.27 elapsed 166% CPU +time.2.ptmalloc.1024:0.98 user 0.03 system 0.56 elapsed 179% CPU +time.2.tcmalloc.1024:0.44 user 0.02 system 0.34 elapsed 134% CPU +time.2.ptmalloc.2048:0.87 user 0.02 system 0.44 elapsed 199% CPU +time.2.tcmalloc.2048:0.49 user 0.02 system 0.34 elapsed 148% CPU +time.2.ptmalloc.4096:0.92 user 0.03 system 0.48 elapsed 196% CPU +time.2.tcmalloc.4096:0.50 user 0.02 system 0.49 elapsed 105% CPU +time.2.ptmalloc.8192:1.05 user 0.04 system 0.55 elapsed 196% CPU +time.2.tcmalloc.8192:0.59 user 0.01 system 0.51 elapsed 116% CPU +time.2.ptmalloc.16384:1.30 user 0.14 system 0.72 elapsed 198% CPU +time.2.tcmalloc.16384:0.63 user 0.03 system 0.68 elapsed 96% CPU +time.2.ptmalloc.32768:1.33 user 0.56 system 1.00 elapsed 189% CPU +time.2.tcmalloc.32768:1.16 user 0.01 system 1.17 elapsed 99% CPU +time.2.ptmalloc.65536:1.86 user 1.79 system 2.01 elapsed 181% CPU +time.2.tcmalloc.65536:1.35 user 0.01 system 1.35 elapsed 100% CPU +time.2.ptmalloc.131072:2.61 user 5.19 system 4.81 elapsed 162% CPU +time.2.tcmalloc.131072:1.86 user 0.04 system 1.90 elapsed 100% CPU +time.3.ptmalloc.64:1.79 user 0.03 system 0.67 elapsed 268% CPU +time.3.tcmalloc.64:1.58 user 0.04 system 0.62 elapsed 260% CPU +time.3.ptmalloc.128:2.77 user 1.34 system 3.07 elapsed 133% CPU +time.3.tcmalloc.128:1.19 user 0.01 system 0.50 elapsed 236% CPU +time.3.ptmalloc.256:2.14 user 0.02 system 0.85 elapsed 252% CPU +time.3.tcmalloc.256:0.96 user 0.01 system 0.41 elapsed 236% CPU +time.3.ptmalloc.512:3.37 user 1.31 system 3.33 elapsed 140% 
CPU +time.3.tcmalloc.512:0.93 user 0.04 system 0.39 elapsed 243% CPU +time.3.ptmalloc.1024:1.66 user 0.01 system 0.64 elapsed 260% CPU +time.3.tcmalloc.1024:0.81 user 0.02 system 0.44 elapsed 187% CPU +time.3.ptmalloc.2048:2.07 user 0.01 system 0.82 elapsed 252% CPU +time.3.tcmalloc.2048:1.10 user 0.04 system 0.59 elapsed 191% CPU +time.3.ptmalloc.4096:2.01 user 0.03 system 0.79 elapsed 258% CPU +time.3.tcmalloc.4096:0.87 user 0.03 system 0.65 elapsed 137% CPU +time.3.ptmalloc.8192:2.22 user 0.11 system 0.83 elapsed 280% CPU +time.3.tcmalloc.8192:0.96 user 0.06 system 0.75 elapsed 135% CPU +time.3.ptmalloc.16384:2.56 user 0.47 system 1.02 elapsed 295% CPU +time.3.tcmalloc.16384:0.99 user 0.04 system 1.03 elapsed 99% CPU +time.3.ptmalloc.32768:3.29 user 1.75 system 1.96 elapsed 256% CPU +time.3.tcmalloc.32768:1.67 user 0.02 system 1.69 elapsed 99% CPU +time.3.ptmalloc.65536:4.04 user 6.62 system 4.92 elapsed 216% CPU +time.3.tcmalloc.65536:1.91 user 0.02 system 1.98 elapsed 97% CPU +time.3.ptmalloc.131072:5.55 user 17.86 system 12.44 elapsed 188% CPU +time.3.tcmalloc.131072:2.78 user 0.02 system 2.82 elapsed 99% CPU +time.4.ptmalloc.64:3.42 user 1.36 system 3.20 elapsed 149% CPU +time.4.tcmalloc.64:2.42 user 0.02 system 0.71 elapsed 341% CPU +time.4.ptmalloc.128:3.98 user 1.79 system 3.89 elapsed 148% CPU +time.4.tcmalloc.128:1.87 user 0.02 system 0.58 elapsed 325% CPU +time.4.ptmalloc.256:4.06 user 2.14 system 4.12 elapsed 150% CPU +time.4.tcmalloc.256:1.69 user 0.02 system 0.51 elapsed 331% CPU +time.4.ptmalloc.512:4.48 user 2.15 system 4.39 elapsed 150% CPU +time.4.tcmalloc.512:1.62 user 0.03 system 0.52 elapsed 314% CPU +time.4.ptmalloc.1024:3.18 user 0.03 system 0.84 elapsed 381% CPU +time.4.tcmalloc.1024:1.53 user 0.02 system 0.56 elapsed 274% CPU +time.4.ptmalloc.2048:3.24 user 0.02 system 0.84 elapsed 384% CPU +time.4.tcmalloc.2048:1.44 user 0.04 system 0.66 elapsed 221% CPU +time.4.ptmalloc.4096:3.50 user 0.04 system 0.91 elapsed 389% CPU 
+time.4.tcmalloc.4096:1.31 user 0.01 system 0.89 elapsed 148% CPU +time.4.ptmalloc.8192:6.77 user 3.85 system 4.14 elapsed 256% CPU +time.4.tcmalloc.8192:1.20 user 0.05 system 0.97 elapsed 127% CPU +time.4.ptmalloc.16384:7.08 user 5.06 system 4.63 elapsed 262% CPU +time.4.tcmalloc.16384:1.27 user 0.03 system 1.25 elapsed 103% CPU +time.4.ptmalloc.32768:5.57 user 4.22 system 3.31 elapsed 295% CPU +time.4.tcmalloc.32768:2.17 user 0.03 system 2.25 elapsed 97% CPU +time.4.ptmalloc.65536:6.11 user 15.05 system 9.19 elapsed 230% CPU +time.4.tcmalloc.65536:2.51 user 0.02 system 2.57 elapsed 98% CPU +time.4.ptmalloc.131072:7.58 user 33.15 system 21.28 elapsed 191% CPU +time.4.tcmalloc.131072:3.57 user 0.07 system 3.66 elapsed 99% CPU +time.5.ptmalloc.64:4.44 user 2.08 system 4.37 elapsed 148% CPU +time.5.tcmalloc.64:2.87 user 0.02 system 0.79 elapsed 361% CPU +time.5.ptmalloc.128:4.77 user 2.77 system 5.14 elapsed 146% CPU +time.5.tcmalloc.128:2.65 user 0.03 system 0.72 elapsed 367% CPU +time.5.ptmalloc.256:5.82 user 2.88 system 5.49 elapsed 158% CPU +time.5.tcmalloc.256:2.33 user 0.01 system 0.66 elapsed 352% CPU +time.5.ptmalloc.512:6.27 user 3.11 system 5.34 elapsed 175% CPU +time.5.tcmalloc.512:2.14 user 0.03 system 0.70 elapsed 307% CPU +time.5.ptmalloc.1024:6.82 user 3.18 system 5.23 elapsed 191% CPU +time.5.tcmalloc.1024:2.20 user 0.02 system 0.70 elapsed 313% CPU +time.5.ptmalloc.2048:6.57 user 3.46 system 5.22 elapsed 192% CPU +time.5.tcmalloc.2048:2.15 user 0.03 system 0.82 elapsed 264% CPU +time.5.ptmalloc.4096:8.75 user 5.09 system 5.26 elapsed 263% CPU +time.5.tcmalloc.4096:1.68 user 0.03 system 1.08 elapsed 158% CPU +time.5.ptmalloc.8192:4.48 user 0.61 system 1.51 elapsed 335% CPU +time.5.tcmalloc.8192:1.47 user 0.07 system 1.18 elapsed 129% CPU +time.5.ptmalloc.16384:5.71 user 1.98 system 2.14 elapsed 358% CPU +time.5.tcmalloc.16384:1.58 user 0.03 system 1.52 elapsed 105% CPU +time.5.ptmalloc.32768:7.19 user 7.81 system 5.53 elapsed 270% CPU 
+time.5.tcmalloc.32768:2.63 user 0.05 system 2.72 elapsed 98% CPU +time.5.ptmalloc.65536:8.45 user 23.51 system 14.30 elapsed 223% CPU +time.5.tcmalloc.65536:3.12 user 0.05 system 3.21 elapsed 98% CPU +time.5.ptmalloc.131072:10.22 user 43.63 system 27.84 elapsed 193% CPU +time.5.tcmalloc.131072:4.42 user 0.07 system 4.51 elapsed 99% CPU +time.6.ptmalloc.64:5.57 user 2.56 system 5.08 elapsed 159% CPU +time.6.tcmalloc.64:3.20 user 0.01 system 0.89 elapsed 360% CPU +time.6.ptmalloc.128:5.98 user 3.52 system 5.71 elapsed 166% CPU +time.6.tcmalloc.128:2.76 user 0.02 system 0.78 elapsed 355% CPU +time.6.ptmalloc.256:4.61 user 0.02 system 1.19 elapsed 389% CPU +time.6.tcmalloc.256:2.65 user 0.02 system 0.74 elapsed 356% CPU +time.6.ptmalloc.512:8.28 user 3.88 system 6.61 elapsed 183% CPU +time.6.tcmalloc.512:2.60 user 0.02 system 0.72 elapsed 362% CPU +time.6.ptmalloc.1024:4.75 user 0.00 system 1.22 elapsed 387% CPU +time.6.tcmalloc.1024:2.56 user 0.02 system 0.79 elapsed 325% CPU +time.6.ptmalloc.2048:8.90 user 4.59 system 6.15 elapsed 219% CPU +time.6.tcmalloc.2048:2.37 user 0.06 system 0.96 elapsed 250% CPU +time.6.ptmalloc.4096:11.41 user 7.02 system 6.31 elapsed 291% CPU +time.6.tcmalloc.4096:1.82 user 0.03 system 1.19 elapsed 154% CPU +time.6.ptmalloc.8192:11.64 user 8.25 system 5.97 elapsed 332% CPU +time.6.tcmalloc.8192:1.83 user 0.07 system 1.38 elapsed 136% CPU +time.6.ptmalloc.16384:7.44 user 2.98 system 3.01 elapsed 345% CPU +time.6.tcmalloc.16384:1.83 user 0.08 system 1.80 elapsed 105% CPU +time.6.ptmalloc.32768:8.69 user 12.35 system 8.04 elapsed 261% CPU +time.6.tcmalloc.32768:3.14 user 0.06 system 3.24 elapsed 98% CPU +time.6.ptmalloc.65536:10.52 user 35.43 system 20.75 elapsed 221% CPU +time.6.tcmalloc.65536:3.62 user 0.03 system 3.72 elapsed 98% CPU +time.6.ptmalloc.131072:11.74 user 59.00 system 36.93 elapsed 191% CPU +time.6.tcmalloc.131072:5.33 user 0.04 system 5.42 elapsed 98% CPU +time.7.ptmalloc.64:6.60 user 3.45 system 6.01 elapsed 167% CPU 
+time.7.tcmalloc.64:3.50 user 0.04 system 0.94 elapsed 376% CPU +time.7.ptmalloc.128:7.09 user 4.25 system 6.69 elapsed 169% CPU +time.7.tcmalloc.128:3.13 user 0.03 system 0.84 elapsed 374% CPU +time.7.ptmalloc.256:9.28 user 4.85 system 7.20 elapsed 196% CPU +time.7.tcmalloc.256:3.06 user 0.02 system 0.82 elapsed 375% CPU +time.7.ptmalloc.512:9.13 user 4.78 system 6.79 elapsed 204% CPU +time.7.tcmalloc.512:2.99 user 0.03 system 0.83 elapsed 359% CPU +time.7.ptmalloc.1024:10.85 user 6.41 system 7.52 elapsed 229% CPU +time.7.tcmalloc.1024:3.05 user 0.04 system 0.89 elapsed 345% CPU +time.7.ptmalloc.2048:5.65 user 0.08 system 1.47 elapsed 388% CPU +time.7.tcmalloc.2048:3.01 user 0.01 system 0.98 elapsed 306% CPU +time.7.ptmalloc.4096:6.09 user 0.08 system 1.58 elapsed 389% CPU +time.7.tcmalloc.4096:2.25 user 0.03 system 1.32 elapsed 171% CPU +time.7.ptmalloc.8192:6.73 user 0.85 system 1.99 elapsed 379% CPU +time.7.tcmalloc.8192:2.22 user 0.08 system 1.61 elapsed 142% CPU +time.7.ptmalloc.16384:8.87 user 4.66 system 4.04 elapsed 334% CPU +time.7.tcmalloc.16384:2.07 user 0.07 system 2.07 elapsed 103% CPU +time.7.ptmalloc.32768:10.61 user 17.85 system 11.22 elapsed 253% CPU +time.7.tcmalloc.32768:3.68 user 0.06 system 3.79 elapsed 98% CPU +time.7.ptmalloc.65536:13.05 user 45.97 system 27.28 elapsed 216% CPU +time.7.tcmalloc.65536:4.16 user 0.07 system 4.31 elapsed 98% CPU +time.7.ptmalloc.131072:13.22 user 62.67 system 41.33 elapsed 183% CPU +time.7.tcmalloc.131072:6.10 user 0.06 system 6.25 elapsed 98% CPU +time.8.ptmalloc.64:7.31 user 3.92 system 6.39 elapsed 175% CPU +time.8.tcmalloc.64:4.00 user 0.01 system 1.04 elapsed 383% CPU +time.8.ptmalloc.128:9.40 user 5.41 system 7.67 elapsed 192% CPU +time.8.tcmalloc.128:3.61 user 0.02 system 0.94 elapsed 386% CPU +time.8.ptmalloc.256:10.61 user 6.35 system 7.96 elapsed 212% CPU +time.8.tcmalloc.256:3.30 user 0.02 system 0.99 elapsed 335% CPU +time.8.ptmalloc.512:12.42 user 7.10 system 8.79 elapsed 221% CPU 
+time.8.tcmalloc.512:3.35 user 0.04 system 0.94 elapsed 358% CPU +time.8.ptmalloc.1024:13.63 user 8.54 system 8.95 elapsed 247% CPU +time.8.tcmalloc.1024:3.44 user 0.02 system 0.96 elapsed 359% CPU +time.8.ptmalloc.2048:6.45 user 0.03 system 1.67 elapsed 386% CPU +time.8.tcmalloc.2048:3.55 user 0.05 system 1.09 elapsed 328% CPU +time.8.ptmalloc.4096:6.83 user 0.26 system 1.80 elapsed 393% CPU +time.8.tcmalloc.4096:2.78 user 0.06 system 1.53 elapsed 185% CPU +time.8.ptmalloc.8192:7.59 user 1.29 system 2.36 elapsed 376% CPU +time.8.tcmalloc.8192:2.57 user 0.07 system 1.84 elapsed 142% CPU +time.8.ptmalloc.16384:10.15 user 6.20 system 5.20 elapsed 314% CPU +time.8.tcmalloc.16384:2.40 user 0.05 system 2.42 elapsed 101% CPU +time.8.ptmalloc.32768:11.82 user 24.48 system 14.60 elapsed 248% CPU +time.8.tcmalloc.32768:4.37 user 0.05 system 4.47 elapsed 98% CPU +time.8.ptmalloc.65536:15.41 user 58.94 system 34.42 elapsed 215% CPU +time.8.tcmalloc.65536:4.90 user 0.04 system 4.96 elapsed 99% CPU +time.8.ptmalloc.131072:16.07 user 82.93 system 52.51 elapsed 188% CPU +time.8.tcmalloc.131072:7.13 user 0.04 system 7.19 elapsed 99% CPU +time.9.ptmalloc.64:8.44 user 4.59 system 6.92 elapsed 188% CPU +time.9.tcmalloc.64:4.00 user 0.02 system 1.05 elapsed 382% CPU +time.9.ptmalloc.128:10.92 user 6.14 system 8.31 elapsed 205% CPU +time.9.tcmalloc.128:3.88 user 0.02 system 1.01 elapsed 382% CPU +time.9.ptmalloc.256:13.01 user 7.75 system 9.12 elapsed 227% CPU +time.9.tcmalloc.256:3.89 user 0.01 system 1.00 elapsed 386% CPU +time.9.ptmalloc.512:14.96 user 8.89 system 9.73 elapsed 244% CPU +time.9.tcmalloc.512:3.80 user 0.03 system 1.01 elapsed 377% CPU +time.9.ptmalloc.1024:15.42 user 10.20 system 9.80 elapsed 261% CPU +time.9.tcmalloc.1024:3.86 user 0.03 system 1.19 elapsed 325% CPU +time.9.ptmalloc.2048:7.24 user 0.02 system 1.87 elapsed 388% CPU +time.9.tcmalloc.2048:3.98 user 0.05 system 1.26 elapsed 319% CPU +time.9.ptmalloc.4096:7.96 user 0.18 system 2.06 elapsed 394% CPU 
+time.9.tcmalloc.4096:3.27 user 0.04 system 1.69 elapsed 195% CPU +time.9.ptmalloc.8192:9.00 user 1.63 system 2.79 elapsed 380% CPU +time.9.tcmalloc.8192:3.00 user 0.06 system 2.05 elapsed 148% CPU +time.9.ptmalloc.16384:12.07 user 8.13 system 6.55 elapsed 308% CPU +time.9.tcmalloc.16384:2.85 user 0.05 system 2.75 elapsed 105% CPU +time.9.ptmalloc.32768:13.99 user 29.65 system 18.02 elapsed 242% CPU +time.9.tcmalloc.32768:4.98 user 0.06 system 5.13 elapsed 98% CPU +time.9.ptmalloc.65536:16.89 user 70.42 system 42.11 elapsed 207% CPU +time.9.tcmalloc.65536:5.55 user 0.04 system 5.65 elapsed 98% CPU +time.9.ptmalloc.131072:18.53 user 94.11 system 61.17 elapsed 184% CPU +time.9.tcmalloc.131072:8.06 user 0.04 system 8.16 elapsed 99% CPU +time.10.ptmalloc.64:9.81 user 5.70 system 7.42 elapsed 208% CPU +time.10.tcmalloc.64:4.43 user 0.03 system 1.20 elapsed 370% CPU +time.10.ptmalloc.128:12.69 user 7.81 system 9.02 elapsed 227% CPU +time.10.tcmalloc.128:4.27 user 0.02 system 1.13 elapsed 378% CPU +time.10.ptmalloc.256:15.04 user 9.53 system 9.92 elapsed 247% CPU +time.10.tcmalloc.256:4.23 user 0.02 system 1.09 elapsed 388% CPU +time.10.ptmalloc.512:17.30 user 10.46 system 10.61 elapsed 261% CPU +time.10.tcmalloc.512:4.14 user 0.05 system 1.10 elapsed 379% CPU +time.10.ptmalloc.1024:16.96 user 9.38 system 9.30 elapsed 283% CPU +time.10.tcmalloc.1024:4.27 user 0.06 system 1.18 elapsed 366% CPU +time.10.ptmalloc.2048:8.07 user 0.03 system 2.06 elapsed 393% CPU +time.10.tcmalloc.2048:4.49 user 0.07 system 1.33 elapsed 342% CPU +time.10.ptmalloc.4096:8.66 user 0.25 system 2.25 elapsed 394% CPU +time.10.tcmalloc.4096:3.61 user 0.05 system 1.78 elapsed 205% CPU +time.10.ptmalloc.8192:21.52 user 17.43 system 10.41 elapsed 374% CPU +time.10.tcmalloc.8192:3.59 user 0.10 system 2.33 elapsed 158% CPU +time.10.ptmalloc.16384:20.55 user 24.85 system 12.55 elapsed 361% CPU +time.10.tcmalloc.16384:3.29 user 0.04 system 3.22 elapsed 103% CPU +time.10.ptmalloc.32768:15.23 user 38.13 
system 22.49 elapsed 237% CPU +time.10.tcmalloc.32768:5.62 user 0.05 system 5.72 elapsed 99% CPU +time.10.ptmalloc.65536:19.80 user 85.42 system 49.98 elapsed 210% CPU +time.10.tcmalloc.65536:6.23 user 0.09 system 6.36 elapsed 99% CPU +time.10.ptmalloc.131072:20.91 user 106.97 system 69.08 elapsed 185% CPU +time.10.tcmalloc.131072:8.94 user 0.09 system 9.09 elapsed 99% CPU +time.11.ptmalloc.64:10.82 user 6.34 system 7.92 elapsed 216% CPU +time.11.tcmalloc.64:4.80 user 0.03 system 1.24 elapsed 387% CPU +time.11.ptmalloc.128:14.58 user 8.61 system 9.81 elapsed 236% CPU +time.11.tcmalloc.128:4.65 user 0.03 system 1.21 elapsed 384% CPU +time.11.ptmalloc.256:17.38 user 10.98 system 10.75 elapsed 263% CPU +time.11.tcmalloc.256:4.51 user 0.03 system 1.18 elapsed 384% CPU +time.11.ptmalloc.512:19.18 user 11.71 system 10.95 elapsed 282% CPU +time.11.tcmalloc.512:4.57 user 0.02 system 1.19 elapsed 384% CPU +time.11.ptmalloc.1024:19.94 user 12.41 system 10.48 elapsed 308% CPU +time.11.tcmalloc.1024:4.71 user 0.05 system 1.29 elapsed 367% CPU +time.11.ptmalloc.2048:8.70 user 0.04 system 2.35 elapsed 371% CPU +time.11.tcmalloc.2048:4.97 user 0.07 system 1.43 elapsed 350% CPU +time.11.ptmalloc.4096:22.47 user 18.43 system 10.82 elapsed 377% CPU +time.11.tcmalloc.4096:4.22 user 0.03 system 1.91 elapsed 221% CPU +time.11.ptmalloc.8192:11.61 user 2.38 system 3.73 elapsed 374% CPU +time.11.tcmalloc.8192:3.74 user 0.09 system 2.46 elapsed 155% CPU +time.11.ptmalloc.16384:14.13 user 13.38 system 9.60 elapsed 286% CPU +time.11.tcmalloc.16384:3.61 user 0.03 system 3.63 elapsed 100% CPU +time.11.ptmalloc.32768:17.92 user 43.84 system 26.74 elapsed 230% CPU +time.11.tcmalloc.32768:6.31 user 0.03 system 6.45 elapsed 98% CPU +time.11.ptmalloc.65536:22.40 user 96.38 system 58.30 elapsed 203% CPU +time.11.tcmalloc.65536:6.92 user 0.12 system 6.98 elapsed 100% CPU +time.11.ptmalloc.131072:21.03 user 108.04 system 72.78 elapsed 177% CPU +time.11.tcmalloc.131072:9.79 user 0.08 system 9.94 
elapsed 99% CPU +time.12.ptmalloc.64:12.23 user 7.16 system 8.38 elapsed 231% CPU +time.12.tcmalloc.64:5.21 user 0.05 system 1.41 elapsed 371% CPU +time.12.ptmalloc.128:16.97 user 10.19 system 10.47 elapsed 259% CPU +time.12.tcmalloc.128:5.10 user 0.02 system 1.31 elapsed 390% CPU +time.12.ptmalloc.256:19.99 user 12.10 system 11.57 elapsed 277% CPU +time.12.tcmalloc.256:5.01 user 0.03 system 1.29 elapsed 390% CPU +time.12.ptmalloc.512:21.85 user 12.66 system 11.46 elapsed 300% CPU +time.12.tcmalloc.512:5.05 user 0.00 system 1.32 elapsed 379% CPU +time.12.ptmalloc.1024:9.40 user 0.04 system 2.40 elapsed 393% CPU +time.12.tcmalloc.1024:5.14 user 0.02 system 1.39 elapsed 369% CPU +time.12.ptmalloc.2048:9.72 user 0.04 system 2.49 elapsed 391% CPU +time.12.tcmalloc.2048:5.74 user 0.05 system 1.62 elapsed 355% CPU +time.12.ptmalloc.4096:10.64 user 0.20 system 2.75 elapsed 393% CPU +time.12.tcmalloc.4096:4.45 user 0.03 system 2.04 elapsed 218% CPU +time.12.ptmalloc.8192:12.66 user 3.30 system 4.30 elapsed 371% CPU +time.12.tcmalloc.8192:4.21 user 0.13 system 2.65 elapsed 163% CPU +time.12.ptmalloc.16384:15.73 user 15.68 system 11.14 elapsed 281% CPU +time.12.tcmalloc.16384:4.17 user 0.06 system 4.10 elapsed 102% CPU +time.12.ptmalloc.32768:19.45 user 56.00 system 32.74 elapsed 230% CPU +time.12.tcmalloc.32768:6.96 user 0.08 system 7.14 elapsed 98% CPU +time.12.ptmalloc.65536:23.33 user 110.45 system 65.06 elapsed 205% CPU +time.12.tcmalloc.65536:7.77 user 0.15 system 7.72 elapsed 102% CPU +time.12.ptmalloc.131072:24.03 user 124.74 system 82.94 elapsed 179% CPU +time.12.tcmalloc.131072:10.81 user 0.06 system 10.94 elapsed 99% CPU +time.13.ptmalloc.64:14.08 user 7.60 system 8.85 elapsed 244% CPU +time.13.tcmalloc.64:5.51 user 0.01 system 1.47 elapsed 375% CPU +time.13.ptmalloc.128:18.20 user 10.98 system 10.99 elapsed 265% CPU +time.13.tcmalloc.128:5.34 user 0.01 system 1.39 elapsed 382% CPU +time.13.ptmalloc.256:21.48 user 13.94 system 12.25 elapsed 289% CPU 
+time.13.tcmalloc.256:5.33 user 0.01 system 1.39 elapsed 381% CPU +time.13.ptmalloc.512:24.22 user 14.84 system 12.97 elapsed 301% CPU +time.13.tcmalloc.512:5.49 user 0.02 system 1.41 elapsed 389% CPU +time.13.ptmalloc.1024:25.26 user 17.03 system 12.85 elapsed 328% CPU +time.13.tcmalloc.1024:5.65 user 0.04 system 1.50 elapsed 378% CPU +time.13.ptmalloc.2048:10.41 user 0.03 system 2.69 elapsed 387% CPU +time.13.tcmalloc.2048:5.93 user 0.10 system 1.77 elapsed 339% CPU +time.13.ptmalloc.4096:11.37 user 0.52 system 3.04 elapsed 391% CPU +time.13.tcmalloc.4096:5.08 user 0.11 system 2.22 elapsed 233% CPU +time.13.ptmalloc.8192:21.76 user 18.54 system 10.58 elapsed 380% CPU +time.13.tcmalloc.8192:5.04 user 0.16 system 2.93 elapsed 177% CPU +time.13.ptmalloc.16384:26.35 user 34.47 system 17.01 elapsed 357% CPU +time.13.tcmalloc.16384:4.66 user 0.04 system 4.66 elapsed 100% CPU +time.13.ptmalloc.32768:21.41 user 63.59 system 38.14 elapsed 222% CPU +time.13.tcmalloc.32768:7.71 user 0.03 system 7.83 elapsed 98% CPU +time.13.ptmalloc.65536:24.99 user 120.80 system 71.59 elapsed 203% CPU +time.13.tcmalloc.65536:8.87 user 0.64 system 8.37 elapsed 113% CPU +time.13.ptmalloc.131072:25.97 user 142.27 system 96.00 elapsed 175% CPU +time.13.tcmalloc.131072:11.48 user 0.06 system 11.67 elapsed 98% CPU +time.14.ptmalloc.64:15.01 user 9.11 system 9.41 elapsed 256% CPU +time.14.tcmalloc.64:5.98 user 0.02 system 1.58 elapsed 378% CPU +time.14.ptmalloc.128:20.34 user 12.72 system 11.62 elapsed 284% CPU +time.14.tcmalloc.128:5.88 user 0.04 system 1.51 elapsed 392% CPU +time.14.ptmalloc.256:24.26 user 14.95 system 12.92 elapsed 303% CPU +time.14.tcmalloc.256:5.72 user 0.02 system 1.50 elapsed 381% CPU +time.14.ptmalloc.512:27.28 user 16.45 system 13.89 elapsed 314% CPU +time.14.tcmalloc.512:5.99 user 0.02 system 1.54 elapsed 388% CPU +time.14.ptmalloc.1024:25.84 user 16.99 system 12.61 elapsed 339% CPU +time.14.tcmalloc.1024:5.94 user 0.06 system 1.59 elapsed 375% CPU 
+time.14.ptmalloc.2048:11.96 user 0.01 system 3.12 elapsed 382% CPU +time.14.tcmalloc.2048:6.39 user 0.07 system 1.79 elapsed 359% CPU +time.14.ptmalloc.4096:20.19 user 11.77 system 8.26 elapsed 386% CPU +time.14.tcmalloc.4096:5.65 user 0.05 system 2.32 elapsed 244% CPU +time.14.ptmalloc.8192:22.01 user 16.39 system 9.89 elapsed 387% CPU +time.14.tcmalloc.8192:5.44 user 0.11 system 3.07 elapsed 180% CPU +time.14.ptmalloc.16384:18.15 user 22.40 system 15.02 elapsed 269% CPU +time.14.tcmalloc.16384:5.29 user 0.08 system 5.34 elapsed 100% CPU +time.14.ptmalloc.32768:24.29 user 72.07 system 42.63 elapsed 225% CPU +time.14.tcmalloc.32768:8.47 user 0.02 system 8.62 elapsed 98% CPU +time.14.ptmalloc.65536:27.63 user 130.56 system 78.64 elapsed 201% CPU +time.14.tcmalloc.65536:9.85 user 1.61 system 9.04 elapsed 126% CPU +time.14.ptmalloc.131072:28.87 user 146.38 system 100.54 elapsed 174% CPU +time.14.tcmalloc.131072:12.46 user 0.11 system 12.71 elapsed 98% CPU +time.15.ptmalloc.64:16.25 user 10.05 system 9.82 elapsed 267% CPU +time.15.tcmalloc.64:6.30 user 0.02 system 1.64 elapsed 385% CPU +time.15.ptmalloc.128:22.33 user 13.23 system 12.24 elapsed 290% CPU +time.15.tcmalloc.128:6.08 user 0.03 system 1.59 elapsed 384% CPU +time.15.ptmalloc.256:26.56 user 16.57 system 13.70 elapsed 314% CPU +time.15.tcmalloc.256:6.14 user 0.03 system 1.61 elapsed 382% CPU +time.15.ptmalloc.512:29.68 user 18.08 system 14.56 elapsed 327% CPU +time.15.tcmalloc.512:6.12 user 0.04 system 1.68 elapsed 364% CPU +time.15.ptmalloc.1024:17.07 user 6.22 system 6.26 elapsed 371% CPU +time.15.tcmalloc.1024:6.38 user 0.02 system 1.75 elapsed 364% CPU +time.15.ptmalloc.2048:26.64 user 17.25 system 11.51 elapsed 381% CPU +time.15.tcmalloc.2048:6.77 user 0.18 system 1.92 elapsed 361% CPU +time.15.ptmalloc.4096:13.21 user 0.74 system 3.57 elapsed 390% CPU +time.15.tcmalloc.4096:6.03 user 0.09 system 2.36 elapsed 258% CPU +time.15.ptmalloc.8192:22.92 user 17.51 system 10.50 elapsed 385% CPU 
+time.15.tcmalloc.8192:5.96 user 0.12 system 3.36 elapsed 180% CPU +time.15.ptmalloc.16384:19.37 user 24.87 system 16.69 elapsed 264% CPU +time.15.tcmalloc.16384:5.88 user 0.07 system 5.84 elapsed 101% CPU +time.15.ptmalloc.32768:25.43 user 82.30 system 48.98 elapsed 219% CPU +time.15.tcmalloc.32768:9.11 user 0.05 system 9.30 elapsed 98% CPU +time.15.ptmalloc.65536:29.31 user 140.07 system 83.78 elapsed 202% CPU +time.15.tcmalloc.65536:8.51 user 1.59 system 9.75 elapsed 103% CPU +time.15.ptmalloc.131072:30.22 user 163.15 system 109.50 elapsed 176% CPU +time.15.tcmalloc.131072:13.35 user 0.10 system 13.54 elapsed 99% CPU +time.16.ptmalloc.64:17.69 user 10.11 system 10.11 elapsed 274% CPU +time.16.tcmalloc.64:6.63 user 0.04 system 1.72 elapsed 387% CPU +time.16.ptmalloc.128:23.05 user 14.37 system 12.75 elapsed 293% CPU +time.16.tcmalloc.128:6.61 user 0.02 system 1.71 elapsed 387% CPU +time.16.ptmalloc.256:29.11 user 19.35 system 14.57 elapsed 332% CPU +time.16.tcmalloc.256:6.62 user 0.03 system 1.73 elapsed 382% CPU +time.16.ptmalloc.512:31.65 user 18.71 system 14.71 elapsed 342% CPU +time.16.tcmalloc.512:6.63 user 0.04 system 1.73 elapsed 383% CPU +time.16.ptmalloc.1024:31.99 user 21.22 system 14.87 elapsed 357% CPU +time.16.tcmalloc.1024:6.81 user 0.04 system 1.79 elapsed 382% CPU +time.16.ptmalloc.2048:30.35 user 21.36 system 13.30 elapsed 388% CPU +time.16.tcmalloc.2048:6.91 user 0.50 system 2.01 elapsed 367% CPU +time.16.ptmalloc.4096:18.85 user 7.18 system 6.61 elapsed 393% CPU +time.16.tcmalloc.4096:6.70 user 0.10 system 2.62 elapsed 259% CPU +time.16.ptmalloc.8192:22.19 user 14.30 system 9.37 elapsed 389% CPU +time.16.tcmalloc.8192:6.18 user 0.19 system 3.58 elapsed 177% CPU +time.16.ptmalloc.16384:31.22 user 46.78 system 22.92 elapsed 340% CPU +time.16.tcmalloc.16384:6.79 user 0.07 system 6.86 elapsed 99% CPU +time.16.ptmalloc.32768:27.31 user 87.32 system 52.00 elapsed 220% CPU +time.16.tcmalloc.32768:9.85 user 0.06 system 10.07 elapsed 98% CPU 
+time.16.ptmalloc.65536:32.83 user 160.62 system 95.67 elapsed 202% CPU +time.16.tcmalloc.65536:10.18 user 0.09 system 10.41 elapsed 98% CPU +time.16.ptmalloc.131072:31.99 user 173.41 system 115.98 elapsed 177% CPU +time.16.tcmalloc.131072:14.52 user 0.05 system 14.67 elapsed 99% CPU +time.17.ptmalloc.64:19.38 user 11.61 system 10.61 elapsed 291% CPU +time.17.tcmalloc.64:7.11 user 0.02 system 1.84 elapsed 386% CPU +time.17.ptmalloc.128:26.25 user 16.15 system 13.53 elapsed 313% CPU +time.17.tcmalloc.128:6.97 user 0.02 system 1.78 elapsed 390% CPU +time.17.ptmalloc.256:30.66 user 18.36 system 14.97 elapsed 327% CPU +time.17.tcmalloc.256:6.94 user 0.04 system 1.80 elapsed 387% CPU +time.17.ptmalloc.512:33.71 user 22.79 system 15.95 elapsed 354% CPU +time.17.tcmalloc.512:7.00 user 0.02 system 1.83 elapsed 381% CPU +time.17.ptmalloc.1024:33.49 user 22.47 system 15.00 elapsed 373% CPU +time.17.tcmalloc.1024:7.20 user 0.03 system 1.90 elapsed 380% CPU +time.17.ptmalloc.2048:23.87 user 11.92 system 9.26 elapsed 386% CPU +time.17.tcmalloc.2048:6.01 user 1.83 system 2.15 elapsed 363% CPU +time.17.ptmalloc.4096:14.69 user 0.95 system 3.98 elapsed 392% CPU +time.17.tcmalloc.4096:7.25 user 0.10 system 2.62 elapsed 279% CPU +time.17.ptmalloc.8192:22.44 user 13.52 system 9.39 elapsed 382% CPU +time.17.tcmalloc.8192:7.21 user 0.24 system 3.95 elapsed 188% CPU +time.17.ptmalloc.16384:23.33 user 33.67 system 21.89 elapsed 260% CPU +time.17.tcmalloc.16384:7.28 user 0.06 system 7.10 elapsed 103% CPU +time.17.ptmalloc.32768:29.35 user 103.11 system 60.36 elapsed 219% CPU +time.17.tcmalloc.32768:10.53 user 0.07 system 10.71 elapsed 98% CPU +time.17.ptmalloc.65536:33.21 user 170.89 system 100.84 elapsed 202% CPU +time.17.tcmalloc.65536:10.85 user 0.05 system 11.04 elapsed 98% CPU +time.17.ptmalloc.131072:34.98 user 182.87 system 122.05 elapsed 178% CPU +time.17.tcmalloc.131072:15.27 user 0.09 system 15.49 elapsed 99% CPU +time.18.ptmalloc.64:21.08 user 12.15 system 11.43 elapsed 290% 
CPU +time.18.tcmalloc.64:7.45 user 0.03 system 1.95 elapsed 383% CPU +time.18.ptmalloc.128:27.65 user 17.26 system 14.03 elapsed 320% CPU +time.18.tcmalloc.128:7.46 user 0.03 system 1.92 elapsed 389% CPU +time.18.ptmalloc.256:32.78 user 20.55 system 15.70 elapsed 339% CPU +time.18.tcmalloc.256:7.31 user 0.02 system 1.88 elapsed 389% CPU +time.18.ptmalloc.512:33.31 user 20.06 system 15.05 elapsed 354% CPU +time.18.tcmalloc.512:7.33 user 0.02 system 1.91 elapsed 383% CPU +time.18.ptmalloc.1024:35.46 user 24.83 system 16.30 elapsed 369% CPU +time.18.tcmalloc.1024:7.60 user 0.06 system 2.05 elapsed 373% CPU +time.18.ptmalloc.2048:19.98 user 6.80 system 6.76 elapsed 395% CPU +time.18.tcmalloc.2048:6.89 user 1.29 system 2.28 elapsed 357% CPU +time.18.ptmalloc.4096:15.99 user 0.93 system 4.32 elapsed 391% CPU +time.18.tcmalloc.4096:7.70 user 0.10 system 2.77 elapsed 280% CPU +time.18.ptmalloc.8192:23.51 user 14.84 system 9.97 elapsed 384% CPU +time.18.tcmalloc.8192:8.16 user 0.27 system 4.25 elapsed 197% CPU +time.18.ptmalloc.16384:35.79 user 52.41 system 26.47 elapsed 333% CPU +time.18.tcmalloc.16384:7.81 user 0.07 system 7.61 elapsed 103% CPU +time.18.ptmalloc.32768:33.17 user 116.07 system 68.64 elapsed 217% CPU +time.18.tcmalloc.32768:11.34 user 0.13 system 11.57 elapsed 99% CPU +time.18.ptmalloc.65536:35.91 user 177.82 system 106.75 elapsed 200% CPU +time.18.tcmalloc.65536:11.54 user 0.06 system 11.74 elapsed 98% CPU +time.18.ptmalloc.131072:36.38 user 187.18 system 126.91 elapsed 176% CPU +time.18.tcmalloc.131072:16.34 user 0.05 system 16.43 elapsed 99% CPU +time.19.ptmalloc.64:22.90 user 13.23 system 11.82 elapsed 305% CPU +time.19.tcmalloc.64:7.81 user 0.02 system 2.01 elapsed 388% CPU +time.19.ptmalloc.128:30.13 user 18.58 system 14.77 elapsed 329% CPU +time.19.tcmalloc.128:7.74 user 0.02 system 2.01 elapsed 386% CPU +time.19.ptmalloc.256:35.33 user 21.41 system 16.35 elapsed 347% CPU +time.19.tcmalloc.256:7.79 user 0.04 system 2.04 elapsed 382% CPU 
+time.19.ptmalloc.512:39.30 user 26.22 system 17.84 elapsed 367% CPU +time.19.tcmalloc.512:7.80 user 0.06 system 2.05 elapsed 381% CPU +time.19.ptmalloc.1024:35.70 user 23.90 system 15.66 elapsed 380% CPU +time.19.tcmalloc.1024:8.08 user 0.06 system 2.16 elapsed 376% CPU +time.19.ptmalloc.2048:18.33 user 3.28 system 5.47 elapsed 394% CPU +time.19.tcmalloc.2048:8.71 user 0.05 system 2.40 elapsed 363% CPU +time.19.ptmalloc.4096:16.94 user 0.89 system 4.64 elapsed 383% CPU +time.19.tcmalloc.4096:8.21 user 0.07 system 2.85 elapsed 289% CPU +time.19.ptmalloc.8192:25.61 user 17.15 system 11.33 elapsed 377% CPU +time.19.tcmalloc.8192:8.79 user 0.30 system 4.58 elapsed 198% CPU +time.19.ptmalloc.16384:27.11 user 46.66 system 29.67 elapsed 248% CPU +time.19.tcmalloc.16384:8.64 user 0.05 system 8.58 elapsed 101% CPU +time.19.ptmalloc.32768:33.80 user 117.69 system 70.65 elapsed 214% CPU +time.19.tcmalloc.32768:11.88 user 0.07 system 12.04 elapsed 99% CPU +time.19.ptmalloc.65536:36.90 user 180.21 system 109.01 elapsed 199% CPU +time.19.tcmalloc.65536:12.17 user 0.07 system 12.40 elapsed 98% CPU +time.19.ptmalloc.131072:38.50 user 195.15 system 132.81 elapsed 175% CPU +time.19.tcmalloc.131072:17.44 user 0.10 system 17.65 elapsed 99% CPU +time.20.ptmalloc.64:23.37 user 13.74 system 11.86 elapsed 312% CPU +time.20.tcmalloc.64:8.18 user 0.02 system 2.10 elapsed 389% CPU +time.20.ptmalloc.128:31.29 user 19.97 system 15.53 elapsed 329% CPU +time.20.tcmalloc.128:8.03 user 0.02 system 2.12 elapsed 378% CPU +time.20.ptmalloc.256:38.40 user 25.65 system 18.25 elapsed 350% CPU +time.20.tcmalloc.256:8.05 user 0.05 system 2.12 elapsed 380% CPU +time.20.ptmalloc.512:40.60 user 27.70 system 18.46 elapsed 369% CPU +time.20.tcmalloc.512:8.22 user 0.08 system 2.20 elapsed 375% CPU +time.20.ptmalloc.1024:40.02 user 28.52 system 17.56 elapsed 390% CPU +time.20.tcmalloc.1024:8.50 user 0.07 system 2.19 elapsed 391% CPU +time.20.ptmalloc.2048:16.13 user 0.23 system 4.23 elapsed 386% CPU 
+time.20.tcmalloc.2048:8.98 user 0.03 system 2.45 elapsed 367% CPU +time.20.ptmalloc.4096:17.14 user 0.87 system 4.60 elapsed 391% CPU +time.20.tcmalloc.4096:8.93 user 0.20 system 2.97 elapsed 306% CPU +time.20.ptmalloc.8192:25.24 user 17.16 system 11.14 elapsed 380% CPU +time.20.tcmalloc.8192:9.78 user 0.30 system 5.14 elapsed 195% CPU +time.20.ptmalloc.16384:39.93 user 60.36 system 30.24 elapsed 331% CPU +time.20.tcmalloc.16384:9.57 user 0.09 system 9.43 elapsed 102% CPU +time.20.ptmalloc.32768:36.44 user 130.23 system 76.79 elapsed 217% CPU +time.20.tcmalloc.32768:12.71 user 0.09 system 12.97 elapsed 98% CPU +time.20.ptmalloc.65536:39.79 user 202.09 system 120.34 elapsed 200% CPU +time.20.tcmalloc.65536:12.93 user 0.06 system 13.15 elapsed 98% CPU +time.20.ptmalloc.131072:41.91 user 202.76 system 138.51 elapsed 176% CPU +time.20.tcmalloc.131072:18.23 user 0.07 system 18.42 elapsed 99% CPU diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.1024.bytes.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.1024.bytes.png Binary files differnew file mode 100644 index 00000000000..8c0ae6b59c2 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.1024.bytes.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.128.bytes.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.128.bytes.png Binary files differnew file mode 100644 index 00000000000..24b2a27410f --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.128.bytes.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.131072.bytes.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.131072.bytes.png Binary files differnew file mode 100644 index 00000000000..183a77b9c9f --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.131072.bytes.png diff --git 
a/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.16384.bytes.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.16384.bytes.png Binary files differnew file mode 100644 index 00000000000..db59d61ae02 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.16384.bytes.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.2048.bytes.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.2048.bytes.png Binary files differnew file mode 100644 index 00000000000..169546f2406 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.2048.bytes.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.256.bytes.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.256.bytes.png Binary files differnew file mode 100644 index 00000000000..62130213c3b --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.256.bytes.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.32768.bytes.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.32768.bytes.png Binary files differnew file mode 100644 index 00000000000..18715e3a5ab --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.32768.bytes.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.4096.bytes.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.4096.bytes.png Binary files differnew file mode 100644 index 00000000000..642e245b444 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.4096.bytes.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.512.bytes.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.512.bytes.png Binary files differnew file mode 
100644 index 00000000000..aea1d676471 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.512.bytes.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.64.bytes.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.64.bytes.png Binary files differnew file mode 100644 index 00000000000..3a080de7533 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.64.bytes.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.65536.bytes.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.65536.bytes.png Binary files differnew file mode 100644 index 00000000000..48ebdb63e8a --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.65536.bytes.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.8192.bytes.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.8192.bytes.png Binary files differnew file mode 100644 index 00000000000..3a99cbc8b1f --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspercpusec.vs.threads.8192.bytes.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.1.threads.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.1.threads.png Binary files differnew file mode 100644 index 00000000000..37d406d81f6 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.1.threads.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.12.threads.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.12.threads.png Binary files differnew file mode 100644 index 00000000000..d45458ac1a0 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.12.threads.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.16.threads.png 
b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.16.threads.png Binary files differnew file mode 100644 index 00000000000..e8a3c9ff5f5 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.16.threads.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.2.threads.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.2.threads.png Binary files differnew file mode 100644 index 00000000000..52d7aee4903 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.2.threads.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.20.threads.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.20.threads.png Binary files differnew file mode 100644 index 00000000000..da0328a6a7b --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.20.threads.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.3.threads.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.3.threads.png Binary files differnew file mode 100644 index 00000000000..1093e81ca6b --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.3.threads.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.4.threads.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.4.threads.png Binary files differnew file mode 100644 index 00000000000..d7c79ef328c --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.4.threads.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.5.threads.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.5.threads.png Binary files differnew file mode 100644 index 00000000000..779eec60bc0 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.5.threads.png diff --git 
a/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.8.threads.png b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.8.threads.png Binary files differnew file mode 100644 index 00000000000..76c125ae8b2 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc-opspersec.vs.size.8.threads.png diff --git a/src/third_party/gperftools-2.7/docs/tcmalloc.html b/src/third_party/gperftools-2.7/docs/tcmalloc.html new file mode 100644 index 00000000000..33b8cc5fbb3 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/tcmalloc.html @@ -0,0 +1,778 @@ +<!doctype html public "-//w3c//dtd html 4.01 transitional//en"> +<!-- $Id: $ --> +<html> +<head> +<title>TCMalloc : Thread-Caching Malloc</title> +<link rel="stylesheet" href="designstyle.css"> +<style type="text/css"> + em { + color: red; + font-style: normal; + } +</style> +</head> +<body> + +<h1>TCMalloc : Thread-Caching Malloc</h1> + +<address>Sanjay Ghemawat</address> + +<h2><A name=motivation>Motivation</A></h2> + +<p>TCMalloc is faster than the glibc 2.3 malloc (available as a +separate library called ptmalloc2) and other mallocs that I have +tested. ptmalloc2 takes approximately 300 nanoseconds to execute a +malloc/free pair on a 2.8 GHz P4 (for small objects). The TCMalloc +implementation takes approximately 50 nanoseconds for the same +operation pair. Speed is important for a malloc implementation +because if malloc is not fast enough, application writers are inclined +to write their own custom free lists on top of malloc. This can lead +to extra complexity, and more memory usage unless the application +writer is very careful to appropriately size the free lists and +scavenge idle objects out of the free list.</p> + +<p>TCMalloc also reduces lock contention for multi-threaded programs. +For small objects, there is virtually zero contention. For large +objects, TCMalloc tries to use fine grained and efficient spinlocks. 
+ptmalloc2 also reduces lock contention by using per-thread arenas but +there is a big problem with ptmalloc2's use of per-thread arenas. In +ptmalloc2 memory can never move from one arena to another. This can +lead to huge amounts of wasted space. For example, in one Google +application, the first phase would allocate approximately 300MB of +memory for its URL canonicalization data structures. When the first +phase finished, a second phase would be started in the same address +space. If this second phase was assigned a different arena than the +one used by the first phase, this phase would not reuse any of the +memory left after the first phase and would add another 300MB to the +address space. Similar memory blowup problems were also noticed in +other applications.</p> + +<p>Another benefit of TCMalloc is space-efficient representation of +small objects. For example, N 8-byte objects can be allocated while +using space approximately <code>8N * 1.01</code> bytes. I.e., a +one-percent space overhead. 
ptmalloc2 uses a four-byte header for +each object and (I think) rounds up the size to a multiple of 8 bytes +and ends up using <code>16N</code> bytes.</p> + + +<h2><A NAME="Usage">Usage</A></h2> + +<p>To use TCMalloc, just link TCMalloc into your application via the +"-ltcmalloc" linker flag.</p> + +<p>You can use TCMalloc in applications you didn't compile yourself, +by using LD_PRELOAD:</p> +<pre> + $ LD_PRELOAD="/usr/lib/libtcmalloc.so" <binary> +</pre> +<p>LD_PRELOAD is tricky, and we don't necessarily recommend this mode +of usage.</p> + +<p>TCMalloc includes a <A HREF="heap_checker.html">heap checker</A> +and <A HREF="heapprofile.html">heap profiler</A> as well.</p> + +<p>If you'd rather link in a version of TCMalloc that does not include +the heap profiler and checker (perhaps to reduce binary size for a +static binary), you can link in <code>libtcmalloc_minimal</code> +instead.</p> + + +<h2><A NAME="Overview">Overview</A></h2> + +<p>TCMalloc assigns each thread a thread-local cache. Small +allocations are satisfied from the thread-local cache. Objects are +moved from central data structures into a thread-local cache as +needed, and periodic garbage collections are used to migrate memory +back from a thread-local cache into the central data structures.</p> +<center><img src="overview.gif"></center> + +<p>TCMalloc treats objects with size <= 256K ("small" objects) +differently from larger objects. Large objects are allocated directly +from the central heap using a page-level allocator (a page is a 8K +aligned region of memory). I.e., a large object is always +page-aligned and occupies an integral number of pages.</p> + +<p>A run of pages can be carved up into a sequence of small objects, +each equally sized. 
For example a run of one page (4K) can be carved +up into 32 objects of size 128 bytes each.</p> + + +<h2><A NAME="Small_Object_Allocation">Small Object Allocation</A></h2> + +<p>Each small object size maps to one of approximately 88 allocatable +size-classes. For example, all allocations in the range 961 to 1024 +bytes are rounded up to 1024. The size-classes are spaced so that +small sizes are separated by 8 bytes, larger sizes by 16 bytes, even +larger sizes by 32 bytes, and so forth. The maximal spacing is +controlled so that not too much space is wasted when an allocation +request falls just past the end of a size class and has to be rounded +up to the next class.</p> + +<p>A thread cache contains a singly linked list of free objects per +size-class.</p> +<center><img src="threadheap.gif"></center> + +<p>When allocating a small object: (1) We map its size to the +corresponding size-class. (2) Look in the corresponding free list in +the thread cache for the current thread. (3) If the free list is not +empty, we remove the first object from the list and return it. When +following this fast path, TCMalloc acquires no locks at all. This +helps speed-up allocation significantly because a lock/unlock pair +takes approximately 100 nanoseconds on a 2.8 GHz Xeon.</p> + +<p>If the free list is empty: (1) We fetch a bunch of objects from a +central free list for this size-class (the central free list is shared +by all threads). (2) Place them in the thread-local free list. (3) +Return one of the newly fetched objects to the applications.</p> + +<p>If the central free list is also empty: (1) We allocate a run of +pages from the central page allocator. (2) Split the run into a set +of objects of this size-class. (3) Place the new objects on the +central free list. 
(4) As before, move some of these objects to the
+thread-local free list.</p>
+
+<h3><A NAME="Sizing_Thread_Cache_Free_Lists">
+    Sizing Thread Cache Free Lists</A></h3>
+
+<p>It is important to size the thread cache free lists correctly. If
+the free list is too small, we'll need to go to the central free list
+too often. If the free list is too big, we'll waste memory as objects
+sit idle in the free list.</p>
+
+<p>Note that the thread caches are just as important for deallocation
+as they are for allocation. Without a cache, each deallocation would
+require moving the memory to the central free list. Also, some threads
+have asymmetric alloc/free behavior (e.g. producer and consumer threads),
+so sizing the free list correctly gets trickier.</p>
+
+<p>To size the free lists appropriately, we use a slow-start algorithm
+to determine the maximum length of each individual free list. As the
+free list is used more frequently, its maximum length grows. However,
+if a free list is used more for deallocation than allocation, its
+maximum length will grow only up to a point where the whole list can
+be efficiently moved to the central free list at once.</p>
+
+<p>The pseudo-code below illustrates this slow-start algorithm. Note
+that <code>num_objects_to_move</code> is specific to each size class.
+By moving a list of objects with a well-known length, the central
+cache can efficiently pass these lists between thread caches. If
+a thread cache wants fewer than <code>num_objects_to_move</code>,
+the operation on the central free list has linear time complexity.
+The downside of always using <code>num_objects_to_move</code> as
+the number of objects to transfer to and from the central cache is
+that it wastes memory in threads that don't need all of those objects.
+
+<pre>
+Start each freelist max_length at 1. 
+ +Allocation + if freelist empty { + fetch min(max_length, num_objects_to_move) from central list; + if max_length < num_objects_to_move { // slow-start + max_length++; + } else { + max_length += num_objects_to_move; + } + } + +Deallocation + if length > max_length { + // Don't try to release num_objects_to_move if we don't have that many. + release min(max_length, num_objects_to_move) objects to central list + if max_length < num_objects_to_move { + // Slow-start up to num_objects_to_move. + max_length++; + } else if max_length > num_objects_to_move { + // If we consistently go over max_length, shrink max_length. + overages++; + if overages > kMaxOverages { + max_length -= num_objects_to_move; + overages = 0; + } + } + } +</pre> + +See also the section on <a href="#Garbage_Collection">Garbage Collection</a> +to see how it affects the <code>max_length</code>. + +<h2><A NAME="Medium_Object_Allocation">Medium Object Allocation</A></h2> + +<p>A medium object size (256K ≤ size ≤ 1MB) is rounded up to a page +size (8K) and is handled by a central page heap. The central page heap +includes an array of 128 free lists. The <code>k</code>th entry is a +free list of runs that consist of <code>k + 1</code> pages:</p> +<center><img src="pageheap.gif"></center> + +<p>An allocation for <code>k</code> pages is satisfied by looking in +the <code>k</code>th free list. If that free list is empty, we look +in the next free list, and so forth. If no medium-object free list +can satisfy the allocation, the allocation is treated as a large object. + + +<h2><A NAME="Large_Object_Allocation">Large Object Allocation</A></h2> + +Allocations of 1MB or more are considered large allocations. Spans +of free memory which can satisfy these allocations are tracked in +a red-black tree sorted by size. Allocations follow the <em>best-fit</em> +algorithm: the tree is searched to find the smallest span of free +space which is larger than the requested allocation. 
The allocation
+is carved out of that span, and the remaining space is reinserted
+either into the large object tree or possibly into one of the smaller
+free-lists as appropriate.
+
+If no span of free memory is located that can fit the requested
+allocation, we fetch memory from the system (using <code>sbrk</code>,
+<code>mmap</code>, or by mapping in portions of
+<code>/dev/mem</code>).</p>
+
+<p>If an allocation for <code>k</code> pages is satisfied by a run
+of pages of length > <code>k</code>, the remainder of the
+run is re-inserted back into the appropriate free list in the
+page heap.</p>
+
+
+<h2><A NAME="Spans">Spans</A></h2>
+
+<p>The heap managed by TCMalloc consists of a set of pages. A run of
+contiguous pages is represented by a <code>Span</code> object. A span
+can either be <em>allocated</em>, or <em>free</em>. If free, the span
+is one of the entries in a page heap linked-list. If allocated, it is
+either a large object that has been handed off to the application, or
+a run of pages that have been split up into a sequence of small
+objects. If split into small objects, the size-class of the objects
+is recorded in the span.</p>
+
+<p>A central array indexed by page number can be used to find the span to
+which a page belongs. For example, span <em>a</em> below occupies 2
+pages, span <em>b</em> occupies 1 page, span <em>c</em> occupies 5
+pages and span <em>d</em> occupies 3 pages.</p>
+<center><img src="spanmap.gif"></center>
+
+<p>In a 32-bit address space, the central array is represented by a
+2-level radix tree where the root contains 32 entries and each leaf
+contains 2^14 entries (a 32-bit address space has 2^19 8K pages, and
+the first level of tree divides the 2^19 pages by 2^5). 
This leads to +a starting memory usage of 64KB of space (2^14*4 bytes) for the +central array, which seems acceptable.</p> + +<p>On 64-bit machines, we use a 3-level radix tree.</p> + + +<h2><A NAME="Deallocation">Deallocation</A></h2> + +<p>When an object is deallocated, we compute its page number and look +it up in the central array to find the corresponding span object. The +span tells us whether or not the object is small, and its size-class +if it is small. If the object is small, we insert it into the +appropriate free list in the current thread's thread cache. If the +thread cache now exceeds a predetermined size (2MB by default), we run +a garbage collector that moves unused objects from the thread cache +into central free lists.</p> + +<p>If the object is large, the span tells us the range of pages covered +by the object. Suppose this range is <code>[p,q]</code>. We also +lookup the spans for pages <code>p-1</code> and <code>q+1</code>. If +either of these neighboring spans are free, we coalesce them with the +<code>[p,q]</code> span. The resulting span is inserted into the +appropriate free list in the page heap.</p> + + +<h2>Central Free Lists for Small Objects</h2> + +<p>As mentioned before, we keep a central free list for each +size-class. Each central free list is organized as a two-level data +structure: a set of spans, and a linked list of free objects per +span.</p> + +<p>An object is allocated from a central free list by removing the +first entry from the linked list of some span. (If all spans have +empty linked lists, a suitably sized span is first allocated from the +central page heap.)</p> + +<p>An object is returned to a central free list by adding it to the +linked list of its containing span. 
If the linked list length now +equals the total number of small objects in the span, this span is now +completely free and is returned to the page heap.</p> + + +<h2><A NAME="Garbage_Collection">Garbage Collection of Thread Caches</A></h2> + +<p>Garbage collecting objects from a thread cache keeps the size of +the cache under control and returns unused objects to the central free +lists. Some threads need large caches to perform well while others +can get by with little or no cache at all. When a thread cache goes +over its <code>max_size</code>, garbage collection kicks in and then the +thread competes with the other threads for a larger cache.</p> + +<p>Garbage collection is run only during a deallocation. We walk over +all free lists in the cache and move some number of objects from the +free list to the corresponding central list.</p> + +<p>The number of objects to be moved from a free list is determined +using a per-list low-water-mark <code>L</code>. <code>L</code> +records the minimum length of the list since the last garbage +collection. Note that we could have shortened the list by +<code>L</code> objects at the last garbage collection without +requiring any extra accesses to the central list. We use this past +history as a predictor of future accesses and move <code>L/2</code> +objects from the thread cache free list to the corresponding central +free list. This algorithm has the nice property that if a thread +stops using a particular size, all objects of that size will quickly +move from the thread cache to the central free list where they can be +used by other threads.</p> + +<p>If a thread consistently deallocates more objects of a certain size +than it allocates, this <code>L/2</code> behavior will cause at least +<code>L/2</code> objects to always sit in the free list. 
To avoid +wasting memory this way, we shrink the maximum length of the freelist +to converge on <code>num_objects_to_move</code> (see also +<a href="#Sizing_Thread_Cache_Free_Lists">Sizing Thread Cache Free Lists</a>). + +<pre> +Garbage Collection + if (L != 0 && max_length > num_objects_to_move) { + max_length = max(max_length - num_objects_to_move, num_objects_to_move) + } +</pre> + +<p>The fact that the thread cache went over its <code>max_size</code> is +an indication that the thread would benefit from a larger cache. Simply +increasing <code>max_size</code> would use an inordinate amount of memory +in programs that have lots of active threads. Developers can bound the +memory used with the flag --tcmalloc_max_total_thread_cache_bytes.</p> + +<p>Each thread cache starts with a small <code>max_size</code> +(e.g. 64KB) so that idle threads won't pre-allocate memory they don't +need. Each time the cache runs a garbage collection, it will also try +to grow its <code>max_size</code>. If the sum of the thread cache +sizes is less than --tcmalloc_max_total_thread_cache_bytes, +<code>max_size</code> grows easily. If not, thread cache 1 will try +to steal from thread cache 2 (picked round-robin) by decreasing thread +cache 2's <code>max_size</code>. In this way, threads that are more +active will steal memory from other threads more often than they are +have memory stolen from themselves. Mostly idle threads end up with +small caches and active threads end up with big caches. Note that +this stealing can cause the sum of the thread cache sizes to be +greater than --tcmalloc_max_total_thread_cache_bytes until thread +cache 2 deallocates some memory to trigger a garbage collection.</p> + +<h2><A NAME="performance">Performance Notes</A></h2> + +<h3>PTMalloc2 unittest</h3> + +<p>The PTMalloc2 package (now part of glibc) contains a unittest +program <code>t-test1.c</code>. 
This forks a number of threads and +performs a series of allocations and deallocations in each thread; the +threads do not communicate other than by synchronization in the memory +allocator.</p> + +<p><code>t-test1</code> (included in +<code>tests/tcmalloc/</code>, and compiled as +<code>ptmalloc_unittest1</code>) was run with a varying numbers of +threads (1-20) and maximum allocation sizes (64 bytes - +32Kbytes). These tests were run on a 2.4GHz dual Xeon system with +hyper-threading enabled, using Linux glibc-2.3.2 from RedHat 9, with +one million operations per thread in each test. In each case, the test +was run once normally, and once with +<code>LD_PRELOAD=libtcmalloc.so</code>. + +<p>The graphs below show the performance of TCMalloc vs PTMalloc2 for +several different metrics. Firstly, total operations (millions) per +elapsed second vs max allocation size, for varying numbers of +threads. The raw data used to generate these graphs (the output of the +<code>time</code> utility) is available in +<code>t-test1.times.txt</code>.</p> + +<table> +<tr> + <td><img src="tcmalloc-opspersec.vs.size.1.threads.png"></td> + <td><img src="tcmalloc-opspersec.vs.size.2.threads.png"></td> + <td><img src="tcmalloc-opspersec.vs.size.3.threads.png"></td> +</tr> +<tr> + <td><img src="tcmalloc-opspersec.vs.size.4.threads.png"></td> + <td><img src="tcmalloc-opspersec.vs.size.5.threads.png"></td> + <td><img src="tcmalloc-opspersec.vs.size.8.threads.png"></td> +</tr> +<tr> + <td><img src="tcmalloc-opspersec.vs.size.12.threads.png"></td> + <td><img src="tcmalloc-opspersec.vs.size.16.threads.png"></td> + <td><img src="tcmalloc-opspersec.vs.size.20.threads.png"></td> +</tr> +</table> + + +<ul> + <li> TCMalloc is much more consistently scalable than PTMalloc2 - for + all thread counts >1 it achieves ~7-9 million ops/sec for small + allocations, falling to ~2 million ops/sec for larger + allocations. 
The single-thread case is an obvious outlier, + since it is only able to keep a single processor busy and hence + can achieve fewer ops/sec. PTMalloc2 has a much higher variance + on operations/sec - peaking somewhere around 4 million ops/sec + for small allocations and falling to <1 million ops/sec for + larger allocations. + + <li> TCMalloc is faster than PTMalloc2 in the vast majority of + cases, and particularly for small allocations. Contention + between threads is less of a problem in TCMalloc. + + <li> TCMalloc's performance drops off as the allocation size + increases. This is because the per-thread cache is + garbage-collected when it hits a threshold (defaulting to + 2MB). With larger allocation sizes, fewer objects can be stored + in the cache before it is garbage-collected. + + <li> There is a noticeable drop in TCMalloc's performance at ~32K + maximum allocation size; at larger sizes performance drops less + quickly. This is due to the 32K maximum size of objects in the + per-thread caches; for objects larger than this TCMalloc + allocates from the central page heap. 
+</ul> + +<p>Next, operations (millions) per second of CPU time vs number of +threads, for max allocation size 64 bytes - 128 Kbytes.</p> + +<table> +<tr> + <td><img src="tcmalloc-opspercpusec.vs.threads.64.bytes.png"></td> + <td><img src="tcmalloc-opspercpusec.vs.threads.256.bytes.png"></td> + <td><img src="tcmalloc-opspercpusec.vs.threads.1024.bytes.png"></td> +</tr> +<tr> + <td><img src="tcmalloc-opspercpusec.vs.threads.4096.bytes.png"></td> + <td><img src="tcmalloc-opspercpusec.vs.threads.8192.bytes.png"></td> + <td><img src="tcmalloc-opspercpusec.vs.threads.16384.bytes.png"></td> +</tr> +<tr> + <td><img src="tcmalloc-opspercpusec.vs.threads.32768.bytes.png"></td> + <td><img src="tcmalloc-opspercpusec.vs.threads.65536.bytes.png"></td> + <td><img src="tcmalloc-opspercpusec.vs.threads.131072.bytes.png"></td> +</tr> +</table> + +<p>Here we see again that TCMalloc is both more consistent and more +efficient than PTMalloc2. For max allocation sizes <32K, TCMalloc +typically achieves ~2-2.5 million ops per second of CPU time with a +large number of threads, whereas PTMalloc achieves generally 0.5-1 +million ops per second of CPU time, with a lot of cases achieving much +less than this figure. Above 32K max allocation size, TCMalloc drops +to 1-1.5 million ops per second of CPU time, and PTMalloc drops almost +to zero for large numbers of threads (i.e. with PTMalloc, lots of CPU +time is being burned spinning waiting for locks in the heavily +multi-threaded case).</p> + + +<H2><A NAME="runtime">Modifying Runtime Behavior</A></H2> + +<p>You can more finely control the behavior of the tcmalloc via +environment variables.</p> + +<p>Generally useful flags:</p> + +<table frame=box rules=sides cellpadding=5 width=100%> + +<tr valign=top> + <td><code>TCMALLOC_SAMPLE_PARAMETER</code></td> + <td>default: 0</td> + <td> + The approximate gap between sampling actions. 
That is, we
+    take one sample approximately once every
+    <code>tcmalloc_sample_parameter</code> bytes of allocation.
+    This sampled heap information is available via
+    <code>MallocExtension::GetHeapSample()</code> or
+    <code>MallocExtension::ReadStackTraces()</code>. A reasonable
+    value is 524288.
+  </td>
+</tr>
+
+<tr valign=top>
+  <td><code>TCMALLOC_RELEASE_RATE</code></td>
+  <td>default: 1.0</td>
+  <td>
+    Rate at which we release unused memory to the system, via
+    <code>madvise(MADV_DONTNEED)</code>, on systems that support
+    it. Zero means we never release memory back to the system.
+    Increase this flag to return memory faster; decrease it
+    to return memory slower. Reasonable rates are in the
+    range [0,10].
+  </td>
+</tr>
+
+<tr valign=top>
+  <td><code>TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD</code></td>
+  <td>default: 1073741824</td>
+  <td>
+    Allocations larger than this value cause a stack trace to be
+    dumped to stderr. The threshold for dumping stack traces is
+    increased by a factor of 1.125 every time we print a message so
+    that the threshold automatically goes up by a factor of ~1000
+    every 60 messages. This bounds the amount of extra logging
+    generated by this flag. Default value of this flag is very large
+    and therefore you should see no extra logging unless the flag is
+    overridden.
+  </td>
+</tr>
+
+<tr valign=top>
+  <td><code>TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES</code></td>
+  <td>default: 16777216</td>
+  <td>
+    Bound on the total amount of bytes allocated to thread caches. This
+    bound is not strict, so it is possible for the cache to go over this
+    bound in certain circumstances. This value defaults to 16MB. For
+    applications with many threads, this may not be a large enough cache,
+    which can affect performance. If you suspect your application is not
+    scaling to many threads due to lock contention in TCMalloc, you can
+    try increasing this value. This may improve performance, at a cost
+    of extra memory use by TCMalloc. 
See <a href="#Garbage_Collection"> + Garbage Collection</a> for more details. + </td> +</tr> + +</table> + +<p>Advanced "tweaking" flags, that control more precisely how tcmalloc +tries to allocate memory from the kernel.</p> + +<table frame=box rules=sides cellpadding=5 width=100%> + +<tr valign=top> + <td><code>TCMALLOC_SKIP_MMAP</code></td> + <td>default: false</td> + <td> + If true, do not try to use <code>mmap</code> to obtain memory + from the kernel. + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_SKIP_SBRK</code></td> + <td>default: false</td> + <td> + If true, do not try to use <code>sbrk</code> to obtain memory + from the kernel. + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_DEVMEM_START</code></td> + <td>default: 0</td> + <td> + Physical memory starting location in MB for <code>/dev/mem</code> + allocation. Setting this to 0 disables <code>/dev/mem</code> + allocation. + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_DEVMEM_LIMIT</code></td> + <td>default: 0</td> + <td> + Physical memory limit location in MB for <code>/dev/mem</code> + allocation. Setting this to 0 means no limit. + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_DEVMEM_DEVICE</code></td> + <td>default: /dev/mem</td> + <td> + Device to use for allocating unmanaged memory. + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_MEMFS_MALLOC_PATH</code></td> + <td>default: ""</td> + <td> + If set, specify a path where hugetlbfs or tmpfs is mounted. + This may allow for speedier allocations. + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_MEMFS_LIMIT_MB</code></td> + <td>default: 0</td> + <td> + Limit total memfs allocation size to specified number of MB. + 0 means "no limit". + </td> +</tr> + +<tr valign=top> + <td><code>TCMALLOC_MEMFS_ABORT_ON_FAIL</code></td> + <td>default: false</td> + <td> + If true, abort() whenever memfs_malloc fails to satisfy an allocation. 
+  </td>
+</tr>
+
+<tr valign=top>
+  <td><code>TCMALLOC_MEMFS_IGNORE_MMAP_FAIL</code></td>
+  <td>default: false</td>
+  <td>
+    If true, ignore failures from mmap.
+  </td>
+</tr>
+
+<tr valign=top>
+  <td><code>TCMALLOC_MEMFS_MAP_PRIVATE</code></td>
+  <td>default: false</td>
+  <td>
+    If true, use MAP_PRIVATE when mapping via memfs, not MAP_SHARED.
+  </td>
+</tr>
+
+</table>
+
+
+<H2><A NAME="compiletime">Modifying Behavior In Code</A></H2>
+
+<p>The <code>MallocExtension</code> class, in
+<code>malloc_extension.h</code>, provides a few knobs that you can
+tweak in your program, to affect tcmalloc's behavior.</p>
+
+<h3>Releasing Memory Back to the System</h3>
+
+<p>By default, tcmalloc will release no-longer-used memory back to the
+kernel gradually, over time. The <a
+href="#runtime">tcmalloc_release_rate</a> flag controls how quickly
+this happens. You can also force a release at a given point in the
+program execution like so:</p>
+<pre>
+   MallocExtension::instance()->ReleaseFreeMemory();
+</pre>
+
+<p>You can also call <code>SetMemoryReleaseRate()</code> to change the
+<code>tcmalloc_release_rate</code> value at runtime, or
+<code>GetMemoryReleaseRate</code> to see what the current release rate
+is.</p>
+
+<h3>Memory Introspection</h3>
+
+<p>There are several routines for getting a human-readable form of the
+current memory usage:</p>
+<pre>
+   MallocExtension::instance()->GetStats(buffer, buffer_length);
+   MallocExtension::instance()->GetHeapSample(&string);
+   MallocExtension::instance()->GetHeapGrowthStacks(&string);
+</pre>
+
+<p>The last two create files in the same format as the heap-profiler,
+and can be passed as data files to pprof. 
The first is human-readable +and is meant for debugging.</p> + +<h3>Generic Tcmalloc Status</h3> + +<p>TCMalloc has support for setting and retrieving arbitrary +'properties':</p> +<pre> + MallocExtension::instance()->SetNumericProperty(property_name, value); + MallocExtension::instance()->GetNumericProperty(property_name, &value); +</pre> + +<p>It is possible for an application to set and get these properties, +but the most useful is when a library sets the properties so the +application can read them. Here are the properties TCMalloc defines; +you can access them with a call like +<code>MallocExtension::instance()->GetNumericProperty("generic.heap_size", +&value);</code>:</p> + +<table frame=box rules=sides cellpadding=5 width=100%> + +<tr valign=top> + <td><code>generic.current_allocated_bytes</code></td> + <td> + Number of bytes used by the application. This will not typically + match the memory use reported by the OS, because it does not + include TCMalloc overhead or memory fragmentation. + </td> +</tr> + +<tr valign=top> + <td><code>generic.heap_size</code></td> + <td> + Bytes of system memory reserved by TCMalloc. + </td> +</tr> + +<tr valign=top> + <td><code>tcmalloc.pageheap_free_bytes</code></td> + <td> + Number of bytes in free, mapped pages in page heap. These bytes + can be used to fulfill allocation requests. They always count + towards virtual memory usage, and unless the underlying memory is + swapped out by the OS, they also count towards physical memory + usage. + </td> +</tr> + +<tr valign=top> + <td><code>tcmalloc.pageheap_unmapped_bytes</code></td> + <td> + Number of bytes in free, unmapped pages in page heap. These are + bytes that have been released back to the OS, possibly by one of + the MallocExtension "Release" calls. They can be used to fulfill + allocation requests, but typically incur a page fault. They + always count towards virtual memory usage, and depending on the + OS, typically do not count towards physical memory usage. 
+ </td> +</tr> + +<tr valign=top> + <td><code>tcmalloc.slack_bytes</code></td> + <td> + Sum of pageheap_free_bytes and pageheap_unmapped_bytes. Provided + for backwards compatibility only. Do not use. + </td> +</tr> + +<tr valign=top> + <td><code>tcmalloc.max_total_thread_cache_bytes</code></td> + <td> + A limit to how much memory TCMalloc dedicates for small objects. + Higher numbers trade off more memory use for -- in some situations + -- improved efficiency. + </td> +</tr> + +<tr valign=top> + <td><code>tcmalloc.current_total_thread_cache_bytes</code></td> + <td> + A measure of some of the memory TCMalloc is using (for + small objects). + </td> +</tr> + +</table> + +<h2><A NAME="caveats">Caveats</A></h2> + +<p>For some systems, TCMalloc may not work correctly with +applications that aren't linked against <code>libpthread.so</code> (or +the equivalent on your OS). It should work on Linux using glibc 2.3, +but other OS/libc combinations have not been tested.</p> + +<p>TCMalloc may be somewhat more memory hungry than other mallocs, +(but tends not to have the huge blowups that can happen with other +mallocs). In particular, at startup TCMalloc allocates approximately +240KB of internal memory.</p> + +<p>Don't try to load TCMalloc into a running binary (e.g., using JNI +in Java programs). The binary will have allocated some objects using +the system malloc, and may try to pass them to TCMalloc for +deallocation. 
TCMalloc will not be able to handle such objects.</p> + +<hr> + +<address>Sanjay Ghemawat, Paul Menage<br> +<!-- Created: Tue Dec 19 10:43:14 PST 2000 --> +<!-- hhmts start --> +Last modified: Sat Feb 24 13:11:38 PST 2007 (csilvers) +<!-- hhmts end --> +</address> + +</body> +</html> diff --git a/src/third_party/gperftools-2.7/docs/threadheap.dot b/src/third_party/gperftools-2.7/docs/threadheap.dot new file mode 100644 index 00000000000..b2dba72038d --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/threadheap.dot @@ -0,0 +1,21 @@ +digraph ThreadHeap { +rankdir=LR +node [shape=box, width=0.3, height=0.3] +nodesep=.05 + +heap [shape=record, height=2, label="<f0>class 0|<f1>class 1|<f2>class 2|..."] +O0 [label=""] +O1 [label=""] +O2 [label=""] +O3 [label=""] +O4 [label=""] +O5 [label=""] +sep1 [shape=plaintext, label="..."] +sep2 [shape=plaintext, label="..."] +sep3 [shape=plaintext, label="..."] + +heap:f0 -> O0 -> O1 -> sep1 +heap:f1 -> O2 -> O3 -> sep2 +heap:f2 -> O4 -> O5 -> sep3 + +} diff --git a/src/third_party/gperftools-2.7/docs/threadheap.gif b/src/third_party/gperftools-2.7/docs/threadheap.gif Binary files differnew file mode 100644 index 00000000000..c43d0a31018 --- /dev/null +++ b/src/third_party/gperftools-2.7/docs/threadheap.gif diff --git a/src/third_party/gperftools-2.7/src/addressmap-inl.h b/src/third_party/gperftools-2.7/src/addressmap-inl.h new file mode 100644 index 00000000000..fd1dc5b6ffe --- /dev/null +++ b/src/third_party/gperftools-2.7/src/addressmap-inl.h @@ -0,0 +1,422 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// A fast map from addresses to values. Assumes that addresses are +// clustered. The main use is intended to be for heap-profiling. +// May be too memory-hungry for other uses. +// +// We use a user-defined allocator/de-allocator so that we can use +// this data structure during heap-profiling. +// +// IMPLEMENTATION DETAIL: +// +// Some default definitions/parameters: +// * Block -- aligned 128-byte region of the address space +// * Cluster -- aligned 1-MB region of the address space +// * Block-ID -- block-number within a cluster +// * Cluster-ID -- Starting address of cluster divided by cluster size +// +// We use a three-level map to represent the state: +// 1. 
A hash-table maps from a cluster-ID to the data for that cluster.
+//  2. For each non-empty cluster we keep an array indexed by
+//     block-ID that points to the first entry in the linked-list
+//     for the block.
+//  3. At the bottom, we keep a singly-linked list of all
+//     entries in a block (for non-empty blocks).
+//
+//    hash table
+//  +-------------+
+//  | id->cluster |---> ...
+//  |     ...     |
+//  | id->cluster |---> Cluster
+//  +-------------+                  +-------+    Data for one block
+//                                   |  nil  |    +------------------------------------+
+//                                   |   ----+---|->[addr/value]-->[addr/value]-->...  |
+//                                   |  nil  |    +------------------------------------+
+//                                   |   ----+--> ...
+//                                   |  nil  |
+//                                   |  ...  |
+//                                   +-------+
+//
+// Note that we require zero-bytes of overhead for completely empty
+// clusters. The minimum space requirement for a cluster is the size
+// of the hash-table entry plus a pointer value for each block in
+// the cluster. Empty blocks impose no extra space requirement.
+//
+// The cost of a lookup is:
+//      a. A hash-table lookup to find the cluster
+//      b. An array access in the cluster structure
+//      c. A traversal over the linked-list for a block
+
+#ifndef BASE_ADDRESSMAP_INL_H_
+#define BASE_ADDRESSMAP_INL_H_
+
+#include "config.h"
+#include <stddef.h>
+#include <string.h>
+#if defined HAVE_STDINT_H
+#include <stdint.h>             // to get uint16_t (ISO naming madness)
+#elif defined HAVE_INTTYPES_H
+#include <inttypes.h>           // another place uint16_t might be defined
+#else
+#include <sys/types.h>          // our last best hope
+#endif
+
+// This class is thread-unsafe -- that is, instances of this class can
+// not be accessed concurrently by multiple threads -- because the
+// callback function for Iterate() may mutate contained values. 
If the +// callback functions you pass do not mutate their Value* argument, +// AddressMap can be treated as thread-compatible -- that is, it's +// safe for multiple threads to call "const" methods on this class, +// but not safe for one thread to call const methods on this class +// while another thread is calling non-const methods on the class. +template <class Value> +class AddressMap { + public: + typedef void* (*Allocator)(size_t size); + typedef void (*DeAllocator)(void* ptr); + typedef const void* Key; + + // Create an AddressMap that uses the specified allocator/deallocator. + // The allocator/deallocator should behave like malloc/free. + // For instance, the allocator does not need to return initialized memory. + AddressMap(Allocator alloc, DeAllocator dealloc); + ~AddressMap(); + + // If the map contains an entry for "key", return it. Else return NULL. + inline const Value* Find(Key key) const; + inline Value* FindMutable(Key key); + + // Insert <key,value> into the map. Any old value associated + // with key is forgotten. + void Insert(Key key, Value value); + + // Remove any entry for key in the map. If an entry was found + // and removed, stores the associated value in "*removed_value" + // and returns true. Else returns false. + bool FindAndRemove(Key key, Value* removed_value); + + // Similar to Find but we assume that keys are addresses of non-overlapping + // memory ranges whose sizes are given by size_func. + // If the map contains a range into which "key" points + // (at its start or inside of it, but not at the end), + // return the address of the associated value + // and store its key in "*res_key". + // Else return NULL. + // max_size specifies largest range size possibly in existence now. 
+ typedef size_t (*ValueSizeFunc)(const Value& v); + const Value* FindInside(ValueSizeFunc size_func, size_t max_size, + Key key, Key* res_key); + + // Iterate over the address map calling 'callback' + // for all stored key-value pairs and passing 'arg' to it. + // We don't use full Closure/Callback machinery not to add + // unnecessary dependencies to this class with low-level uses. + template<class Type> + inline void Iterate(void (*callback)(Key, Value*, Type), Type arg) const; + + private: + typedef uintptr_t Number; + + // The implementation assumes that addresses inserted into the map + // will be clustered. We take advantage of this fact by splitting + // up the address-space into blocks and using a linked-list entry + // for each block. + + // Size of each block. There is one linked-list for each block, so + // do not make the block-size too big. Oterwise, a lot of time + // will be spent traversing linked lists. + static const int kBlockBits = 7; + static const int kBlockSize = 1 << kBlockBits; + + // Entry kept in per-block linked-list + struct Entry { + Entry* next; + Key key; + Value value; + }; + + // We further group a sequence of consecutive blocks into a cluster. + // The data for a cluster is represented as a dense array of + // linked-lists, one list per contained block. + static const int kClusterBits = 13; + static const Number kClusterSize = 1 << (kBlockBits + kClusterBits); + static const int kClusterBlocks = 1 << kClusterBits; + + // We use a simple chaining hash-table to represent the clusters. + struct Cluster { + Cluster* next; // Next cluster in hash table chain + Number id; // Cluster ID + Entry* blocks[kClusterBlocks]; // Per-block linked-lists + }; + + // Number of hash-table entries. With the block-size/cluster-size + // defined above, each cluster covers 1 MB, so an 4K entry + // hash-table will give an average hash-chain length of 1 for 4GB of + // in-use memory. 
+ static const int kHashBits = 12; + static const int kHashSize = 1 << 12; + + // Number of entry objects allocated at a time + static const int ALLOC_COUNT = 64; + + Cluster** hashtable_; // The hash-table + Entry* free_; // Free list of unused Entry objects + + // Multiplicative hash function: + // The value "kHashMultiplier" is the bottom 32 bits of + // int((sqrt(5)-1)/2 * 2^32) + // This is a good multiplier as suggested in CLR, Knuth. The hash + // value is taken to be the top "k" bits of the bottom 32 bits + // of the muliplied value. + static const uint32_t kHashMultiplier = 2654435769u; + static int HashInt(Number x) { + // Multiply by a constant and take the top bits of the result. + const uint32_t m = static_cast<uint32_t>(x) * kHashMultiplier; + return static_cast<int>(m >> (32 - kHashBits)); + } + + // Find cluster object for specified address. If not found + // and "create" is true, create the object. If not found + // and "create" is false, return NULL. + // + // This method is bitwise-const if create is false. + Cluster* FindCluster(Number address, bool create) { + // Look in hashtable + const Number cluster_id = address >> (kBlockBits + kClusterBits); + const int h = HashInt(cluster_id); + for (Cluster* c = hashtable_[h]; c != NULL; c = c->next) { + if (c->id == cluster_id) { + return c; + } + } + + // Create cluster if necessary + if (create) { + Cluster* c = New<Cluster>(1); + c->id = cluster_id; + c->next = hashtable_[h]; + hashtable_[h] = c; + return c; + } + return NULL; + } + + // Return the block ID for an address within its cluster + static int BlockID(Number address) { + return (address >> kBlockBits) & (kClusterBlocks - 1); + } + + //-------------------------------------------------------------- + // Memory management -- we keep all objects we allocate linked + // together in a singly linked list so we can get rid of them + // when we are all done. 
Furthermore, we allow the client to + // pass in custom memory allocator/deallocator routines. + //-------------------------------------------------------------- + struct Object { + Object* next; + // The real data starts here + }; + + Allocator alloc_; // The allocator + DeAllocator dealloc_; // The deallocator + Object* allocated_; // List of allocated objects + + // Allocates a zeroed array of T with length "num". Also inserts + // the allocated block into a linked list so it can be deallocated + // when we are all done. + template <class T> T* New(int num) { + void* ptr = (*alloc_)(sizeof(Object) + num*sizeof(T)); + memset(ptr, 0, sizeof(Object) + num*sizeof(T)); + Object* obj = reinterpret_cast<Object*>(ptr); + obj->next = allocated_; + allocated_ = obj; + return reinterpret_cast<T*>(reinterpret_cast<Object*>(ptr) + 1); + } +}; + +// More implementation details follow: + +template <class Value> +AddressMap<Value>::AddressMap(Allocator alloc, DeAllocator dealloc) + : free_(NULL), + alloc_(alloc), + dealloc_(dealloc), + allocated_(NULL) { + hashtable_ = New<Cluster*>(kHashSize); +} + +template <class Value> +AddressMap<Value>::~AddressMap() { + // De-allocate all of the objects we allocated + for (Object* obj = allocated_; obj != NULL; /**/) { + Object* next = obj->next; + (*dealloc_)(obj); + obj = next; + } +} + +template <class Value> +inline const Value* AddressMap<Value>::Find(Key key) const { + return const_cast<AddressMap*>(this)->FindMutable(key); +} + +template <class Value> +inline Value* AddressMap<Value>::FindMutable(Key key) { + const Number num = reinterpret_cast<Number>(key); + const Cluster* const c = FindCluster(num, false/*do not create*/); + if (c != NULL) { + for (Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) { + if (e->key == key) { + return &e->value; + } + } + } + return NULL; +} + +template <class Value> +void AddressMap<Value>::Insert(Key key, Value value) { + const Number num = reinterpret_cast<Number>(key); + Cluster* 
const c = FindCluster(num, true/*create*/); + + // Look in linked-list for this block + const int block = BlockID(num); + for (Entry* e = c->blocks[block]; e != NULL; e = e->next) { + if (e->key == key) { + e->value = value; + return; + } + } + + // Create entry + if (free_ == NULL) { + // Allocate a new batch of entries and add to free-list + Entry* array = New<Entry>(ALLOC_COUNT); + for (int i = 0; i < ALLOC_COUNT-1; i++) { + array[i].next = &array[i+1]; + } + array[ALLOC_COUNT-1].next = free_; + free_ = &array[0]; + } + Entry* e = free_; + free_ = e->next; + e->key = key; + e->value = value; + e->next = c->blocks[block]; + c->blocks[block] = e; +} + +template <class Value> +bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) { + const Number num = reinterpret_cast<Number>(key); + Cluster* const c = FindCluster(num, false/*do not create*/); + if (c != NULL) { + for (Entry** p = &c->blocks[BlockID(num)]; *p != NULL; p = &(*p)->next) { + Entry* e = *p; + if (e->key == key) { + *removed_value = e->value; + *p = e->next; // Remove e from linked-list + e->next = free_; // Add e to free-list + free_ = e; + return true; + } + } + } + return false; +} + +template <class Value> +const Value* AddressMap<Value>::FindInside(ValueSizeFunc size_func, + size_t max_size, + Key key, + Key* res_key) { + const Number key_num = reinterpret_cast<Number>(key); + Number num = key_num; // we'll move this to move back through the clusters + while (1) { + const Cluster* c = FindCluster(num, false/*do not create*/); + if (c != NULL) { + while (1) { + const int block = BlockID(num); + bool had_smaller_key = false; + for (const Entry* e = c->blocks[block]; e != NULL; e = e->next) { + const Number e_num = reinterpret_cast<Number>(e->key); + if (e_num <= key_num) { + if (e_num == key_num || // to handle 0-sized ranges + key_num < e_num + (*size_func)(e->value)) { + *res_key = e->key; + return &e->value; + } + had_smaller_key = true; + } + } + if (had_smaller_key) return NULL; 
// got a range before 'key' + // and it did not contain 'key' + if (block == 0) break; + // try address-wise previous block + num |= kBlockSize - 1; // start at the last addr of prev block + num -= kBlockSize; + if (key_num - num > max_size) return NULL; + } + } + if (num < kClusterSize) return NULL; // first cluster + // go to address-wise previous cluster to try + num |= kClusterSize - 1; // start at the last block of previous cluster + num -= kClusterSize; + if (key_num - num > max_size) return NULL; + // Having max_size to limit the search is crucial: else + // we have to traverse a lot of empty clusters (or blocks). + // We can avoid needing max_size if we put clusters into + // a search tree, but performance suffers considerably + // if we use this approach by using stl::set. + } +} + +template <class Value> +template <class Type> +inline void AddressMap<Value>::Iterate(void (*callback)(Key, Value*, Type), + Type arg) const { + // We could optimize this by traversing only non-empty clusters and/or blocks + // but it does not speed up heap-checker noticeably. + for (int h = 0; h < kHashSize; ++h) { + for (const Cluster* c = hashtable_[h]; c != NULL; c = c->next) { + for (int b = 0; b < kClusterBlocks; ++b) { + for (Entry* e = c->blocks[b]; e != NULL; e = e->next) { + callback(e->key, &e->value, arg); + } + } + } + } +} + +#endif // BASE_ADDRESSMAP_INL_H_ diff --git a/src/third_party/gperftools-2.7/src/base/arm_instruction_set_select.h b/src/third_party/gperftools-2.7/src/base/arm_instruction_set_select.h new file mode 100644 index 00000000000..6fde685272c --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/arm_instruction_set_select.h @@ -0,0 +1,84 @@ +// Copyright (c) 2011, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Author: Alexander Levitskiy +// +// Generalizes the plethora of ARM flavors available to an easier to manage set +// Defs reference is at https://wiki.edubuntu.org/ARM/Thumb2PortingHowto + +#ifndef ARM_INSTRUCTION_SET_SELECT_H_ +#define ARM_INSTRUCTION_SET_SELECT_H_ + +#if defined(__ARM_ARCH_8A__) +# define ARMV8 1 +#endif + +#if defined(ARMV8) || \ + defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7A__) +# define ARMV7 1 +#endif + +#if defined(ARMV7) || \ + defined(__ARM_ARCH_6__) || \ + defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6ZK__) +# define ARMV6 1 +#endif + +#if defined(ARMV6) || \ + defined(__ARM_ARCH_5T__) || \ + defined(__ARM_ARCH_5E__) || \ + defined(__ARM_ARCH_5TE__) || \ + defined(__ARM_ARCH_5TEJ__) +# define ARMV5 1 +#endif + +#if defined(ARMV5) || \ + defined(__ARM_ARCH_4__) || \ + defined(__ARM_ARCH_4T__) +# define ARMV4 1 +#endif + +#if defined(ARMV4) || \ + defined(__ARM_ARCH_3__) || \ + defined(__ARM_ARCH_3M__) +# define ARMV3 1 +#endif + +#if defined(ARMV3) || \ + defined(__ARM_ARCH_2__) +# define ARMV2 1 +#endif + +#endif // ARM_INSTRUCTION_SET_SELECT_H_ diff --git a/src/third_party/gperftools-2.7/src/base/atomicops-internals-arm-generic.h b/src/third_party/gperftools-2.7/src/base/atomicops-internals-arm-generic.h new file mode 100644 index 00000000000..d0f941309bb --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/atomicops-internals-arm-generic.h @@ -0,0 +1,228 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2003, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// --- +// +// Author: Lei Zhang, Sasha Levitskiy +// +// This file is an internal atomic implementation, use base/atomicops.h instead. +// +// LinuxKernelCmpxchg is from Google Gears. + +#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_ +#define BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_ + +#include <stdio.h> +#include <stdlib.h> +#include "base/basictypes.h" + +typedef int32_t Atomic32; + +namespace base { +namespace subtle { + +typedef int64_t Atomic64; + +// 0xffff0fc0 is the hard coded address of a function provided by +// the kernel which implements an atomic compare-exchange. On older +// ARM architecture revisions (pre-v6) this may be implemented using +// a syscall. 
This address is stable, and in active use (hard coded) +// by at least glibc-2.7 and the Android C library. +// pLinuxKernelCmpxchg has both acquire and release barrier sematincs. +typedef Atomic32 (*LinuxKernelCmpxchgFunc)(Atomic32 old_value, + Atomic32 new_value, + volatile Atomic32* ptr); +LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg ATTRIBUTE_WEAK = + (LinuxKernelCmpxchgFunc) 0xffff0fc0; + +typedef void (*LinuxKernelMemoryBarrierFunc)(void); +LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier ATTRIBUTE_WEAK = + (LinuxKernelMemoryBarrierFunc) 0xffff0fa0; + + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value = *ptr; + do { + if (!pLinuxKernelCmpxchg(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (pLinuxKernelCmpxchg(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + // pLinuxKernelCmpxchg already has acquire and release barrier semantics. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + // pLinuxKernelCmpxchg already has acquire and release barrier semantics. 
+ return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void MemoryBarrier() { + pLinuxKernelMemoryBarrier(); +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + + +// 64-bit versions are not implemented yet. + +inline void NotImplementedFatalError(const char *function_name) { + fprintf(stderr, "64-bit %s() not implemented on this platform\n", + function_name); + abort(); +} + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_CompareAndSwap"); + return 0; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_AtomicExchange"); + return 0; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // pLinuxKernelCmpxchg already has acquire and release barrier semantics. 
+ return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // pLinuxKernelCmpxchg already has acquire and release barrier semantics. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("NoBarrier_Store"); +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("Acquire_Store64"); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("Release_Store"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("NoBarrier_Load"); + return 0; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("Atomic64 Acquire_Load"); + return 0; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("Atomic64 Release_Load"); + return 0; +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("Atomic64 Acquire_CompareAndSwap"); + return 0; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("Atomic64 Release_CompareAndSwap"); + return 0; +} + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_ diff --git a/src/third_party/gperftools-2.7/src/base/atomicops-internals-arm-v6plus.h b/src/third_party/gperftools-2.7/src/base/atomicops-internals-arm-v6plus.h new file mode 100644 index 00000000000..35f10481b04 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/atomicops-internals-arm-v6plus.h @@ -0,0 +1,330 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// --- +// +// Author: Sasha Levitskiy +// based on atomicops-internals by Sanjay Ghemawat +// +// This file is an internal atomic implementation, use base/atomicops.h instead. +// +// This code implements ARM atomics for architectures V6 and newer. 
+ +#ifndef BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_ +#define BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_ + +#include <stdio.h> +#include <stdlib.h> +#include "base/basictypes.h" // For COMPILE_ASSERT + +// The LDREXD and STREXD instructions in ARM all v7 variants or above. In v6, +// only some variants support it. For simplicity, we only use exclusive +// 64-bit load/store in V7 or above. +#if defined(ARMV7) +# define BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD +#endif + +typedef int32_t Atomic32; + +namespace base { +namespace subtle { + +typedef int64_t Atomic64; + +// 32-bit low-level ops + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 oldval, res; + do { + __asm__ __volatile__( + "ldrex %1, [%3]\n" + "mov %0, #0\n" + "teq %1, %4\n" + // The following IT (if-then) instruction is needed for the subsequent + // conditional instruction STREXEQ when compiling in THUMB mode. + // In ARM mode, the compiler/assembler will not generate any code for it. 
+ "it eq\n" + "strexeq %0, %5, [%3]\n" + : "=&r" (res), "=&r" (oldval), "+Qo" (*ptr) + : "r" (ptr), "Ir" (old_value), "r" (new_value) + : "cc"); + } while (res); + return oldval; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 tmp, old; + __asm__ __volatile__( + "1:\n" + "ldrex %1, [%2]\n" + "strex %0, %3, [%2]\n" + "teq %0, #0\n" + "bne 1b" + : "=&r" (tmp), "=&r" (old) + : "r" (ptr), "r" (new_value) + : "cc", "memory"); + return old; +} + +inline void MemoryBarrier() { +#if !defined(ARMV7) + uint32_t dest = 0; + __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" :"=&r"(dest) : : "memory"); +#else + __asm__ __volatile__("dmb" : : : "memory"); +#endif +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value); + MemoryBarrier(); + return old_value; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + MemoryBarrier(); + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 value = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + MemoryBarrier(); + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; + MemoryBarrier(); + 
return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + +// 64-bit versions are only available if LDREXD and STREXD instructions +// are available. +#ifdef BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD + +#define BASE_HAS_ATOMIC64 1 + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 oldval, res; + do { + __asm__ __volatile__( + "ldrexd %1, [%3]\n" + "mov %0, #0\n" + "teq %Q1, %Q4\n" + // The following IT (if-then) instructions are needed for the subsequent + // conditional instructions when compiling in THUMB mode. + // In ARM mode, the compiler/assembler will not generate any code for it. + "it eq\n" + "teqeq %R1, %R4\n" + "it eq\n" + "strexdeq %0, %5, [%3]\n" + : "=&r" (res), "=&r" (oldval), "+Q" (*ptr) + : "r" (ptr), "Ir" (old_value), "r" (new_value) + : "cc"); + } while (res); + return oldval; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + int store_failed; + Atomic64 old; + __asm__ __volatile__( + "1:\n" + "ldrexd %1, [%2]\n" + "strexd %0, %3, [%2]\n" + "teq %0, #0\n" + "bne 1b" + : "=&r" (store_failed), "=&r" (old) + : "r" (ptr), "r" (new_value) + : "cc", "memory"); + return old; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value); + MemoryBarrier(); + return old_value; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + MemoryBarrier(); + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + int store_failed; + Atomic64 dummy; + __asm__ __volatile__( + "1:\n" + // Dummy load to lock cache line. 
+ "ldrexd %1, [%3]\n" + "strexd %0, %2, [%3]\n" + "teq %0, #0\n" + "bne 1b" + : "=&r" (store_failed), "=&r"(dummy) + : "r"(value), "r" (ptr) + : "cc", "memory"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + Atomic64 res; + __asm__ __volatile__( + "ldrexd %0, [%1]\n" + "clrex\n" + : "=r" (res) + : "r"(ptr), "Q"(*ptr)); + return res; +} + +#else // BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD + +inline void NotImplementedFatalError(const char *function_name) { + fprintf(stderr, "64-bit %s() not implemented on this platform\n", + function_name); + abort(); +} + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_CompareAndSwap"); + return 0; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_AtomicExchange"); + return 0; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("Acquire_AtomicExchange"); + return 0; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("Release_AtomicExchange"); + return 0; +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("NoBarrier_Store"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("NoBarrier_Load"); + return 0; +} + +#endif // BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_Store(ptr, value); + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + MemoryBarrier(); + NoBarrier_Store(ptr, value); +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = NoBarrier_Load(ptr); + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) 
{ + MemoryBarrier(); + return NoBarrier_Load(ptr); +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 value = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + MemoryBarrier(); + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +} // namespace subtle ends +} // namespace base ends + +#endif // BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_ diff --git a/src/third_party/gperftools-2.7/src/base/atomicops-internals-gcc.h b/src/third_party/gperftools-2.7/src/base/atomicops-internals-gcc.h new file mode 100644 index 00000000000..f8d27863cb7 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/atomicops-internals-gcc.h @@ -0,0 +1,203 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2014, Linaro +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// --- +// +// Author: Riku Voipio, riku.voipio@linaro.org +// +// atomic primitives implemented with gcc atomic intrinsics: +// http://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html +// + +#ifndef BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_ +#define BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_ + +#include <stdio.h> +#include <stdlib.h> +#include "base/basictypes.h" + +typedef int32_t Atomic32; + +namespace base { +namespace subtle { + +typedef int64_t Atomic64; + +inline void MemoryBarrier() { + __sync_synchronize(); +} + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, + 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_RELAXED); +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_ACQUIRE); +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + return __atomic_exchange_n(const_cast<Atomic32*>(ptr), new_value, __ATOMIC_RELEASE); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { 
+ Atomic32 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, + 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return prev_value; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, + 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); + return prev_value; +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + +// 64-bit versions + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, + 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + return prev_value; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_RELAXED); +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_ACQUIRE); +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + return __atomic_exchange_n(const_cast<Atomic64*>(ptr), new_value, __ATOMIC_RELEASE); +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 
new_value) { + Atomic64 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, + 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); + return prev_value; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value = old_value; + __atomic_compare_exchange_n(ptr, &prev_value, new_value, + 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); + return prev_value; +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return *ptr; +} + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_GCC_GENERIC_H_ diff --git a/src/third_party/gperftools-2.7/src/base/atomicops-internals-linuxppc.h b/src/third_party/gperftools-2.7/src/base/atomicops-internals-linuxppc.h new file mode 100644 index 00000000000..b52fdf0d1ec --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/atomicops-internals-linuxppc.h @@ -0,0 +1,437 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + */ + +// Implementation of atomic operations for ppc-linux. This file should not +// be included directly. Clients should instead include +// "base/atomicops.h". 
+ +#ifndef BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ +#define BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ + +typedef int32_t Atomic32; + +#ifdef __PPC64__ +#define BASE_HAS_ATOMIC64 1 +#endif + +namespace base { +namespace subtle { + +static inline void _sync(void) { + __asm__ __volatile__("sync": : : "memory"); +} + +static inline void _lwsync(void) { + // gcc defines __NO_LWSYNC__ when appropriate; see + // http://gcc.gnu.org/ml/gcc-patches/2006-11/msg01238.html +#ifdef __NO_LWSYNC__ + __asm__ __volatile__("msync": : : "memory"); +#else + __asm__ __volatile__("lwsync": : : "memory"); +#endif +} + +static inline void _isync(void) { + __asm__ __volatile__("isync": : : "memory"); +} + +static inline Atomic32 OSAtomicAdd32(Atomic32 amount, Atomic32 *value) { + Atomic32 t; + __asm__ __volatile__( +"1: lwarx %0,0,%3\n\ + add %0,%2,%0\n\ + stwcx. %0,0,%3 \n\ + bne- 1b" + : "=&r" (t), "+m" (*value) + : "r" (amount), "r" (value) + : "cc"); + return t; +} + +static inline Atomic32 OSAtomicAdd32Barrier(Atomic32 amount, Atomic32 *value) { + Atomic32 t; + _lwsync(); + t = OSAtomicAdd32(amount, value); + // This is based on the code snippet in the architecture manual (Vol + // 2, Appendix B). It's a little tricky: correctness depends on the + // fact that the code right before this (in OSAtomicAdd32) has a + // conditional branch with a data dependency on the update. + // Otherwise, we'd have to use sync. + _isync(); + return t; +} + +static inline bool OSAtomicCompareAndSwap32(Atomic32 old_value, + Atomic32 new_value, + Atomic32 *value) { + Atomic32 prev; + __asm__ __volatile__( +"1: lwarx %0,0,%2\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n\ + stwcx. 
%4,0,%2\n\ + bne- 1b\n\ +2:" + : "=&r" (prev), "+m" (*value) + : "r" (value), "r" (old_value), "r" (new_value) + : "cc"); + return prev == old_value; +} + +static inline Atomic32 OSAtomicCompareAndSwap32Acquire(Atomic32 old_value, + Atomic32 new_value, + Atomic32 *value) { + Atomic32 t; + t = OSAtomicCompareAndSwap32(old_value, new_value, value); + // This is based on the code snippet in the architecture manual (Vol + // 2, Appendix B). It's a little tricky: correctness depends on the + // fact that the code right before this (in + // OSAtomicCompareAndSwap32) has a conditional branch with a data + // dependency on the update. Otherwise, we'd have to use sync. + _isync(); + return t; +} + +static inline Atomic32 OSAtomicCompareAndSwap32Release(Atomic32 old_value, + Atomic32 new_value, + Atomic32 *value) { + _lwsync(); + return OSAtomicCompareAndSwap32(old_value, new_value, value); +} + +typedef int64_t Atomic64; + +inline void MemoryBarrier() { + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. + _sync(); +} + +// 32-bit Versions. 
+ +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32Acquire(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32Release(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32Acquire(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32Release(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +#ifdef __PPC64__ + +// 64-bit Versions. + +static inline Atomic64 OSAtomicAdd64(Atomic64 amount, Atomic64 *value) { + Atomic64 t; + __asm__ __volatile__( +"1: ldarx %0,0,%3\n\ + add %0,%2,%0\n\ + stdcx. 
%0,0,%3 \n\ + bne- 1b" + : "=&r" (t), "+m" (*value) + : "r" (amount), "r" (value) + : "cc"); + return t; +} + +static inline Atomic64 OSAtomicAdd64Barrier(Atomic64 amount, Atomic64 *value) { + Atomic64 t; + _lwsync(); + t = OSAtomicAdd64(amount, value); + // This is based on the code snippet in the architecture manual (Vol + // 2, Appendix B). It's a little tricky: correctness depends on the + // fact that the code right before this (in OSAtomicAdd64) has a + // conditional branch with a data dependency on the update. + // Otherwise, we'd have to use sync. + _isync(); + return t; +} + +static inline bool OSAtomicCompareAndSwap64(Atomic64 old_value, + Atomic64 new_value, + Atomic64 *value) { + Atomic64 prev; + __asm__ __volatile__( +"1: ldarx %0,0,%2\n\ + cmpd 0,%0,%3\n\ + bne- 2f\n\ + stdcx. %4,0,%2\n\ + bne- 1b\n\ +2:" + : "=&r" (prev), "+m" (*value) + : "r" (value), "r" (old_value), "r" (new_value) + : "cc"); + return prev == old_value; +} + +static inline Atomic64 OSAtomicCompareAndSwap64Acquire(Atomic64 old_value, + Atomic64 new_value, + Atomic64 *value) { + Atomic64 t; + t = OSAtomicCompareAndSwap64(old_value, new_value, value); + // This is based on the code snippet in the architecture manual (Vol + // 2, Appendix B). It's a little tricky: correctness depends on the + // fact that the code right before this (in + // OSAtomicCompareAndSwap64) has a conditional branch with a data + // dependency on the update. Otherwise, we'd have to use sync. 
+ _isync(); + return t; +} + +static inline Atomic64 OSAtomicCompareAndSwap64Release(Atomic64 old_value, + Atomic64 new_value, + Atomic64 *value) { + _lwsync(); + return OSAtomicCompareAndSwap64(old_value, new_value, value); +} + + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))); + return old_value; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64Acquire(old_value, new_value, + const_cast<Atomic64*>(ptr))); + return old_value; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64Release(old_value, new_value, + const_cast<Atomic64*>(ptr))); + return old_value; +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64Acquire(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64Release(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); 
+ return prev_value; +} + +#endif + +inline void NoBarrier_Store(volatile Atomic32 *ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) { + *ptr = value; + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. + _sync(); +} + +inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) { + _lwsync(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32 *ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) { + Atomic32 value = *ptr; + _lwsync(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32 *ptr) { + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. + _sync(); + return *ptr; +} + +#ifdef __PPC64__ + +// 64-bit Versions. + +inline void NoBarrier_Store(volatile Atomic64 *ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { + *ptr = value; + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. + _sync(); +} + +inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { + _lwsync(); + *ptr = value; +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64 *ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { + Atomic64 value = *ptr; + _lwsync(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. 
+ _sync(); + return *ptr; +} + +#endif + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ diff --git a/src/third_party/gperftools-2.7/src/base/atomicops-internals-macosx.h b/src/third_party/gperftools-2.7/src/base/atomicops-internals-macosx.h new file mode 100644 index 00000000000..b5130d4f4d7 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/atomicops-internals-macosx.h @@ -0,0 +1,370 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Implementation of atomic operations for Mac OS X. This file should not +// be included directly. Clients should instead include +// "base/atomicops.h". + +#ifndef BASE_ATOMICOPS_INTERNALS_MACOSX_H_ +#define BASE_ATOMICOPS_INTERNALS_MACOSX_H_ + +typedef int32_t Atomic32; + +// MacOS uses long for intptr_t, AtomicWord and Atomic32 are always different +// on the Mac, even when they are the same size. Similarly, on __ppc64__, +// AtomicWord and Atomic64 are always different. Thus, we need explicit +// casting. +#ifdef __LP64__ +#define AtomicWordCastType base::subtle::Atomic64 +#else +#define AtomicWordCastType Atomic32 +#endif + +#if defined(__LP64__) || defined(__i386__) +#define BASE_HAS_ATOMIC64 1 // Use only in tests and base/atomic* +#endif + +#include <libkern/OSAtomic.h> + +namespace base { +namespace subtle { + +#if !defined(__LP64__) && defined(__ppc__) + +// The Mac 64-bit OSAtomic implementations are not available for 32-bit PowerPC, +// while the underlying assembly instructions are available only some +// implementations of PowerPC. + +// The following inline functions will fail with the error message at compile +// time ONLY IF they are called. So it is safe to use this header if user +// code only calls AtomicWord and Atomic32 operations. 
+// +// NOTE(vchen): Implementation notes to implement the atomic ops below may +// be found in "PowerPC Virtual Environment Architecture, Book II, +// Version 2.02", January 28, 2005, Appendix B, page 46. Unfortunately, +// extra care must be taken to ensure data are properly 8-byte aligned, and +// that data are returned correctly according to Mac OS X ABI specs. + +inline int64_t OSAtomicCompareAndSwap64( + int64_t oldValue, int64_t newValue, int64_t *theValue) { + __asm__ __volatile__( + "_OSAtomicCompareAndSwap64_not_supported_for_32_bit_ppc\n\t"); + return 0; +} + +inline int64_t OSAtomicAdd64(int64_t theAmount, int64_t *theValue) { + __asm__ __volatile__( + "_OSAtomicAdd64_not_supported_for_32_bit_ppc\n\t"); + return 0; +} + +inline int64_t OSAtomicCompareAndSwap64Barrier( + int64_t oldValue, int64_t newValue, int64_t *theValue) { + int64_t prev = OSAtomicCompareAndSwap64(oldValue, newValue, theValue); + OSMemoryBarrier(); + return prev; +} + +inline int64_t OSAtomicAdd64Barrier( + int64_t theAmount, int64_t *theValue) { + int64_t new_val = OSAtomicAdd64(theAmount, theValue); + OSMemoryBarrier(); + return new_val; +} +#endif + +typedef int64_t Atomic64; + +inline void MemoryBarrier() { + OSMemoryBarrier(); +} + +// 32-bit Versions. 
+ +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32Barrier(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + return Acquire_AtomicExchange(ptr, new_value); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32Barrier(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + return Acquire_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) { + Atomic32 value = *ptr; + 
MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32 *ptr) { + MemoryBarrier(); + return *ptr; +} + +// 64-bit version + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))); + return old_value; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64Barrier(old_value, new_value, + const_cast<Atomic64*>(ptr))); + return old_value; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + return Acquire_AtomicExchange(ptr, new_value); +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64Barrier(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + // The lib kern interface does not distinguish between + // Acquire and Release memory barriers; they are equivalent. 
+ return Acquire_CompareAndSwap(ptr, old_value, new_value); +} + +#ifdef __LP64__ + +// 64-bit implementation on 64-bit platform + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { + Atomic64 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { + MemoryBarrier(); + return *ptr; +} + +#else + +// 64-bit implementation on 32-bit platform + +#if defined(__ppc__) + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + __asm__ __volatile__( + "_NoBarrier_Store_not_supported_for_32_bit_ppc\n\t"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + __asm__ __volatile__( + "_NoBarrier_Load_not_supported_for_32_bit_ppc\n\t"); + return 0; +} + +#elif defined(__i386__) + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Reset FP registers + : "=m" (*ptr) + : "m" (value) + : // mark the FP stack and mmx registers as clobbered + "st", "st(1)", "st(2)", "st(3)", "st(4)", + "st(5)", "st(6)", "st(7)", "mm0", "mm1", + "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); + +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + Atomic64 value; + __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Reset FP registers + : "=m" (value) + : "m" (*ptr) + : // mark the FP stack and mmx registers as clobbered + "st", "st(1)", 
"st(2)", "st(3)", "st(4)", + "st(5)", "st(6)", "st(7)", "mm0", "mm1", + "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); + + return value; +} +#endif + + +inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { + NoBarrier_Store(ptr, value); + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { + MemoryBarrier(); + NoBarrier_Store(ptr, value); +} + +inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { + Atomic64 value = NoBarrier_Load(ptr); + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { + MemoryBarrier(); + return NoBarrier_Load(ptr); +} +#endif // __LP64__ + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_MACOSX_H_ diff --git a/src/third_party/gperftools-2.7/src/base/atomicops-internals-mips.h b/src/third_party/gperftools-2.7/src/base/atomicops-internals-mips.h new file mode 100644 index 00000000000..4bfd7f6c70d --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/atomicops-internals-mips.h @@ -0,0 +1,323 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2013, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Author: Jovan Zelincevic <jovan.zelincevic@imgtec.com> +// based on atomicops-internals by Sanjay Ghemawat + +// This file is an internal atomic implementation, use base/atomicops.h instead. +// +// This code implements MIPS atomics. + +#ifndef BASE_ATOMICOPS_INTERNALS_MIPS_H_ +#define BASE_ATOMICOPS_INTERNALS_MIPS_H_ + +#if (_MIPS_ISA == _MIPS_ISA_MIPS64) +#define BASE_HAS_ATOMIC64 1 +#endif + +typedef int32_t Atomic32; + +namespace base { +namespace subtle { + +// Atomically execute: +// result = *ptr; +// if (*ptr == old_value) +// *ptr = new_value; +// return result; +// +// I.e., replace "*ptr" with "new_value" if "*ptr" used to be "old_value". +// Always return the old value of "*ptr" +// +// This routine implies no memory barriers. 
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) +{ + Atomic32 prev, tmp; + __asm__ volatile( + ".set push \n" + ".set noreorder \n" + + "1: \n" + "ll %0, %5 \n" // prev = *ptr + "bne %0, %3, 2f \n" // if (prev != old_value) goto 2 + " move %2, %4 \n" // tmp = new_value + "sc %2, %1 \n" // *ptr = tmp (with atomic check) + "beqz %2, 1b \n" // start again on atomic error + " nop \n" // delay slot nop + "2: \n" + + ".set pop \n" + : "=&r" (prev), "=m" (*ptr), + "=&r" (tmp) + : "Ir" (old_value), "r" (new_value), + "m" (*ptr) + : "memory" + ); + return prev; +} + +// Atomically store new_value into *ptr, returning the previous value held in +// *ptr. This routine implies no memory barriers. +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) +{ + Atomic32 temp, old; + __asm__ volatile( + ".set push \n" + ".set noreorder \n" + + "1: \n" + "ll %1, %2 \n" // old = *ptr + "move %0, %3 \n" // temp = new_value + "sc %0, %2 \n" // *ptr = temp (with atomic check) + "beqz %0, 1b \n" // start again on atomic error + " nop \n" // delay slot nop + + ".set pop \n" + : "=&r" (temp), "=&r" (old), + "=m" (*ptr) + : "r" (new_value), "m" (*ptr) + : "memory" + ); + return old; +} + +inline void MemoryBarrier() +{ + __asm__ volatile("sync" : : : "memory"); +} + +// "Acquire" operations +// ensure that no later memory access can be reordered ahead of the operation. +// "Release" operations ensure that no previous memory access can be reordered +// after the operation. "Barrier" operations have both "Acquire" and "Release" +// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory +// access. 
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) +{ + Atomic32 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + MemoryBarrier(); + return res; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) +{ + MemoryBarrier(); + Atomic32 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return res; +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) +{ + *ptr = value; +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) +{ + Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value); + MemoryBarrier(); + return old_value; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) +{ + MemoryBarrier(); + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) +{ + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) +{ + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) +{ + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) +{ + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) +{ + MemoryBarrier(); + return *ptr; +} + +#if (_MIPS_ISA == _MIPS_ISA_MIPS64) || (_MIPS_SIM == _MIPS_SIM_ABI64) + +typedef int64_t Atomic64; + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) +{ + Atomic64 prev, tmp; + __asm__ volatile( + ".set push \n" + ".set noreorder \n" + + "1: \n" + "lld %0, %5 \n" // prev = *ptr + "bne %0, %3, 2f \n" // if (prev != old_value) goto 2 + " move %2, %4 \n" // tmp = new_value + "scd %2, %1 \n" // *ptr = tmp (with atomic check) + "beqz %2, 1b \n" // start again on atomic error + " nop \n" // delay slot nop + 
"2: \n" + + ".set pop \n" + : "=&r" (prev), "=m" (*ptr), + "=&r" (tmp) + : "Ir" (old_value), "r" (new_value), + "m" (*ptr) + : "memory" + ); + return prev; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) +{ + Atomic64 temp, old; + __asm__ volatile( + ".set push \n" + ".set noreorder \n" + + "1: \n" + "lld %1, %2 \n" // old = *ptr + "move %0, %3 \n" // temp = new_value + "scd %0, %2 \n" // *ptr = temp (with atomic check) + "beqz %0, 1b \n" // start again on atomic error + " nop \n" // delay slot nop + + ".set pop \n" + : "=&r" (temp), "=&r" (old), + "=m" (*ptr) + : "r" (new_value), "m" (*ptr) + : "memory" + ); + return old; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) +{ + Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value); + MemoryBarrier(); + return old_value; +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) +{ + Atomic64 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + MemoryBarrier(); + return res; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) +{ + MemoryBarrier(); + Atomic64 res = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return res; +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) +{ + *ptr = value; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) +{ + MemoryBarrier(); + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) +{ + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) +{ + MemoryBarrier(); + *ptr = value; +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) +{ + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) +{ + Atomic64 value = *ptr; + MemoryBarrier(); + return 
value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) +{ + MemoryBarrier(); + return *ptr; +} + +#endif + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_MIPS_H_ diff --git a/src/third_party/gperftools-2.7/src/base/atomicops-internals-windows.h b/src/third_party/gperftools-2.7/src/base/atomicops-internals-windows.h new file mode 100644 index 00000000000..93ced8770d4 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/atomicops-internals-windows.h @@ -0,0 +1,457 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + */ + +// Implementation of atomic operations using Windows API +// functions. This file should not be included directly. Clients +// should instead include "base/atomicops.h". + +#ifndef BASE_ATOMICOPS_INTERNALS_WINDOWS_H_ +#define BASE_ATOMICOPS_INTERNALS_WINDOWS_H_ + +#include <stdio.h> +#include <stdlib.h> +#include "base/basictypes.h" // For COMPILE_ASSERT + +typedef int32 Atomic32; + +#if defined(_WIN64) +#define BASE_HAS_ATOMIC64 1 // Use only in tests and base/atomic* +#endif + +namespace base { +namespace subtle { + +typedef int64 Atomic64; + +// 32-bit low-level operations on any platform + +extern "C" { +// We use windows intrinsics when we can (they seem to be supported +// well on MSVC 8.0 and above). Unfortunately, in some +// environments, <windows.h> and <intrin.h> have conflicting +// declarations of some other intrinsics, breaking compilation: +// http://connect.microsoft.com/VisualStudio/feedback/details/262047 +// Therefore, we simply declare the relevant intrinsics ourself. + +// MinGW has a bug in the header files where it doesn't indicate the +// first argument is volatile -- they're not up to date. See +// http://readlist.com/lists/lists.sourceforge.net/mingw-users/0/3861.html +// We have to const_cast away the volatile to avoid compiler warnings. 
+// TODO(csilvers): remove this once MinGW has updated MinGW/include/winbase.h +#if defined(__MINGW32__) +inline LONG FastInterlockedCompareExchange(volatile LONG* ptr, + LONG newval, LONG oldval) { + return ::InterlockedCompareExchange(const_cast<LONG*>(ptr), newval, oldval); +} +inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) { + return ::InterlockedExchange(const_cast<LONG*>(ptr), newval); +} +inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) { + return ::InterlockedExchangeAdd(const_cast<LONG*>(ptr), increment); +} + +#elif _MSC_VER >= 1400 // intrinsics didn't work so well before MSVC 8.0 +// Unfortunately, in some environments, <windows.h> and <intrin.h> +// have conflicting declarations of some intrinsics, breaking +// compilation. So we declare the intrinsics we need ourselves. See +// http://connect.microsoft.com/VisualStudio/feedback/details/262047 +LONG _InterlockedCompareExchange(volatile LONG* ptr, LONG newval, LONG oldval); +#pragma intrinsic(_InterlockedCompareExchange) +inline LONG FastInterlockedCompareExchange(volatile LONG* ptr, + LONG newval, LONG oldval) { + return _InterlockedCompareExchange(ptr, newval, oldval); +} + +LONG _InterlockedExchange(volatile LONG* ptr, LONG newval); +#pragma intrinsic(_InterlockedExchange) +inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) { + return _InterlockedExchange(ptr, newval); +} + +LONG _InterlockedExchangeAdd(volatile LONG* ptr, LONG increment); +#pragma intrinsic(_InterlockedExchangeAdd) +inline LONG FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) { + return _InterlockedExchangeAdd(ptr, increment); +} + +#else +inline LONG FastInterlockedCompareExchange(volatile LONG* ptr, + LONG newval, LONG oldval) { + return ::InterlockedCompareExchange(ptr, newval, oldval); +} +inline LONG FastInterlockedExchange(volatile LONG* ptr, LONG newval) { + return ::InterlockedExchange(ptr, newval); +} +inline LONG 
FastInterlockedExchangeAdd(volatile LONG* ptr, LONG increment) { + return ::InterlockedExchangeAdd(ptr, increment); +} + +#endif // ifdef __MINGW32__ +} // extern "C" + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + LONG result = FastInterlockedCompareExchange( + reinterpret_cast<volatile LONG*>(ptr), + static_cast<LONG>(new_value), + static_cast<LONG>(old_value)); + return static_cast<Atomic32>(result); +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + LONG result = FastInterlockedExchange( + reinterpret_cast<volatile LONG*>(ptr), + static_cast<LONG>(new_value)); + return static_cast<Atomic32>(result); +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + // FastInterlockedExchange has both acquire and release memory barriers. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + // FastInterlockedExchange has both acquire and release memory barriers. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +} // namespace base::subtle +} // namespace base + + +// In msvc8/vs2005, winnt.h already contains a definition for +// MemoryBarrier in the global namespace. Add it there for earlier +// versions and forward to it from within the namespace. 
+#if !(defined(_MSC_VER) && _MSC_VER >= 1400) +inline void MemoryBarrier() { + Atomic32 value = 0; + base::subtle::NoBarrier_AtomicExchange(&value, 0); + // actually acts as a barrier in thisd implementation +} +#endif + +namespace base { +namespace subtle { + +inline void MemoryBarrier() { + ::MemoryBarrier(); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + Acquire_AtomicExchange(ptr, value); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; // works w/o barrier for current Intel chips as of June 2005 + // See comments in Atomic64 version of Release_Store() below. +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + +// 64-bit operations + +#if defined(_WIN64) || defined(__MINGW64__) + +// 64-bit low-level operations on 64-bit platform. + +COMPILE_ASSERT(sizeof(Atomic64) == sizeof(PVOID), atomic_word_is_atomic); + +// These are the intrinsics needed for 64-bit operations. Similar to the +// 32-bit case above. 
+ +extern "C" { +#if defined(__MINGW64__) +inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr, + PVOID newval, PVOID oldval) { + return ::InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr), + newval, oldval); +} +inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) { + return ::InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval); +} +inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr, + LONGLONG increment) { + return ::InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment); +} + +#elif _MSC_VER >= 1400 // intrinsics didn't work so well before MSVC 8.0 +// Like above, we need to declare the intrinsics ourselves. +PVOID _InterlockedCompareExchangePointer(volatile PVOID* ptr, + PVOID newval, PVOID oldval); +#pragma intrinsic(_InterlockedCompareExchangePointer) +inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr, + PVOID newval, PVOID oldval) { + return _InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr), + newval, oldval); +} + +PVOID _InterlockedExchangePointer(volatile PVOID* ptr, PVOID newval); +#pragma intrinsic(_InterlockedExchangePointer) +inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) { + return _InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval); +} + +LONGLONG _InterlockedExchangeAdd64(volatile LONGLONG* ptr, LONGLONG increment); +#pragma intrinsic(_InterlockedExchangeAdd64) +inline LONGLONG FastInterlockedExchangeAdd64(volatile LONGLONG* ptr, + LONGLONG increment) { + return _InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment); +} + +#else +inline PVOID FastInterlockedCompareExchangePointer(volatile PVOID* ptr, + PVOID newval, PVOID oldval) { + return ::InterlockedCompareExchangePointer(ptr, newval, oldval); +} +inline PVOID FastInterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) { + return ::InterlockedExchangePointer(ptr, newval); +} +inline LONGLONG 
FastInterlockedExchangeAdd64(volatile LONGLONG* ptr, + LONGLONG increment) { + return ::InterlockedExchangeAdd64(ptr, increment); +} + +#endif // ifdef __MINGW64__ +} // extern "C" + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + PVOID result = FastInterlockedCompareExchangePointer( + reinterpret_cast<volatile PVOID*>(ptr), + reinterpret_cast<PVOID>(new_value), reinterpret_cast<PVOID>(old_value)); + return reinterpret_cast<Atomic64>(result); +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + PVOID result = FastInterlockedExchangePointer( + reinterpret_cast<volatile PVOID*>(ptr), + reinterpret_cast<PVOID>(new_value)); + return reinterpret_cast<Atomic64>(result); +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_AtomicExchange(ptr, value); + // acts as a barrier in this implementation +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; // works w/o barrier for current Intel chips as of June 2005 + + // When new chips come out, check: + // IA-32 Intel Architecture Software Developer's Manual, Volume 3: + // System Programming Guide, Chatper 7: Multiple-processor management, + // Section 7.2, Memory Ordering. + // Last seen at: + // http://developer.intel.com/design/pentium4/manuals/index_new.htm +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = *ptr; + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return *ptr; +} + +#else // defined(_WIN64) || defined(__MINGW64__) + +// 64-bit low-level operations on 32-bit platform + +// TODO(vchen): The GNU assembly below must be converted to MSVC inline +// assembly. 
Then the file should be renamed to ...-x86-msvc.h, probably. + +inline void NotImplementedFatalError(const char *function_name) { + fprintf(stderr, "64-bit %s() not implemented on this platform\n", + function_name); + abort(); +} + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { +#if 0 // Not implemented + Atomic64 prev; + __asm__ __volatile__("movl (%3), %%ebx\n\t" // Move 64-bit new_value into + "movl 4(%3), %%ecx\n\t" // ecx:ebx + "lock; cmpxchg8b %1\n\t" // If edx:eax (old_value) same + : "=A" (prev) // as contents of ptr: + : "m" (*ptr), // ecx:ebx => ptr + "0" (old_value), // else: + "r" (&new_value) // old *ptr => edx:eax + : "memory", "%ebx", "%ecx"); + return prev; +#else + NotImplementedFatalError("NoBarrier_CompareAndSwap"); + return 0; +#endif +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { +#if 0 // Not implemented + __asm__ __volatile__( + "movl (%2), %%ebx\n\t" // Move 64-bit new_value into + "movl 4(%2), %%ecx\n\t" // ecx:ebx + "0:\n\t" + "movl %1, %%eax\n\t" // Read contents of ptr into + "movl 4%1, %%edx\n\t" // edx:eax + "lock; cmpxchg8b %1\n\t" // Attempt cmpxchg; if *ptr + "jnz 0b\n\t" // is no longer edx:eax, loop + : "=A" (new_value) + : "m" (*ptr), + "r" (&new_value) + : "memory", "%ebx", "%ecx"); + return new_value; // Now it's the previous value. 
+#else + NotImplementedFatalError("NoBarrier_AtomicExchange"); + return 0; +#endif +} + +inline void NoBarrier_Store(volatile Atomic64* ptrValue, Atomic64 value) +{ + __asm { + movq mm0, value; // Use mmx reg for 64-bit atomic moves + mov eax, ptrValue; + movq [eax], mm0; + emms; // Empty mmx state to enable FP registers + } +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_AtomicExchange(ptr, value); + // acts as a barrier in this implementation +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_Store(ptr, value); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptrValue) +{ + Atomic64 value; + __asm { + mov eax, ptrValue; + movq mm0, [eax]; // Use mmx reg for 64-bit atomic moves + movq value, mm0; + emms; // Empty mmx state to enable FP registers + } + return value; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = NoBarrier_Load(ptr); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return NoBarrier_Load(ptr); +} + +#endif // defined(_WIN64) || defined(__MINGW64__) + + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // FastInterlockedExchange has both acquire and release memory barriers. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // FastInterlockedExchange has both acquire and release memory barriers. 
+ return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_WINDOWS_H_ diff --git a/src/third_party/gperftools-2.7/src/base/atomicops-internals-x86.cc b/src/third_party/gperftools-2.7/src/base/atomicops-internals-x86.cc new file mode 100644 index 00000000000..c3391e78234 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/atomicops-internals-x86.cc @@ -0,0 +1,112 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * This module gets enough CPU information to optimize the + * atomicops module on x86. + */ + +#include "base/atomicops.h" +#include "base/basictypes.h" +#include "base/googleinit.h" +#include "base/logging.h" +#include <string.h> + +// This file only makes sense with atomicops-internals-x86.h -- it +// depends on structs that are defined in that file. If atomicops.h +// doesn't sub-include that file, then we aren't needed, and shouldn't +// try to do anything. +#ifdef BASE_ATOMICOPS_INTERNALS_X86_H_ + +// Inline cpuid instruction. In PIC compilations, %ebx contains the address +// of the global offset table. To avoid breaking such executables, this code +// must preserve that register's value across cpuid instructions. +#if defined(__i386__) +#define cpuid(a, b, c, d, inp) \ + asm ("mov %%ebx, %%edi\n" \ + "cpuid\n" \ + "xchg %%edi, %%ebx\n" \ + : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp)) +#elif defined (__x86_64__) +#define cpuid(a, b, c, d, inp) \ + asm ("mov %%rbx, %%rdi\n" \ + "cpuid\n" \ + "xchg %%rdi, %%rbx\n" \ + : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp)) +#endif + +#if defined(cpuid) // initialize the struct only on x86 + +// Set the flags so that code will run correctly and conservatively +// until InitGoogle() is called. 
+struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures = { + false, // no SSE2 + false // no cmpxchg16b +}; + +// Initialize the AtomicOps_Internalx86CPUFeatures struct. +static void AtomicOps_Internalx86CPUFeaturesInit() { + uint32 eax; + uint32 ebx; + uint32 ecx; + uint32 edx; + + // Get vendor string (issue CPUID with eax = 0) + cpuid(eax, ebx, ecx, edx, 0); + char vendor[13]; + memcpy(vendor, &ebx, 4); + memcpy(vendor + 4, &edx, 4); + memcpy(vendor + 8, &ecx, 4); + vendor[12] = 0; + + // get feature flags in ecx/edx, and family/model in eax + cpuid(eax, ebx, ecx, edx, 1); + + int family = (eax >> 8) & 0xf; // family and model fields + int model = (eax >> 4) & 0xf; + if (family == 0xf) { // use extended family and model fields + family += (eax >> 20) & 0xff; + model += ((eax >> 16) & 0xf) << 4; + } + + // edx bit 26 is SSE2 which we use to tell use whether we can use mfence + AtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1); + + // ecx bit 13 indicates whether the cmpxchg16b instruction is supported + AtomicOps_Internalx86CPUFeatures.has_cmpxchg16b = ((ecx >> 13) & 1); +} + +REGISTER_MODULE_INITIALIZER(atomicops_x86, { + AtomicOps_Internalx86CPUFeaturesInit(); +}); + +#endif + +#endif /* ifdef BASE_ATOMICOPS_INTERNALS_X86_H_ */ diff --git a/src/third_party/gperftools-2.7/src/base/atomicops-internals-x86.h b/src/third_party/gperftools-2.7/src/base/atomicops-internals-x86.h new file mode 100644 index 00000000000..e441ac7e673 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/atomicops-internals-x86.h @@ -0,0 +1,391 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + */ + +// Implementation of atomic operations for x86. This file should not +// be included directly. Clients should instead include +// "base/atomicops.h". 
+ +#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_ +#define BASE_ATOMICOPS_INTERNALS_X86_H_ +#include "base/basictypes.h" + +typedef int32_t Atomic32; +#define BASE_HAS_ATOMIC64 1 // Use only in tests and base/atomic* + + +// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it +// already matches Atomic32 or Atomic64, depending on the platform. + + +// This struct is not part of the public API of this module; clients may not +// use it. +// Features of this x86. Values may not be correct before main() is run, +// but are set conservatively. +struct AtomicOps_x86CPUFeatureStruct { + bool has_sse2; // Processor has SSE2. + bool has_cmpxchg16b; // Processor supports cmpxchg16b instruction. +}; + +ATTRIBUTE_VISIBILITY_HIDDEN +extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures; + + +#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory") + + +namespace base { +namespace subtle { + +typedef int64_t Atomic64; + +// 32-bit low-level operations on any platform. + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev; + __asm__ __volatile__("lock; cmpxchgl %1,%2" + : "=a" (prev) + : "q" (new_value), "m" (*ptr), "0" (old_value) + : "memory"); + return prev; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + __asm__ __volatile__("xchgl %1,%0" // The lock prefix is implicit for xchg. + : "=r" (new_value) + : "m" (*ptr), "0" (new_value) + : "memory"); + return new_value; // Now it's the previous value. +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value); + return old_val; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + // xchgl already has release memory barrier semantics. 
+ return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return x; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +#if defined(__x86_64__) + +// 64-bit implementations of memory barrier can be simpler, because it +// "mfence" is guaranteed to exist. +inline void MemoryBarrier() { + __asm__ __volatile__("mfence" : : : "memory"); +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +#else + +inline void MemoryBarrier() { + if (AtomicOps_Internalx86CPUFeatures.has_sse2) { + __asm__ __volatile__("mfence" : : : "memory"); + } else { // mfence is faster but not present on PIII + Atomic32 x = 0; + Acquire_AtomicExchange(&x, 0); + } +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + if (AtomicOps_Internalx86CPUFeatures.has_sse2) { + *ptr = value; + __asm__ __volatile__("mfence" : : : "memory"); + } else { + Acquire_AtomicExchange(ptr, value); + } +} +#endif + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + ATOMICOPS_COMPILER_BARRIER(); + *ptr = value; // An x86 store acts as a release barrier. + // See comments in Atomic64 version of Release_Store(), below. +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; // An x86 load acts as a acquire barrier. + // See comments in Atomic64 version of Release_Store(), below. 
+ ATOMICOPS_COMPILER_BARRIER(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + +#if defined(__x86_64__) + +// 64-bit low-level operations on 64-bit platform. + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev; + __asm__ __volatile__("lock; cmpxchgq %1,%2" + : "=a" (prev) + : "q" (new_value), "m" (*ptr), "0" (old_value) + : "memory"); + return prev; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + __asm__ __volatile__("xchgq %1,%0" // The lock prefix is implicit for xchg. + : "=r" (new_value) + : "m" (*ptr), "0" (new_value) + : "memory"); + return new_value; // Now it's the previous value. +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value); + return old_val; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // xchgq already has release memory barrier semantics. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + ATOMICOPS_COMPILER_BARRIER(); + + *ptr = value; // An x86 store acts as a release barrier + // for current AMD/Intel chips as of Jan 2008. + // See also Acquire_Load(), below. + + // When new chips come out, check: + // IA-32 Intel Architecture Software Developer's Manual, Volume 3: + // System Programming Guide, Chatper 7: Multiple-processor management, + // Section 7.2, Memory Ordering. 
+ // Last seen at: + // http://developer.intel.com/design/pentium4/manuals/index_new.htm + // + // x86 stores/loads fail to act as barriers for a few instructions (clflush + // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are + // not generated by the compiler, and are rare. Users of these instructions + // need to know about cache behaviour in any case since all of these involve + // either flushing cache lines or non-temporal cache hints. +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = *ptr; // An x86 load acts as a acquire barrier, + // for current AMD/Intel chips as of Jan 2008. + // See also Release_Store(), above. + ATOMICOPS_COMPILER_BARRIER(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return *ptr; +} + +#else // defined(__x86_64__) + +// 64-bit low-level operations on 32-bit platform. + +#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) +// For compilers older than gcc 4.1, we use inline asm. +// +// Potential pitfalls: +// +// 1. %ebx points to Global offset table (GOT) with -fPIC. +// We need to preserve this register. +// 2. When explicit registers are used in inline asm, the +// compiler may not be aware of it and might try to reuse +// the same register for another argument which has constraints +// that allow it ("r" for example). 
+ +inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev; + __asm__ __volatile__("push %%ebx\n\t" + "movl (%3), %%ebx\n\t" // Move 64-bit new_value into + "movl 4(%3), %%ecx\n\t" // ecx:ebx + "lock; cmpxchg8b (%1)\n\t"// If edx:eax (old_value) same + "pop %%ebx\n\t" + : "=A" (prev) // as contents of ptr: + : "D" (ptr), // ecx:ebx => ptr + "0" (old_value), // else: + "S" (&new_value) // old *ptr => edx:eax + : "memory", "%ecx"); + return prev; +} +#endif // Compiler < gcc-4.1 + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_val, + Atomic64 new_val) { + return __sync_val_compare_and_swap(ptr, old_val, new_val); +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_val) { + Atomic64 old_val; + + do { + old_val = *ptr; + } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); + + return old_val; +} + +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_val) { + Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val); + return old_val; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_val) { + return NoBarrier_AtomicExchange(ptr, new_val); +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Empty mmx state/Reset FP regs + : "=m" (*ptr) + : "m" (value) + : // mark the FP stack and mmx registers as clobbered + "st", "st(1)", "st(2)", "st(3)", "st(4)", + "st(5)", "st(6)", "st(7)", "mm0", "mm1", + "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_Store(ptr, value); + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + ATOMICOPS_COMPILER_BARRIER(); + 
NoBarrier_Store(ptr, value); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + Atomic64 value; + __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Empty mmx state/Reset FP regs + : "=m" (value) + : "m" (*ptr) + : // mark the FP stack and mmx registers as clobbered + "st", "st(1)", "st(2)", "st(3)", "st(4)", + "st(5)", "st(6)", "st(7)", "mm0", "mm1", + "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); + return value; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = NoBarrier_Load(ptr); + ATOMICOPS_COMPILER_BARRIER(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return NoBarrier_Load(ptr); +} + +#endif // defined(__x86_64__) + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + return x; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +} // namespace base::subtle +} // namespace base + +#undef ATOMICOPS_COMPILER_BARRIER + +#endif // BASE_ATOMICOPS_INTERNALS_X86_H_ diff --git a/src/third_party/gperftools-2.7/src/base/atomicops.h b/src/third_party/gperftools-2.7/src/base/atomicops.h new file mode 100644 index 00000000000..dac95be86ab --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/atomicops.h @@ -0,0 +1,399 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + */ + +// For atomic operations on statistics counters, see atomic_stats_counter.h. +// For atomic operations on sequence numbers, see atomic_sequence_num.h. +// For atomic operations on reference counts, see atomic_refcount.h. + +// Some fast atomic operations -- typically with machine-dependent +// implementations. This file may need editing as Google code is +// ported to different architectures. 
+ +// The routines exported by this module are subtle. If you use them, even if +// you get the code right, it will depend on careful reasoning about atomicity +// and memory ordering; it will be less readable, and harder to maintain. If +// you plan to use these routines, you should have a good reason, such as solid +// evidence that performance would otherwise suffer, or there being no +// alternative. You should assume only properties explicitly guaranteed by the +// specifications in this file. You are almost certainly _not_ writing code +// just for the x86; if you assume x86 semantics, x86 hardware bugs and +// implementations on other archtectures will cause your code to break. If you +// do not know what you are doing, avoid these routines, and use a Mutex. +// +// These following lower-level operations are typically useful only to people +// implementing higher-level synchronization operations like spinlocks, +// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or +// a store with appropriate memory-ordering instructions. "Acquire" operations +// ensure that no later memory access can be reordered ahead of the operation. +// "Release" operations ensure that no previous memory access can be reordered +// after the operation. "Barrier" operations have both "Acquire" and "Release" +// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory +// access. +// +// It is incorrect to make direct assignments to/from an atomic variable. +// You should use one of the Load or Store routines. The NoBarrier +// versions are provided when no barriers are needed: +// NoBarrier_Store() +// NoBarrier_Load() +// Although there are currently no compiler enforcement, you are encouraged +// to use these. Moreover, if you choose to use base::subtle::Atomic64 type, +// you MUST use one of the Load or Store routines to get correct behavior +// on 32-bit platforms. 
+// +// The intent is eventually to put all of these routines in namespace +// base::subtle + +#ifndef THREAD_ATOMICOPS_H_ +#define THREAD_ATOMICOPS_H_ + +#include <config.h> +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif + +// ------------------------------------------------------------------------ +// Include the platform specific implementations of the types +// and operations listed below. Implementations are to provide Atomic32 +// and Atomic64 operations. If there is a mismatch between intptr_t and +// the Atomic32 or Atomic64 types for a platform, the platform-specific header +// should define the macro, AtomicWordCastType in a clause similar to the +// following: +// #if ...pointers are 64 bits... +// # define AtomicWordCastType base::subtle::Atomic64 +// #else +// # define AtomicWordCastType Atomic32 +// #endif +// TODO(csilvers): figure out ARCH_PIII/ARCH_K8 (perhaps via ./configure?) +// ------------------------------------------------------------------------ + +#include "base/arm_instruction_set_select.h" +#define GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) + +#define CLANG_VERSION (__clang_major__ * 10000 \ + + __clang_minor__ * 100 \ + + __clang_patchlevel__) + +#if defined(TCMALLOC_PREFER_GCC_ATOMICS) && defined(__GNUC__) && GCC_VERSION >= 40700 +#include "base/atomicops-internals-gcc.h" +#elif defined(TCMALLOC_PREFER_GCC_ATOMICS) && defined(__clang__) && CLANG_VERSION >= 30400 +#include "base/atomicops-internals-gcc.h" +#elif defined(__MACH__) && defined(__APPLE__) +#include "base/atomicops-internals-macosx.h" +#elif defined(__GNUC__) && defined(ARMV6) +#include "base/atomicops-internals-arm-v6plus.h" +#elif defined(ARMV3) +#include "base/atomicops-internals-arm-generic.h" +#elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__)) +#include "base/atomicops-internals-x86.h" +#elif defined(_WIN32) +#include "base/atomicops-internals-windows.h" +#elif defined(__linux__) && defined(__PPC__) +#include 
"base/atomicops-internals-linuxppc.h" +#elif defined(__GNUC__) && defined(__mips__) +#include "base/atomicops-internals-mips.h" +#elif defined(__GNUC__) && GCC_VERSION >= 40700 +#include "base/atomicops-internals-gcc.h" +#elif defined(__clang__) && CLANG_VERSION >= 30400 +#include "base/atomicops-internals-gcc.h" +#else +#error You need to implement atomic operations for this architecture +#endif + +// Signed type that can hold a pointer and supports the atomic ops below, as +// well as atomic loads and stores. Instances must be naturally-aligned. +typedef intptr_t AtomicWord; + +#ifdef AtomicWordCastType +// ------------------------------------------------------------------------ +// This section is needed only when explicit type casting is required to +// cast AtomicWord to one of the basic atomic types (Atomic64 or Atomic32). +// It also serves to document the AtomicWord interface. +// ------------------------------------------------------------------------ + +namespace base { +namespace subtle { + +// Atomically execute: +// result = *ptr; +// if (*ptr == old_value) +// *ptr = new_value; +// return result; +// +// I.e., replace "*ptr" with "new_value" if "*ptr" used to be "old_value". +// Always return the old value of "*ptr" +// +// This routine implies no memory barriers. +inline AtomicWord NoBarrier_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return NoBarrier_CompareAndSwap( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), + old_value, new_value); +} + +// Atomically store new_value into *ptr, returning the previous value held in +// *ptr. This routine implies no memory barriers. 
+inline AtomicWord NoBarrier_AtomicExchange(volatile AtomicWord* ptr, + AtomicWord new_value) { + return NoBarrier_AtomicExchange( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value); +} + +inline AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr, + AtomicWord new_value) { + return Acquire_AtomicExchange( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value); +} + +inline AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr, + AtomicWord new_value) { + return Release_AtomicExchange( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value); +} + +inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return base::subtle::Acquire_CompareAndSwap( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), + old_value, new_value); +} + +inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return base::subtle::Release_CompareAndSwap( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), + old_value, new_value); +} + +inline void NoBarrier_Store(volatile AtomicWord *ptr, AtomicWord value) { + NoBarrier_Store( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), value); +} + +inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) { + return base::subtle::Acquire_Store( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), value); +} + +inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) { + return base::subtle::Release_Store( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), value); +} + +inline AtomicWord NoBarrier_Load(volatile const AtomicWord *ptr) { + return NoBarrier_Load( + reinterpret_cast<volatile const AtomicWordCastType*>(ptr)); +} + +inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) { + return base::subtle::Acquire_Load( + reinterpret_cast<volatile const AtomicWordCastType*>(ptr)); +} + +inline AtomicWord Release_Load(volatile const 
AtomicWord* ptr) { + return base::subtle::Release_Load( + reinterpret_cast<volatile const AtomicWordCastType*>(ptr)); +} + +} // namespace base::subtle +} // namespace base +#endif // AtomicWordCastType + +// ------------------------------------------------------------------------ +// Commented out type definitions and method declarations for documentation +// of the interface provided by this module. +// ------------------------------------------------------------------------ + +#if 0 + +// Signed 32-bit type that supports the atomic ops below, as well as atomic +// loads and stores. Instances must be naturally aligned. This type differs +// from AtomicWord in 64-bit binaries where AtomicWord is 64-bits. +typedef int32_t Atomic32; + +// Corresponding operations on Atomic32 +namespace base { +namespace subtle { + +// Signed 64-bit type that supports the atomic ops below, as well as atomic +// loads and stores. Instances must be naturally aligned. This type differs +// from AtomicWord in 32-bit binaries where AtomicWord is 32-bits. 
+typedef int64_t Atomic64; + +Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value); +Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); +Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); +Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); +Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value); +Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value); +void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value); +void Acquire_Store(volatile Atomic32* ptr, Atomic32 value); +void Release_Store(volatile Atomic32* ptr, Atomic32 value); +Atomic32 NoBarrier_Load(volatile const Atomic32* ptr); +Atomic32 Acquire_Load(volatile const Atomic32* ptr); +Atomic32 Release_Load(volatile const Atomic32* ptr); + +// Corresponding operations on Atomic64 +Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value); +Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); +Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); +Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); + +Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value); +Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value); +void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value); +void Acquire_Store(volatile Atomic64* ptr, Atomic64 value); +void Release_Store(volatile Atomic64* ptr, Atomic64 value); +Atomic64 NoBarrier_Load(volatile const Atomic64* ptr); +Atomic64 Acquire_Load(volatile const Atomic64* ptr); +Atomic64 Release_Load(volatile const Atomic64* ptr); +} // namespace base::subtle +} // namespace base + +void MemoryBarrier(); + +#endif // 0 + + +// 
------------------------------------------------------------------------ +// The following are to be deprecated when all uses have been changed to +// use the base::subtle namespace. +// ------------------------------------------------------------------------ + +#ifdef AtomicWordCastType +// AtomicWord versions to be deprecated +inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); +} + +inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); +} + +inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) { + return base::subtle::Acquire_Store(ptr, value); +} + +inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) { + return base::subtle::Release_Store(ptr, value); +} + +inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) { + return base::subtle::Acquire_Load(ptr); +} + +inline AtomicWord Release_Load(volatile const AtomicWord* ptr) { + return base::subtle::Release_Load(ptr); +} +#endif // AtomicWordCastType + +// 32-bit Acquire/Release operations to be deprecated. 
+ +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); +} +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); +} +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + base::subtle::Acquire_Store(ptr, value); +} +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + return base::subtle::Release_Store(ptr, value); +} +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + return base::subtle::Acquire_Load(ptr); +} +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + return base::subtle::Release_Load(ptr); +} + +#ifdef BASE_HAS_ATOMIC64 + +// 64-bit Acquire/Release operations to be deprecated. + +inline base::subtle::Atomic64 Acquire_CompareAndSwap( + volatile base::subtle::Atomic64* ptr, + base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) { + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); +} +inline base::subtle::Atomic64 Release_CompareAndSwap( + volatile base::subtle::Atomic64* ptr, + base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) { + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); +} +inline void Acquire_Store( + volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { + base::subtle::Acquire_Store(ptr, value); +} +inline void Release_Store( + volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { + return base::subtle::Release_Store(ptr, value); +} +inline base::subtle::Atomic64 Acquire_Load( + volatile const base::subtle::Atomic64* ptr) { + return base::subtle::Acquire_Load(ptr); +} +inline base::subtle::Atomic64 Release_Load( + volatile const base::subtle::Atomic64* ptr) { + return base::subtle::Release_Load(ptr); +} + +#endif // 
BASE_HAS_ATOMIC64 + +#endif // THREAD_ATOMICOPS_H_ diff --git a/src/third_party/gperftools-2.7/src/base/basictypes.h b/src/third_party/gperftools-2.7/src/base/basictypes.h new file mode 100644 index 00000000000..42dbe5ceb94 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/basictypes.h @@ -0,0 +1,436 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#ifndef _BASICTYPES_H_ +#define _BASICTYPES_H_ + +#include <config.h> +#include <string.h> // for memcpy() +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> // gets us PRId64, etc +#endif + +// To use this in an autoconf setting, make sure you run the following +// autoconf macros: +// AC_HEADER_STDC /* for stdint_h and inttypes_h */ +// AC_CHECK_TYPES([__int64]) /* defined in some windows platforms */ + +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> // uint16_t might be here; PRId64 too. +#endif +#ifdef HAVE_STDINT_H +#include <stdint.h> // to get uint16_t (ISO naming madness) +#endif +#include <sys/types.h> // our last best hope for uint16_t + +// Standard typedefs +// All Google code is compiled with -funsigned-char to make "char" +// unsigned. Google code therefore doesn't need a "uchar" type. +// TODO(csilvers): how do we make sure unsigned-char works on non-gcc systems? +typedef signed char schar; +typedef int8_t int8; +typedef int16_t int16; +typedef int32_t int32; +typedef int64_t int64; + +// NOTE: unsigned types are DANGEROUS in loops and other arithmetical +// places. Use the signed types unless your variable represents a bit +// pattern (eg a hash value) or you really need the extra bit. Do NOT +// use 'unsigned' to express "this value should always be positive"; +// use assertions for this. 
+ +typedef uint8_t uint8; +typedef uint16_t uint16; +typedef uint32_t uint32; +typedef uint64_t uint64; + +const uint16 kuint16max = ( (uint16) 0xFFFF); +const uint32 kuint32max = ( (uint32) 0xFFFFFFFF); +const uint64 kuint64max = ( (((uint64) kuint32max) << 32) | kuint32max ); + +const int8 kint8max = ( ( int8) 0x7F); +const int16 kint16max = ( ( int16) 0x7FFF); +const int32 kint32max = ( ( int32) 0x7FFFFFFF); +const int64 kint64max = ( ((( int64) kint32max) << 32) | kuint32max ); + +const int8 kint8min = ( ( int8) 0x80); +const int16 kint16min = ( ( int16) 0x8000); +const int32 kint32min = ( ( int32) 0x80000000); +const int64 kint64min = ( (((uint64) kint32min) << 32) | 0 ); + +// Define the "portable" printf and scanf macros, if they're not +// already there (via the inttypes.h we #included above, hopefully). +// Mostly it's old systems that don't support inttypes.h, so we assume +// they're 32 bit. +#ifndef PRIx64 +#define PRIx64 "llx" +#endif +#ifndef SCNx64 +#define SCNx64 "llx" +#endif +#ifndef PRId64 +#define PRId64 "lld" +#endif +#ifndef SCNd64 +#define SCNd64 "lld" +#endif +#ifndef PRIu64 +#define PRIu64 "llu" +#endif +#ifndef PRIxPTR +#define PRIxPTR "lx" +#endif + +// Also allow for printing of a pthread_t. 
+#define GPRIuPTHREAD "lu" +#define GPRIxPTHREAD "lx" +#if defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__APPLE__) || defined(__FreeBSD__) +#define PRINTABLE_PTHREAD(pthreadt) reinterpret_cast<uintptr_t>(pthreadt) +#else +#define PRINTABLE_PTHREAD(pthreadt) pthreadt +#endif + +#ifdef HAVE_BUILTIN_EXPECT +#define PREDICT_TRUE(x) __builtin_expect(!!(x), 1) +#define PREDICT_FALSE(x) __builtin_expect(!!(x), 0) +#else +#define PREDICT_TRUE(x) (x) +#define PREDICT_FALSE(x) (x) +#endif + +// A macro to disallow the evil copy constructor and operator= functions +// This should be used in the private: declarations for a class +#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + +// An alternate name that leaves out the moral judgment... :-) +#define DISALLOW_COPY_AND_ASSIGN(TypeName) DISALLOW_EVIL_CONSTRUCTORS(TypeName) + +// The COMPILE_ASSERT macro can be used to verify that a compile time +// expression is true. For example, you could use it to verify the +// size of a static array: +// +// COMPILE_ASSERT(sizeof(num_content_type_names) == sizeof(int), +// content_type_names_incorrect_size); +// +// or to make sure a struct is smaller than a certain size: +// +// COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large); +// +// The second argument to the macro is the name of the variable. If +// the expression is false, most compilers will issue a warning/error +// containing the name of the variable. +// +// Implementation details of COMPILE_ASSERT: +// +// - COMPILE_ASSERT works by defining an array type that has -1 +// elements (and thus is invalid) when the expression is false. +// +// - The simpler definition +// +// #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1] +// +// does not work, as gcc supports variable-length arrays whose sizes +// are determined at run-time (this is gcc's extension and not part +// of the C++ standard). 
As a result, gcc fails to reject the +// following code with the simple definition: +// +// int foo; +// COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is +// // not a compile-time constant. +// +// - By using the type CompileAssert<(bool(expr))>, we ensures that +// expr is a compile-time constant. (Template arguments must be +// determined at compile-time.) +// +// - The outter parentheses in CompileAssert<(bool(expr))> are necessary +// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written +// +// CompileAssert<bool(expr)> +// +// instead, these compilers will refuse to compile +// +// COMPILE_ASSERT(5 > 0, some_message); +// +// (They seem to think the ">" in "5 > 0" marks the end of the +// template argument list.) +// +// - The array size is (bool(expr) ? 1 : -1), instead of simply +// +// ((expr) ? 1 : -1). +// +// This is to avoid running into a bug in MS VC 7.1, which +// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. + +template <bool> +struct CompileAssert { +}; + +#ifdef HAVE___ATTRIBUTE__ +# define ATTRIBUTE_UNUSED __attribute__((unused)) +#else +# define ATTRIBUTE_UNUSED +#endif + +#if defined(HAVE___ATTRIBUTE__) && defined(HAVE_TLS) +#define ATTR_INITIAL_EXEC __attribute__ ((tls_model ("initial-exec"))) +#else +#define ATTR_INITIAL_EXEC +#endif + +#define COMPILE_ASSERT(expr, msg) \ + typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] ATTRIBUTE_UNUSED + +#define arraysize(a) (sizeof(a) / sizeof(*(a))) + +#define OFFSETOF_MEMBER(strct, field) \ + (reinterpret_cast<char*>(&reinterpret_cast<strct*>(16)->field) - \ + reinterpret_cast<char*>(16)) + +// bit_cast<Dest,Source> implements the equivalent of +// "*reinterpret_cast<Dest*>(&source)". +// +// The reinterpret_cast method would produce undefined behavior +// according to ISO C++ specification section 3.10 -15 -. +// bit_cast<> calls memcpy() which is blessed by the standard, +// especially by the example in section 3.9. 
+// +// Fortunately memcpy() is very fast. In optimized mode, with a +// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline +// code with the minimal amount of data movement. On a 32-bit system, +// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8) +// compiles to two loads and two stores. + +template <class Dest, class Source> +inline Dest bit_cast(const Source& source) { + COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), bitcasting_unequal_sizes); + Dest dest; + memcpy(&dest, &source, sizeof(dest)); + return dest; +} + +// bit_store<Dest,Source> implements the equivalent of +// "dest = *reinterpret_cast<Dest*>(&source)". +// +// This prevents undefined behavior when the dest pointer is unaligned. +template <class Dest, class Source> +inline void bit_store(Dest *dest, const Source *source) { + COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), bitcasting_unequal_sizes); + memcpy(dest, source, sizeof(Dest)); +} + +#ifdef HAVE___ATTRIBUTE__ +# define ATTRIBUTE_WEAK __attribute__((weak)) +# define ATTRIBUTE_NOINLINE __attribute__((noinline)) +#else +# define ATTRIBUTE_WEAK +# define ATTRIBUTE_NOINLINE +#endif + +#if defined(HAVE___ATTRIBUTE__) && defined(__ELF__) +# define ATTRIBUTE_VISIBILITY_HIDDEN __attribute__((visibility("hidden"))) +#else +# define ATTRIBUTE_VISIBILITY_HIDDEN +#endif + +// Section attributes are supported for both ELF and Mach-O, but in +// very different ways. Here's the API we provide: +// 1) ATTRIBUTE_SECTION: put this with the declaration of all functions +// you want to be in the same linker section +// 2) DEFINE_ATTRIBUTE_SECTION_VARS: must be called once per unique +// name. You want to make sure this is executed before any +// DECLARE_ATTRIBUTE_SECTION_VARS; the easiest way is to put them +// in the same .cc file. Put this call at the global level. +// 3) INIT_ATTRIBUTE_SECTION_VARS: you can scatter calls to this in +// multiple places to help ensure execution before any +// DECLARE_ATTRIBUTE_SECTION_VARS. 
You must have at least one +// DEFINE, but you can have many INITs. Put each in its own scope. +// 4) DECLARE_ATTRIBUTE_SECTION_VARS: must be called before using +// ATTRIBUTE_SECTION_START or ATTRIBUTE_SECTION_STOP on a name. +// Put this call at the global level. +// 5) ATTRIBUTE_SECTION_START/ATTRIBUTE_SECTION_STOP: call this to say +// where in memory a given section is. All functions declared with +// ATTRIBUTE_SECTION are guaranteed to be between START and STOP. + +#if defined(HAVE___ATTRIBUTE__) && defined(__ELF__) +# define ATTRIBUTE_SECTION(name) __attribute__ ((section (#name))) __attribute__((noinline)) + + // Weak section declaration to be used as a global declaration + // for ATTRIBUTE_SECTION_START|STOP(name) to compile and link + // even without functions with ATTRIBUTE_SECTION(name). +# define DECLARE_ATTRIBUTE_SECTION_VARS(name) \ + extern char __start_##name[] ATTRIBUTE_WEAK; \ + extern char __stop_##name[] ATTRIBUTE_WEAK +# define INIT_ATTRIBUTE_SECTION_VARS(name) // no-op for ELF +# define DEFINE_ATTRIBUTE_SECTION_VARS(name) // no-op for ELF + + // Return void* pointers to start/end of a section of code with functions + // having ATTRIBUTE_SECTION(name), or 0 if no such function exists. + // One must DECLARE_ATTRIBUTE_SECTION(name) for this to compile and link. 
+# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(__start_##name)) +# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(__stop_##name)) +# define HAVE_ATTRIBUTE_SECTION_START 1 + +#elif defined(HAVE___ATTRIBUTE__) && defined(__MACH__) +# define ATTRIBUTE_SECTION(name) __attribute__ ((section ("__TEXT, " #name))) + +#include <mach-o/getsect.h> +#include <mach-o/dyld.h> +class AssignAttributeStartEnd { + public: + AssignAttributeStartEnd(const char* name, char** pstart, char** pend) { + // Find out what dynamic library name is defined in + if (_dyld_present()) { + for (int i = _dyld_image_count() - 1; i >= 0; --i) { + const mach_header* hdr = _dyld_get_image_header(i); +#ifdef MH_MAGIC_64 + if (hdr->magic == MH_MAGIC_64) { + uint64_t len; + *pstart = getsectdatafromheader_64((mach_header_64*)hdr, + "__TEXT", name, &len); + if (*pstart) { // NULL if not defined in this dynamic library + *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc + *pend = *pstart + len; + return; + } + } +#endif + if (hdr->magic == MH_MAGIC) { + uint32_t len; + *pstart = getsectdatafromheader(hdr, "__TEXT", name, &len); + if (*pstart) { // NULL if not defined in this dynamic library + *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc + *pend = *pstart + len; + return; + } + } + } + } + // If we get here, not defined in a dll at all. See if defined statically. + unsigned long len; // don't ask me why this type isn't uint32_t too... 
+ *pstart = getsectdata("__TEXT", name, &len); + *pend = *pstart + len; + } +}; + +#define DECLARE_ATTRIBUTE_SECTION_VARS(name) \ + extern char* __start_##name; \ + extern char* __stop_##name + +#define INIT_ATTRIBUTE_SECTION_VARS(name) \ + DECLARE_ATTRIBUTE_SECTION_VARS(name); \ + static const AssignAttributeStartEnd __assign_##name( \ + #name, &__start_##name, &__stop_##name) + +#define DEFINE_ATTRIBUTE_SECTION_VARS(name) \ + char* __start_##name, *__stop_##name; \ + INIT_ATTRIBUTE_SECTION_VARS(name) + +# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(__start_##name)) +# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(__stop_##name)) +# define HAVE_ATTRIBUTE_SECTION_START 1 + +#else // not HAVE___ATTRIBUTE__ && __ELF__, nor HAVE___ATTRIBUTE__ && __MACH__ +# define ATTRIBUTE_SECTION(name) +# define DECLARE_ATTRIBUTE_SECTION_VARS(name) +# define INIT_ATTRIBUTE_SECTION_VARS(name) +# define DEFINE_ATTRIBUTE_SECTION_VARS(name) +# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(0)) +# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(0)) + +#endif // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__ + +#if defined(HAVE___ATTRIBUTE__) +# if (defined(__i386__) || defined(__x86_64__)) +# define CACHELINE_ALIGNED __attribute__((aligned(64))) +# elif (defined(__PPC__) || defined(__PPC64__)) +# define CACHELINE_ALIGNED __attribute__((aligned(16))) +# elif (defined(__arm__)) +# define CACHELINE_ALIGNED __attribute__((aligned(64))) + // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned +# elif (defined(__mips__)) +# define CACHELINE_ALIGNED __attribute__((aligned(128))) +# elif (defined(__aarch64__)) +# define CACHELINE_ALIGNED __attribute__((aligned(64))) + // implementation specific, Cortex-A53 and 57 should have 64 bytes +# elif (defined(__s390__)) +# define CACHELINE_ALIGNED __attribute__((aligned(256))) +# else +# error Could not determine cache line 
length - unknown architecture +# endif +#else +# define CACHELINE_ALIGNED +#endif // defined(HAVE___ATTRIBUTE__) + +#if defined(HAVE___ATTRIBUTE__ALIGNED_FN) +# define CACHELINE_ALIGNED_FN CACHELINE_ALIGNED +#else +# define CACHELINE_ALIGNED_FN +#endif + +// Structure for discovering alignment +union MemoryAligner { + void* p; + double d; + size_t s; +} CACHELINE_ALIGNED; + +#if defined(HAVE___ATTRIBUTE__) && defined(__ELF__) +#define ATTRIBUTE_HIDDEN __attribute__((visibility("hidden"))) +#else +#define ATTRIBUTE_HIDDEN +#endif + +#if defined(__GNUC__) +#define ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) +#define ATTRIBUTE_ALWAYS_INLINE __forceinline +#else +#define ATTRIBUTE_ALWAYS_INLINE +#endif + +// The following enum should be used only as a constructor argument to indicate +// that the variable has static storage class, and that the constructor should +// do nothing to its state. It indicates to the reader that it is legal to +// declare a static nistance of the class, provided the constructor is given +// the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a +// static variable that has a constructor or a destructor because invocation +// order is undefined. 
However, IF the type can be initialized by filling with +// zeroes (which the loader does for static variables), AND the destructor also +// does nothing to the storage, then a constructor declared as +// explicit MyClass(base::LinkerInitialized x) {} +// and invoked as +// static MyClass my_variable_name(base::LINKER_INITIALIZED); +namespace base { +enum LinkerInitialized { LINKER_INITIALIZED }; +} + +#endif // _BASICTYPES_H_ diff --git a/src/third_party/gperftools-2.7/src/base/commandlineflags.h b/src/third_party/gperftools-2.7/src/base/commandlineflags.h new file mode 100644 index 00000000000..49c904f891a --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/commandlineflags.h @@ -0,0 +1,175 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// This file is a compatibility layer that defines Google's version of +// command line flags that are used for configuration. +// +// We put flags into their own namespace. It is purposefully +// named in an opaque way that people should have trouble typing +// directly. The idea is that DEFINE puts the flag in the weird +// namespace, and DECLARE imports the flag from there into the +// current namespace. The net result is to force people to use +// DECLARE to get access to a flag, rather than saying +// extern bool FLAGS_logtostderr; +// or some such instead. We want this so we can put extra +// functionality (like sanity-checking) in DECLARE if we want, +// and make sure it is picked up everywhere. +// +// We also put the type of the variable in the namespace, so that +// people can't DECLARE_int32 something that they DEFINE_bool'd +// elsewhere. 
+#ifndef BASE_COMMANDLINEFLAGS_H_ +#define BASE_COMMANDLINEFLAGS_H_ + +#include <config.h> +#include <string> +#include <string.h> // for memchr +#include <stdlib.h> // for getenv +#include "base/basictypes.h" + +#define DECLARE_VARIABLE(type, name) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ + extern PERFTOOLS_DLL_DECL type FLAGS_##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name + +#define DEFINE_VARIABLE(type, name, value, meaning) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ + PERFTOOLS_DLL_DECL type FLAGS_##name(value); \ + char FLAGS_no##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name + +// bool specialization +#define DECLARE_bool(name) \ + DECLARE_VARIABLE(bool, name) +#define DEFINE_bool(name, value, meaning) \ + DEFINE_VARIABLE(bool, name, value, meaning) + +// int32 specialization +#define DECLARE_int32(name) \ + DECLARE_VARIABLE(int32, name) +#define DEFINE_int32(name, value, meaning) \ + DEFINE_VARIABLE(int32, name, value, meaning) + +// int64 specialization +#define DECLARE_int64(name) \ + DECLARE_VARIABLE(int64, name) +#define DEFINE_int64(name, value, meaning) \ + DEFINE_VARIABLE(int64, name, value, meaning) + +#define DECLARE_uint64(name) \ + DECLARE_VARIABLE(uint64, name) +#define DEFINE_uint64(name, value, meaning) \ + DEFINE_VARIABLE(uint64, name, value, meaning) + +// double specialization +#define DECLARE_double(name) \ + DECLARE_VARIABLE(double, name) +#define DEFINE_double(name, value, meaning) \ + DEFINE_VARIABLE(double, name, value, meaning) + +// Special case for string, because we have to specify the namespace +// std::string, which doesn't play nicely with our FLAG__namespace hackery. 
+#define DECLARE_string(name) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead { \ + extern std::string FLAGS_##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name +#define DEFINE_string(name, value, meaning) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead { \ + std::string FLAGS_##name(value); \ + char FLAGS_no##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name + +// implemented in sysinfo.cc +namespace tcmalloc { + namespace commandlineflags { + + inline bool StringToBool(const char *value, bool def) { + if (!value) { + return def; + } + switch (value[0]) { + case 't': + case 'T': + case 'y': + case 'Y': + case '1': + case '\0': + return true; + } + return false; + } + + inline int StringToInt(const char *value, int def) { + if (!value) { + return def; + } + return strtol(value, NULL, 10); + } + + inline long long StringToLongLong(const char *value, long long def) { + if (!value) { + return def; + } + return strtoll(value, NULL, 10); + } + + inline double StringToDouble(const char *value, double def) { + if (!value) { + return def; + } + return strtod(value, NULL); + } + } +} + +// These macros (could be functions, but I don't want to bother with a .cc +// file), make it easier to initialize flags from the environment. + +#define EnvToString(envname, dflt) \ + (!getenv(envname) ? 
(dflt) : getenv(envname)) + +#define EnvToBool(envname, dflt) \ + tcmalloc::commandlineflags::StringToBool(getenv(envname), dflt) + +#define EnvToInt(envname, dflt) \ + tcmalloc::commandlineflags::StringToInt(getenv(envname), dflt) + +#define EnvToInt64(envname, dflt) \ + tcmalloc::commandlineflags::StringToLongLong(getenv(envname), dflt) + +#define EnvToDouble(envname, dflt) \ + tcmalloc::commandlineflags::StringToDouble(getenv(envname), dflt) + +#endif // BASE_COMMANDLINEFLAGS_H_ diff --git a/src/third_party/gperftools-2.7/src/base/dynamic_annotations.c b/src/third_party/gperftools-2.7/src/base/dynamic_annotations.c new file mode 100644 index 00000000000..87bd2ecde97 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/dynamic_annotations.c @@ -0,0 +1,179 @@ +/* Copyright (c) 2008-2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Kostya Serebryany + */ + +#ifdef __cplusplus +# error "This file should be built as pure C to avoid name mangling" +#endif + +#include "config.h" +#include <stdlib.h> +#include <string.h> + +#include "base/dynamic_annotations.h" +#include "getenv_safe.h" // for TCMallocGetenvSafe + +#ifdef __GNUC__ +/* valgrind.h uses gcc extensions so it won't build with other compilers */ +# ifdef HAVE_VALGRIND_H /* prefer the user's copy if they have it */ +# include <valgrind.h> +# else /* otherwise just use the copy that we have */ +# include "third_party/valgrind.h" +# endif +#endif + +/* Compiler-based ThreadSanitizer defines + DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL = 1 + and provides its own definitions of the functions. */ + +#ifndef DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL +# define DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL 0 +#endif + +/* Each function is empty and called (via a macro) only in debug mode. + The arguments are captured by dynamic tools at runtime. 
*/ + +#if DYNAMIC_ANNOTATIONS_ENABLED == 1 \ + && DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 + +void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock){} +void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock){} +void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w){} +void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w){} +void AnnotateBarrierInit(const char *file, int line, + const volatile void *barrier, long count, + long reinitialization_allowed) {} +void AnnotateBarrierWaitBefore(const char *file, int line, + const volatile void *barrier) {} +void AnnotateBarrierWaitAfter(const char *file, int line, + const volatile void *barrier) {} +void AnnotateBarrierDestroy(const char *file, int line, + const volatile void *barrier) {} + +void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock){} +void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv){} +void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv){} +void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size){} +void AnnotateUnpublishMemoryRange(const char *file, int line, + const volatile void *address, + long size){} +void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq){} +void AnnotateNewMemory(const char *file, int line, + const volatile void *mem, + long size){} +void AnnotateExpectRace(const char *file, int line, + const volatile void *mem, + const char *description){} +void AnnotateBenignRace(const char *file, int line, + const volatile void *mem, + const 
char *description){} +void AnnotateBenignRaceSized(const char *file, int line, + const volatile void *mem, + long size, + const char *description) {} +void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu){} +void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg){} +void AnnotateThreadName(const char *file, int line, + const char *name){} +void AnnotateIgnoreReadsBegin(const char *file, int line){} +void AnnotateIgnoreReadsEnd(const char *file, int line){} +void AnnotateIgnoreWritesBegin(const char *file, int line){} +void AnnotateIgnoreWritesEnd(const char *file, int line){} +void AnnotateEnableRaceDetection(const char *file, int line, int enable){} +void AnnotateNoOp(const char *file, int line, + const volatile void *arg){} +void AnnotateFlushState(const char *file, int line){} + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED == 1 + && DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 */ + +#if DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 + +static int GetRunningOnValgrind(void) { +#ifdef RUNNING_ON_VALGRIND + if (RUNNING_ON_VALGRIND) return 1; +#endif + const char *running_on_valgrind_str = TCMallocGetenvSafe("RUNNING_ON_VALGRIND"); + if (running_on_valgrind_str) { + return strcmp(running_on_valgrind_str, "0") != 0; + } + return 0; +} + +/* See the comments in dynamic_annotations.h */ +int RunningOnValgrind(void) { + static volatile int running_on_valgrind = -1; + int local_running_on_valgrind = running_on_valgrind; + /* C doesn't have thread-safe initialization of statics, and we + don't want to depend on pthread_once here, so hack it. 
*/ + ANNOTATE_BENIGN_RACE(&running_on_valgrind, "safe hack"); + if (local_running_on_valgrind == -1) + running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind(); + return local_running_on_valgrind; +} + +#endif /* DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 */ + +/* See the comments in dynamic_annotations.h */ +double ValgrindSlowdown(void) { + /* Same initialization hack as in RunningOnValgrind(). */ + static volatile double slowdown = 0.0; + double local_slowdown = slowdown; + ANNOTATE_BENIGN_RACE(&slowdown, "safe hack"); + if (RunningOnValgrind() == 0) { + return 1.0; + } + if (local_slowdown == 0.0) { + char *env = getenv("VALGRIND_SLOWDOWN"); + slowdown = local_slowdown = env ? atof(env) : 50.0; + } + return local_slowdown; +} diff --git a/src/third_party/gperftools-2.7/src/base/dynamic_annotations.h b/src/third_party/gperftools-2.7/src/base/dynamic_annotations.h new file mode 100644 index 00000000000..4669315ced3 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/dynamic_annotations.h @@ -0,0 +1,627 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Kostya Serebryany + */ + +/* This file defines dynamic annotations for use with dynamic analysis + tool such as valgrind, PIN, etc. + + Dynamic annotation is a source code annotation that affects + the generated code (that is, the annotation is not a comment). + Each such annotation is attached to a particular + instruction and/or to a particular object (address) in the program. + + The annotations that should be used by users are macros in all upper-case + (e.g., ANNOTATE_NEW_MEMORY). + + Actual implementation of these macros may differ depending on the + dynamic analysis tool being used. + + See http://code.google.com/p/data-race-test/ for more information. + + This file supports the following dynamic analysis tools: + - None (DYNAMIC_ANNOTATIONS_ENABLED is not defined or zero). + Macros are defined empty. + - ThreadSanitizer, Helgrind, DRD (DYNAMIC_ANNOTATIONS_ENABLED is 1). + Macros are defined as calls to non-inlinable empty functions + that are intercepted by Valgrind. 
*/ + +#ifndef BASE_DYNAMIC_ANNOTATIONS_H_ +#define BASE_DYNAMIC_ANNOTATIONS_H_ + +#ifndef DYNAMIC_ANNOTATIONS_ENABLED +# define DYNAMIC_ANNOTATIONS_ENABLED 0 +#endif + +#if DYNAMIC_ANNOTATIONS_ENABLED != 0 + + /* ------------------------------------------------------------- + Annotations useful when implementing condition variables such as CondVar, + using conditional critical sections (Await/LockWhen) and when constructing + user-defined synchronization mechanisms. + + The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can + be used to define happens-before arcs in user-defined synchronization + mechanisms: the race detector will infer an arc from the former to the + latter when they share the same argument pointer. + + Example 1 (reference counting): + + void Unref() { + ANNOTATE_HAPPENS_BEFORE(&refcount_); + if (AtomicDecrementByOne(&refcount_) == 0) { + ANNOTATE_HAPPENS_AFTER(&refcount_); + delete this; + } + } + + Example 2 (message queue): + + void MyQueue::Put(Type *e) { + MutexLock lock(&mu_); + ANNOTATE_HAPPENS_BEFORE(e); + PutElementIntoMyQueue(e); + } + + Type *MyQueue::Get() { + MutexLock lock(&mu_); + Type *e = GetElementFromMyQueue(); + ANNOTATE_HAPPENS_AFTER(e); + return e; + } + + Note: when possible, please use the existing reference counting and message + queue implementations instead of inventing new ones. */ + + /* Report that wait on the condition variable at address "cv" has succeeded + and the lock at address "lock" is held. */ + #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ + AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) + + /* Report that wait on the condition variable at "cv" has succeeded. Variant + w/o lock. */ + #define ANNOTATE_CONDVAR_WAIT(cv) \ + AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) + + /* Report that we are about to signal on the condition variable at address + "cv". 
*/ + #define ANNOTATE_CONDVAR_SIGNAL(cv) \ + AnnotateCondVarSignal(__FILE__, __LINE__, cv) + + /* Report that we are about to signal_all on the condition variable at "cv". */ + #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ + AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) + + /* Annotations for user-defined synchronization mechanisms. */ + #define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj) + #define ANNOTATE_HAPPENS_AFTER(obj) ANNOTATE_CONDVAR_WAIT(obj) + + /* Report that the bytes in the range [pointer, pointer+size) are about + to be published safely. The race checker will create a happens-before + arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to + subsequent accesses to this memory. + Note: this annotation may not work properly if the race detector uses + sampling, i.e. does not observe all memory accesses. + */ + #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \ + AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size) + + /* DEPRECATED. Don't use it. */ + #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \ + AnnotateUnpublishMemoryRange(__FILE__, __LINE__, pointer, size) + + /* DEPRECATED. Don't use it. */ + #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size) \ + do { \ + ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \ + ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size); \ + } while (0) + + /* Instruct the tool to create a happens-before arc between mu->Unlock() and + mu->Lock(). This annotation may slow down the race detector and hide real + races. Normally it is used only when it would be difficult to annotate each + of the mutex's critical sections individually using the annotations above. + This annotation makes sense only for hybrid race detectors. For pure + happens-before detectors this is a no-op. For more details see + http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . 
*/ + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \ + AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) + + /* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */ + #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ + AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) + + /* ------------------------------------------------------------- + Annotations useful when defining memory allocators, or when memory that + was protected in one way starts to be protected in another. */ + + /* Report that a new memory at "address" of size "size" has been allocated. + This might be used when the memory has been retrieved from a free list and + is about to be reused, or when a the locking discipline for a variable + changes. */ + #define ANNOTATE_NEW_MEMORY(address, size) \ + AnnotateNewMemory(__FILE__, __LINE__, address, size) + + /* ------------------------------------------------------------- + Annotations useful when defining FIFO queues that transfer data between + threads. */ + + /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at + address "pcq" has been created. The ANNOTATE_PCQ_* annotations + should be used only for FIFO queues. For non-FIFO queues use + ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). */ + #define ANNOTATE_PCQ_CREATE(pcq) \ + AnnotatePCQCreate(__FILE__, __LINE__, pcq) + + /* Report that the queue at address "pcq" is about to be destroyed. */ + #define ANNOTATE_PCQ_DESTROY(pcq) \ + AnnotatePCQDestroy(__FILE__, __LINE__, pcq) + + /* Report that we are about to put an element into a FIFO queue at address + "pcq". */ + #define ANNOTATE_PCQ_PUT(pcq) \ + AnnotatePCQPut(__FILE__, __LINE__, pcq) + + /* Report that we've just got an element from a FIFO queue at address "pcq". */ + #define ANNOTATE_PCQ_GET(pcq) \ + AnnotatePCQGet(__FILE__, __LINE__, pcq) + + /* ------------------------------------------------------------- + Annotations that suppress errors. 
It is usually better to express the + program's synchronization using the other annotations, but these can + be used when all else fails. */ + + /* Report that we may have a benign race at "pointer", with size + "sizeof(*(pointer))". "pointer" must be a non-void* pointer. Insert at the + point where "pointer" has been allocated, preferably close to the point + where the race happens. See also ANNOTATE_BENIGN_RACE_STATIC. */ + #define ANNOTATE_BENIGN_RACE(pointer, description) \ + AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, \ + sizeof(*(pointer)), description) + + /* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to + the memory range [address, address+size). */ + #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \ + AnnotateBenignRaceSized(__FILE__, __LINE__, address, size, description) + + /* Request the analysis tool to ignore all reads in the current thread + until ANNOTATE_IGNORE_READS_END is called. + Useful to ignore intentional racey reads, while still checking + other reads and all writes. + See also ANNOTATE_UNPROTECTED_READ. */ + #define ANNOTATE_IGNORE_READS_BEGIN() \ + AnnotateIgnoreReadsBegin(__FILE__, __LINE__) + + /* Stop ignoring reads. */ + #define ANNOTATE_IGNORE_READS_END() \ + AnnotateIgnoreReadsEnd(__FILE__, __LINE__) + + /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */ + #define ANNOTATE_IGNORE_WRITES_BEGIN() \ + AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + + /* Stop ignoring writes. */ + #define ANNOTATE_IGNORE_WRITES_END() \ + AnnotateIgnoreWritesEnd(__FILE__, __LINE__) + + /* Start ignoring all memory accesses (reads and writes). */ + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do {\ + ANNOTATE_IGNORE_READS_BEGIN();\ + ANNOTATE_IGNORE_WRITES_BEGIN();\ + }while(0)\ + + /* Stop ignoring all memory accesses. 
*/ + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do {\ + ANNOTATE_IGNORE_WRITES_END();\ + ANNOTATE_IGNORE_READS_END();\ + }while(0)\ + + /* Enable (enable!=0) or disable (enable==0) race detection for all threads. + This annotation could be useful if you want to skip expensive race analysis + during some period of program execution, e.g. during initialization. */ + #define ANNOTATE_ENABLE_RACE_DETECTION(enable) \ + AnnotateEnableRaceDetection(__FILE__, __LINE__, enable) + + /* ------------------------------------------------------------- + Annotations useful for debugging. */ + + /* Request to trace every access to "address". */ + #define ANNOTATE_TRACE_MEMORY(address) \ + AnnotateTraceMemory(__FILE__, __LINE__, address) + + /* Report the current thread name to a race detector. */ + #define ANNOTATE_THREAD_NAME(name) \ + AnnotateThreadName(__FILE__, __LINE__, name) + + /* ------------------------------------------------------------- + Annotations useful when implementing locks. They are not + normally needed by modules that merely use locks. + The "lock" argument is a pointer to the lock object. */ + + /* Report that a lock has been created at address "lock". */ + #define ANNOTATE_RWLOCK_CREATE(lock) \ + AnnotateRWLockCreate(__FILE__, __LINE__, lock) + + /* Report that the lock at address "lock" is about to be destroyed. */ + #define ANNOTATE_RWLOCK_DESTROY(lock) \ + AnnotateRWLockDestroy(__FILE__, __LINE__, lock) + + /* Report that the lock at address "lock" has been acquired. + is_w=1 for writer lock, is_w=0 for reader lock. */ + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ + AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) + + /* Report that the lock at address "lock" is about to be released. */ + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ + AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) + + /* ------------------------------------------------------------- + Annotations useful when implementing barriers. 
They are not + normally needed by modules that merely use barriers. + The "barrier" argument is a pointer to the barrier object. */ + + /* Report that the "barrier" has been initialized with initial "count". + If 'reinitialization_allowed' is true, initialization is allowed to happen + multiple times w/o calling barrier_destroy() */ + #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \ + AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, \ + reinitialization_allowed) + + /* Report that we are about to enter barrier_wait("barrier"). */ + #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \ + AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier) + + /* Report that we just exited barrier_wait("barrier"). */ + #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \ + AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier) + + /* Report that the "barrier" has been destroyed. */ + #define ANNOTATE_BARRIER_DESTROY(barrier) \ + AnnotateBarrierDestroy(__FILE__, __LINE__, barrier) + + /* ------------------------------------------------------------- + Annotations useful for testing race detectors. */ + + /* Report that we expect a race on the variable at "address". + Use only in unit tests for a race detector. */ + #define ANNOTATE_EXPECT_RACE(address, description) \ + AnnotateExpectRace(__FILE__, __LINE__, address, description) + + /* A no-op. Insert where you like to test the interceptors. */ + #define ANNOTATE_NO_OP(arg) \ + AnnotateNoOp(__FILE__, __LINE__, arg) + + /* Force the race detector to flush its state. The actual effect depends on + * the implementation of the detector. 
*/ + #define ANNOTATE_FLUSH_STATE() \ + AnnotateFlushState(__FILE__, __LINE__) + + +#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ + + #define ANNOTATE_RWLOCK_CREATE(lock) /* empty */ + #define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */ + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */ + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */ + #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */ + #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */ + #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */ + #define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */ + #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */ + #define ANNOTATE_CONDVAR_WAIT(cv) /* empty */ + #define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */ + #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */ + #define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */ + #define ANNOTATE_HAPPENS_AFTER(obj) /* empty */ + #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */ + #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) /* empty */ + #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) /* empty */ + #define ANNOTATE_PCQ_CREATE(pcq) /* empty */ + #define ANNOTATE_PCQ_DESTROY(pcq) /* empty */ + #define ANNOTATE_PCQ_PUT(pcq) /* empty */ + #define ANNOTATE_PCQ_GET(pcq) /* empty */ + #define ANNOTATE_NEW_MEMORY(address, size) /* empty */ + #define ANNOTATE_EXPECT_RACE(address, description) /* empty */ + #define ANNOTATE_BENIGN_RACE(address, description) /* empty */ + #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */ + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */ + #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */ + #define ANNOTATE_TRACE_MEMORY(arg) /* empty */ + #define ANNOTATE_THREAD_NAME(name) /* empty */ + #define ANNOTATE_IGNORE_READS_BEGIN() /* empty */ + #define ANNOTATE_IGNORE_READS_END() /* empty */ + #define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */ + #define ANNOTATE_IGNORE_WRITES_END() /* empty */ + 
#define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */ + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */ + #define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */ + #define ANNOTATE_NO_OP(arg) /* empty */ + #define ANNOTATE_FLUSH_STATE() /* empty */ + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ + +/* Macro definitions for GCC attributes that allow static thread safety + analysis to recognize and use some of the dynamic annotations as + escape hatches. + TODO(lcwu): remove the check for __SUPPORT_DYN_ANNOTATION__ once the + default crosstool/GCC supports these GCC attributes. */ + +#define ANNOTALYSIS_STATIC_INLINE +#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY ; +#define ANNOTALYSIS_IGNORE_READS_BEGIN +#define ANNOTALYSIS_IGNORE_READS_END +#define ANNOTALYSIS_IGNORE_WRITES_BEGIN +#define ANNOTALYSIS_IGNORE_WRITES_END +#define ANNOTALYSIS_UNPROTECTED_READ + +#if defined(__GNUC__) && (!defined(SWIG)) && (!defined(__clang__)) && \ + defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__) + +#if DYNAMIC_ANNOTATIONS_ENABLED == 0 +#define ANNOTALYSIS_ONLY 1 +#undef ANNOTALYSIS_STATIC_INLINE +#define ANNOTALYSIS_STATIC_INLINE static inline +#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY { (void)file; (void)line; } +#endif + +/* Only emit attributes when annotalysis is enabled. 
*/ +#if defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__) +#undef ANNOTALYSIS_IGNORE_READS_BEGIN +#define ANNOTALYSIS_IGNORE_READS_BEGIN __attribute__ ((ignore_reads_begin)) +#undef ANNOTALYSIS_IGNORE_READS_END +#define ANNOTALYSIS_IGNORE_READS_END __attribute__ ((ignore_reads_end)) +#undef ANNOTALYSIS_IGNORE_WRITES_BEGIN +#define ANNOTALYSIS_IGNORE_WRITES_BEGIN __attribute__ ((ignore_writes_begin)) +#undef ANNOTALYSIS_IGNORE_WRITES_END +#define ANNOTALYSIS_IGNORE_WRITES_END __attribute__ ((ignore_writes_end)) +#undef ANNOTALYSIS_UNPROTECTED_READ +#define ANNOTALYSIS_UNPROTECTED_READ __attribute__ ((unprotected_read)) +#endif + +#endif // defined(__GNUC__) && (!defined(SWIG)) && (!defined(__clang__)) + +/* Use the macros above rather than using these functions directly. */ +#ifdef __cplusplus +extern "C" { +#endif +void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock); +void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock); +void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w); +void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w); +void AnnotateBarrierInit(const char *file, int line, + const volatile void *barrier, long count, + long reinitialization_allowed); +void AnnotateBarrierWaitBefore(const char *file, int line, + const volatile void *barrier); +void AnnotateBarrierWaitAfter(const char *file, int line, + const volatile void *barrier); +void AnnotateBarrierDestroy(const char *file, int line, + const volatile void *barrier); +void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock); +void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv); +void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv); +void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, 
+ long size); +void AnnotateUnpublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq); +void AnnotateNewMemory(const char *file, int line, + const volatile void *address, + long size); +void AnnotateExpectRace(const char *file, int line, + const volatile void *address, + const char *description); +void AnnotateBenignRace(const char *file, int line, + const volatile void *address, + const char *description); +void AnnotateBenignRaceSized(const char *file, int line, + const volatile void *address, + long size, + const char *description); +void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu); +void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg); +void AnnotateThreadName(const char *file, int line, + const char *name); +ANNOTALYSIS_STATIC_INLINE +void AnnotateIgnoreReadsBegin(const char *file, int line) + ANNOTALYSIS_IGNORE_READS_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +ANNOTALYSIS_STATIC_INLINE +void AnnotateIgnoreReadsEnd(const char *file, int line) + ANNOTALYSIS_IGNORE_READS_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +ANNOTALYSIS_STATIC_INLINE +void AnnotateIgnoreWritesBegin(const char *file, int line) + ANNOTALYSIS_IGNORE_WRITES_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +ANNOTALYSIS_STATIC_INLINE +void AnnotateIgnoreWritesEnd(const char *file, int line) + ANNOTALYSIS_IGNORE_WRITES_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +void AnnotateEnableRaceDetection(const char *file, int line, int enable); +void AnnotateNoOp(const char *file, int line, + const volatile void *arg); +void AnnotateFlushState(const char *file, int line); + +/* Return non-zero value if 
running under valgrind. + + If "valgrind.h" is included into dynamic_annotations.c, + the regular valgrind mechanism will be used. + See http://valgrind.org/docs/manual/manual-core-adv.html about + RUNNING_ON_VALGRIND and other valgrind "client requests". + The file "valgrind.h" may be obtained by doing + svn co svn://svn.valgrind.org/valgrind/trunk/include + + If for some reason you can't use "valgrind.h" or want to fake valgrind, + there are two ways to make this function return non-zero: + - Use environment variable: export RUNNING_ON_VALGRIND=1 + - Make your tool intercept the function RunningOnValgrind() and + change its return value. + */ +int RunningOnValgrind(void); + +/* ValgrindSlowdown returns: + * 1.0, if (RunningOnValgrind() == 0) + * 50.0, if (RunningOnValgrind() != 0 && getenv("VALGRIND_SLOWDOWN") == NULL) + * atof(getenv("VALGRIND_SLOWDOWN")) otherwise + This function can be used to scale timeout values: + EXAMPLE: + for (;;) { + DoExpensiveBackgroundTask(); + SleepForSeconds(5 * ValgrindSlowdown()); + } + */ +double ValgrindSlowdown(void); + +#ifdef __cplusplus +} +#endif + +#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus) + + /* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. + + Instead of doing + ANNOTATE_IGNORE_READS_BEGIN(); + ... = x; + ANNOTATE_IGNORE_READS_END(); + one can use + ... = ANNOTATE_UNPROTECTED_READ(x); */ + template <class T> + inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) + ANNOTALYSIS_UNPROTECTED_READ { + ANNOTATE_IGNORE_READS_BEGIN(); + T res = x; + ANNOTATE_IGNORE_READS_END(); + return res; + } + /* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. 
*/ + #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ + namespace { \ + class static_var ## _annotator { \ + public: \ + static_var ## _annotator() { \ + ANNOTATE_BENIGN_RACE_SIZED(&static_var, \ + sizeof(static_var), \ + # static_var ": " description); \ + } \ + }; \ + static static_var ## _annotator the ## static_var ## _annotator;\ + } +#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ + + #define ANNOTATE_UNPROTECTED_READ(x) (x) + #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) /* empty */ + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ + +/* Annotalysis, a GCC based static analyzer, is able to understand and use + some of the dynamic annotations defined in this file. However, dynamic + annotations are usually disabled in the opt mode (to avoid additional + runtime overheads) while Annotalysis only works in the opt mode. + In order for Annotalysis to use these dynamic annotations when they + are disabled, we re-define these annotations here. Note that unlike the + original macro definitions above, these macros are expanded to calls to + static inline functions so that the compiler will be able to remove the + calls after the analysis. */ + +#ifdef ANNOTALYSIS_ONLY + + #undef ANNOTALYSIS_ONLY + + /* Undefine and re-define the macros that the static analyzer understands. 
*/ + #undef ANNOTATE_IGNORE_READS_BEGIN + #define ANNOTATE_IGNORE_READS_BEGIN() \ + AnnotateIgnoreReadsBegin(__FILE__, __LINE__) + + #undef ANNOTATE_IGNORE_READS_END + #define ANNOTATE_IGNORE_READS_END() \ + AnnotateIgnoreReadsEnd(__FILE__, __LINE__) + + #undef ANNOTATE_IGNORE_WRITES_BEGIN + #define ANNOTATE_IGNORE_WRITES_BEGIN() \ + AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + + #undef ANNOTATE_IGNORE_WRITES_END + #define ANNOTATE_IGNORE_WRITES_END() \ + AnnotateIgnoreWritesEnd(__FILE__, __LINE__) + + #undef ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do { \ + ANNOTATE_IGNORE_READS_BEGIN(); \ + ANNOTATE_IGNORE_WRITES_BEGIN(); \ + }while(0) \ + + #undef ANNOTATE_IGNORE_READS_AND_WRITES_END + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do { \ + ANNOTATE_IGNORE_WRITES_END(); \ + ANNOTATE_IGNORE_READS_END(); \ + }while(0) \ + + #if defined(__cplusplus) + #undef ANNOTATE_UNPROTECTED_READ + template <class T> + inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) + ANNOTALYSIS_UNPROTECTED_READ { + ANNOTATE_IGNORE_READS_BEGIN(); + T res = x; + ANNOTATE_IGNORE_READS_END(); + return res; + } + #endif /* __cplusplus */ + +#endif /* ANNOTALYSIS_ONLY */ + +/* Undefine the macros intended only in this file. */ +#undef ANNOTALYSIS_STATIC_INLINE +#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY + +#endif /* BASE_DYNAMIC_ANNOTATIONS_H_ */ diff --git a/src/third_party/gperftools-2.7/src/base/elf_mem_image.cc b/src/third_party/gperftools-2.7/src/base/elf_mem_image.cc new file mode 100644 index 00000000000..d2ca1a5e131 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/elf_mem_image.cc @@ -0,0 +1,434 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup in an in-memory Elf image. +// + +#include "base/elf_mem_image.h" + +#ifdef HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h + +#include <stddef.h> // for size_t, ptrdiff_t +#include "base/logging.h" + +// From binutils/include/elf/common.h (this doesn't appear to be documented +// anywhere else). +// +// /* This flag appears in a Versym structure. 
It means that the symbol +// is hidden, and is only visible with an explicit version number. +// This is a GNU extension. */ +// #define VERSYM_HIDDEN 0x8000 +// +// /* This is the mask for the rest of the Versym information. */ +// #define VERSYM_VERSION 0x7fff + +#define VERSYM_VERSION 0x7fff + +namespace base { + +namespace { +template <int N> class ElfClass { + public: + static const int kElfClass = -1; + static int ElfBind(const ElfW(Sym) *) { + CHECK(false); // << "Unexpected word size"; + return 0; + } + static int ElfType(const ElfW(Sym) *) { + CHECK(false); // << "Unexpected word size"; + return 0; + } +}; + +template <> class ElfClass<32> { + public: + static const int kElfClass = ELFCLASS32; + static int ElfBind(const ElfW(Sym) *symbol) { + return ELF32_ST_BIND(symbol->st_info); + } + static int ElfType(const ElfW(Sym) *symbol) { + return ELF32_ST_TYPE(symbol->st_info); + } +}; + +template <> class ElfClass<64> { + public: + static const int kElfClass = ELFCLASS64; + static int ElfBind(const ElfW(Sym) *symbol) { + return ELF64_ST_BIND(symbol->st_info); + } + static int ElfType(const ElfW(Sym) *symbol) { + return ELF64_ST_TYPE(symbol->st_info); + } +}; + +typedef ElfClass<__WORDSIZE> CurrentElfClass; + +// Extract an element from one of the ELF tables, cast it to desired type. +// This is just a simple arithmetic and a glorified cast. +// Callers are responsible for bounds checking. 
+template <class T> +const T* GetTableElement(const ElfW(Ehdr) *ehdr, + ElfW(Off) table_offset, + ElfW(Word) element_size, + size_t index) { + return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr) + + table_offset + + index * element_size); +} +} // namespace + +const void *const ElfMemImage::kInvalidBase = + reinterpret_cast<const void *>(~0L); + +ElfMemImage::ElfMemImage(const void *base) { + CHECK(base != kInvalidBase); + Init(base); +} + +int ElfMemImage::GetNumSymbols() const { + if (!hash_) { + return 0; + } + // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash + return hash_[1]; +} + +const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const { + CHECK_LT(index, GetNumSymbols()); + return dynsym_ + index; +} + +const ElfW(Versym) *ElfMemImage::GetVersym(int index) const { + CHECK_LT(index, GetNumSymbols()); + return versym_ + index; +} + +const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const { + CHECK_LT(index, ehdr_->e_phnum); + return GetTableElement<ElfW(Phdr)>(ehdr_, + ehdr_->e_phoff, + ehdr_->e_phentsize, + index); +} + +const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const { + CHECK_LT(offset, strsize_); + return dynstr_ + offset; +} + +const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const { + if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) { + // Symbol corresponds to "special" (e.g. SHN_ABS) section. 
+ return reinterpret_cast<const void *>(sym->st_value); + } + CHECK_LT(link_base_, sym->st_value); + return GetTableElement<char>(ehdr_, 0, 1, sym->st_value) - link_base_; +} + +const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const { + CHECK_LE(index, verdefnum_); + const ElfW(Verdef) *version_definition = verdef_; + while (version_definition->vd_ndx < index && version_definition->vd_next) { + const char *const version_definition_as_char = + reinterpret_cast<const char *>(version_definition); + version_definition = + reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char + + version_definition->vd_next); + } + return version_definition->vd_ndx == index ? version_definition : NULL; +} + +const ElfW(Verdaux) *ElfMemImage::GetVerdefAux( + const ElfW(Verdef) *verdef) const { + return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1); +} + +const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const { + CHECK_LT(offset, strsize_); + return dynstr_ + offset; +} + +void ElfMemImage::Init(const void *base) { + ehdr_ = NULL; + dynsym_ = NULL; + dynstr_ = NULL; + versym_ = NULL; + verdef_ = NULL; + hash_ = NULL; + strsize_ = 0; + verdefnum_ = 0; + link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this. + if (!base) { + return; + } + const intptr_t base_as_uintptr_t = reinterpret_cast<uintptr_t>(base); + // Fake VDSO has low bit set. 
+ const bool fake_vdso = ((base_as_uintptr_t & 1) != 0); + base = reinterpret_cast<const void *>(base_as_uintptr_t & ~1); + const char *const base_as_char = reinterpret_cast<const char *>(base); + if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || + base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { + RAW_DCHECK(false, "no ELF magic"); // at %p", base); + return; + } + int elf_class = base_as_char[EI_CLASS]; + if (elf_class != CurrentElfClass::kElfClass) { + DCHECK_EQ(elf_class, CurrentElfClass::kElfClass); + return; + } + switch (base_as_char[EI_DATA]) { + case ELFDATA2LSB: { + if (__LITTLE_ENDIAN != __BYTE_ORDER) { + DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; + return; + } + break; + } + case ELFDATA2MSB: { + if (__BIG_ENDIAN != __BYTE_ORDER) { + DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; + return; + } + break; + } + default: { + RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA]; + return; + } + } + + ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base); + const ElfW(Phdr) *dynamic_program_header = NULL; + for (int i = 0; i < ehdr_->e_phnum; ++i) { + const ElfW(Phdr) *const program_header = GetPhdr(i); + switch (program_header->p_type) { + case PT_LOAD: + if (link_base_ == ~0L) { + link_base_ = program_header->p_vaddr; + } + break; + case PT_DYNAMIC: + dynamic_program_header = program_header; + break; + } + } + if (link_base_ == ~0L || !dynamic_program_header) { + RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO"); + RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO"); + // Mark this image as not present. Can not recur infinitely. 
+ Init(0); + return; + } + ptrdiff_t relocation = + base_as_char - reinterpret_cast<const char *>(link_base_); + ElfW(Dyn) *dynamic_entry = + reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr + + relocation); + for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { + ElfW(Xword) value = dynamic_entry->d_un.d_val; + if (fake_vdso) { + // A complication: in the real VDSO, dynamic entries are not relocated + // (it wasn't loaded by a dynamic loader). But when testing with a + // "fake" dlopen()ed vdso library, the loader relocates some (but + // not all!) of them before we get here. + if (dynamic_entry->d_tag == DT_VERDEF) { + // The only dynamic entry (of the ones we care about) libc-2.3.6 + // loader doesn't relocate. + value += relocation; + } + } else { + // Real VDSO. Everything needs to be relocated. + value += relocation; + } + switch (dynamic_entry->d_tag) { + case DT_HASH: + hash_ = reinterpret_cast<ElfW(Word) *>(value); + break; + case DT_SYMTAB: + dynsym_ = reinterpret_cast<ElfW(Sym) *>(value); + break; + case DT_STRTAB: + dynstr_ = reinterpret_cast<const char *>(value); + break; + case DT_VERSYM: + versym_ = reinterpret_cast<ElfW(Versym) *>(value); + break; + case DT_VERDEF: + verdef_ = reinterpret_cast<ElfW(Verdef) *>(value); + break; + case DT_VERDEFNUM: + verdefnum_ = dynamic_entry->d_un.d_val; + break; + case DT_STRSZ: + strsize_ = dynamic_entry->d_un.d_val; + break; + default: + // Unrecognized entries explicitly ignored. 
+ break; + } + } + if (!hash_ || !dynsym_ || !dynstr_ || !versym_ || + !verdef_ || !verdefnum_ || !strsize_) { + RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)"); + RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)"); + RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)"); + RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)"); + RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)"); + RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)"); + RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)"); + // Mark this image as not present. Can not recur infinitely. + Init(0); + return; + } +} + +bool ElfMemImage::LookupSymbol(const char *name, + const char *version, + int type, + SymbolInfo *info) const { + for (SymbolIterator it = begin(); it != end(); ++it) { + if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 && + CurrentElfClass::ElfType(it->symbol) == type) { + if (info) { + *info = *it; + } + return true; + } + } + return false; +} + +bool ElfMemImage::LookupSymbolByAddress(const void *address, + SymbolInfo *info_out) const { + for (SymbolIterator it = begin(); it != end(); ++it) { + const char *const symbol_start = + reinterpret_cast<const char *>(it->address); + const char *const symbol_end = symbol_start + it->symbol->st_size; + if (symbol_start <= address && address < symbol_end) { + if (info_out) { + // Client wants to know details for that symbol (the usual case). + if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) { + // Strong symbol; just return it. + *info_out = *it; + return true; + } else { + // Weak or local. Record it, but keep looking for a strong one. + *info_out = *it; + } + } else { + // Client only cares if there is an overlapping symbol. 
+ return true; + } + } + } + return false; +} + +ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index) + : index_(index), image_(image) { +} + +const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const { + return &info_; +} + +const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const { + return info_; +} + +bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const { + return this->image_ == rhs.image_ && this->index_ == rhs.index_; +} + +bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const { + return !(*this == rhs); +} + +ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() { + this->Update(1); + return *this; +} + +ElfMemImage::SymbolIterator ElfMemImage::begin() const { + SymbolIterator it(this, 0); + it.Update(0); + return it; +} + +ElfMemImage::SymbolIterator ElfMemImage::end() const { + return SymbolIterator(this, GetNumSymbols()); +} + +void ElfMemImage::SymbolIterator::Update(int increment) { + const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_); + CHECK(image->IsPresent() || increment == 0); + if (!image->IsPresent()) { + return; + } + index_ += increment; + if (index_ >= image->GetNumSymbols()) { + index_ = image->GetNumSymbols(); + return; + } + const ElfW(Sym) *symbol = image->GetDynsym(index_); + const ElfW(Versym) *version_symbol = image->GetVersym(index_); + CHECK(symbol && version_symbol); + const char *const symbol_name = image->GetDynstr(symbol->st_name); + const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION; + const ElfW(Verdef) *version_definition = NULL; + const char *version_name = ""; + if (symbol->st_shndx == SHN_UNDEF) { + // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and + // version_index could well be greater than verdefnum_, so calling + // GetVerdef(version_index) may trigger assertion. 
+ } else { + version_definition = image->GetVerdef(version_index); + } + if (version_definition) { + // I am expecting 1 or 2 auxiliary entries: 1 for the version itself, + // optional 2nd if the version has a parent. + CHECK_LE(1, version_definition->vd_cnt); + CHECK_LE(version_definition->vd_cnt, 2); + const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition); + version_name = image->GetVerstr(version_aux->vda_name); + } + info_.name = symbol_name; + info_.version = version_name; + info_.address = image->GetSymAddr(symbol); + info_.symbol = symbol; +} + +} // namespace base + +#endif // HAVE_ELF_MEM_IMAGE diff --git a/src/third_party/gperftools-2.7/src/base/elf_mem_image.h b/src/third_party/gperftools-2.7/src/base/elf_mem_image.h new file mode 100644 index 00000000000..5fb00fffb5f --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/elf_mem_image.h @@ -0,0 +1,135 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup for in-memory Elf images. + +#ifndef BASE_ELF_MEM_IMAGE_H_ +#define BASE_ELF_MEM_IMAGE_H_ + +#include <config.h> +#ifdef HAVE_FEATURES_H +#include <features.h> // for __GLIBC__ +#endif + +// Maybe one day we can rewrite this file not to require the elf +// symbol extensions in glibc, but for right now we need them. +#if defined(__ELF__) && defined(__GLIBC__) && !defined(__native_client__) + +#define HAVE_ELF_MEM_IMAGE 1 + +#include <stdlib.h> +#include <link.h> // for ElfW + +namespace base { + +// An in-memory ELF image (may not exist on disk). +class ElfMemImage { + public: + // Sentinel: there could never be an elf image at this address. + static const void *const kInvalidBase; + + // Information about a single vdso symbol. + // All pointers are into .dynsym, .dynstr, or .text of the VDSO. + // Do not free() them or modify through them. + struct SymbolInfo { + const char *name; // E.g. "__vdso_getcpu" + const char *version; // E.g. "LINUX_2.6", could be "" + // for unversioned symbol. + const void *address; // Relocated symbol address. + const ElfW(Sym) *symbol; // Symbol in the dynamic symbol table. 
+ }; + + // Supports iteration over all dynamic symbols. + class SymbolIterator { + public: + friend class ElfMemImage; + const SymbolInfo *operator->() const; + const SymbolInfo &operator*() const; + SymbolIterator& operator++(); + bool operator!=(const SymbolIterator &rhs) const; + bool operator==(const SymbolIterator &rhs) const; + private: + SymbolIterator(const void *const image, int index); + void Update(int incr); + SymbolInfo info_; + int index_; + const void *const image_; + }; + + + explicit ElfMemImage(const void *base); + void Init(const void *base); + bool IsPresent() const { return ehdr_ != NULL; } + const ElfW(Phdr)* GetPhdr(int index) const; + const ElfW(Sym)* GetDynsym(int index) const; + const ElfW(Versym)* GetVersym(int index) const; + const ElfW(Verdef)* GetVerdef(int index) const; + const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const; + const char* GetDynstr(ElfW(Word) offset) const; + const void* GetSymAddr(const ElfW(Sym) *sym) const; + const char* GetVerstr(ElfW(Word) offset) const; + int GetNumSymbols() const; + + SymbolIterator begin() const; + SymbolIterator end() const; + + // Look up versioned dynamic symbol in the image. + // Returns false if image is not present, or doesn't contain given + // symbol/version/type combination. + // If info_out != NULL, additional details are filled in. + bool LookupSymbol(const char *name, const char *version, + int symbol_type, SymbolInfo *info_out) const; + + // Find info about symbol (if any) which overlaps given address. + // Returns true if symbol was found; false if image isn't present + // or doesn't have a symbol overlapping given address. + // If info_out != NULL, additional details are filled in. 
+ bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; + + private: + const ElfW(Ehdr) *ehdr_; + const ElfW(Sym) *dynsym_; + const ElfW(Versym) *versym_; + const ElfW(Verdef) *verdef_; + const ElfW(Word) *hash_; + const char *dynstr_; + size_t strsize_; + size_t verdefnum_; + ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD). +}; + +} // namespace base + +#endif // __ELF__ and __GLIBC__ and !__native_client__ + +#endif // BASE_ELF_MEM_IMAGE_H_ diff --git a/src/third_party/gperftools-2.7/src/base/elfcore.h b/src/third_party/gperftools-2.7/src/base/elfcore.h new file mode 100644 index 00000000000..8193d422c40 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/elfcore.h @@ -0,0 +1,401 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2005-2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke, Carl Crous + */ + +#ifndef _ELFCORE_H +#define _ELFCORE_H +#ifdef __cplusplus +extern "C" { +#endif + +/* We currently only support x86-32, x86-64, ARM, MIPS, PPC on Linux. + * Porting to other related platforms should not be difficult. + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ + defined(__mips__) || defined(__PPC__)) && defined(__linux) + +#include <stdarg.h> +#include <stdint.h> +#include <sys/types.h> +#include <config.h> + + +/* Define the DUMPER symbol to make sure that there is exactly one + * core dumper built into the library. + */ +#define DUMPER "ELF" + +/* By the time that we get a chance to read CPU registers in the + * calling thread, they are already in a not particularly useful + * state. Besides, there will be multiple frames on the stack that are + * just making the core file confusing. To fix this problem, we take a + * snapshot of the frame pointer, stack pointer, and instruction + * pointer at an earlier time, and then insert these values into the + * core file. 
+ */ + +#if defined(__i386__) || defined(__x86_64__) + typedef struct i386_regs { /* Normal (non-FPU) CPU registers */ + #ifdef __x86_64__ + #define BP rbp + #define SP rsp + #define IP rip + uint64_t r15,r14,r13,r12,rbp,rbx,r11,r10; + uint64_t r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax; + uint64_t rip,cs,eflags; + uint64_t rsp,ss; + uint64_t fs_base, gs_base; + uint64_t ds,es,fs,gs; + #else + #define BP ebp + #define SP esp + #define IP eip + uint32_t ebx, ecx, edx, esi, edi, ebp, eax; + uint16_t ds, __ds, es, __es; + uint16_t fs, __fs, gs, __gs; + uint32_t orig_eax, eip; + uint16_t cs, __cs; + uint32_t eflags, esp; + uint16_t ss, __ss; + #endif + } i386_regs; +#elif defined(__arm__) + typedef struct arm_regs { /* General purpose registers */ + #define BP uregs[11] /* Frame pointer */ + #define SP uregs[13] /* Stack pointer */ + #define IP uregs[15] /* Program counter */ + #define LR uregs[14] /* Link register */ + long uregs[18]; + } arm_regs; +#elif defined(__mips__) + typedef struct mips_regs { + unsigned long pad[6]; /* Unused padding to match kernel structures */ + unsigned long uregs[32]; /* General purpose registers. */ + unsigned long hi; /* Used for multiplication and division. */ + unsigned long lo; + unsigned long cp0_epc; /* Program counter. */ + unsigned long cp0_badvaddr; + unsigned long cp0_status; + unsigned long cp0_cause; + unsigned long unused; + } mips_regs; +#elif defined (__PPC__) + typedef struct ppc_regs { + #define SP uregs[1] /* Stack pointer */ + #define IP rip /* Program counter */ + #define LR lr /* Link register */ + unsigned long uregs[32]; /* General Purpose Registers - r0-r31. */ + double fpr[32]; /* Floating-Point Registers - f0-f31. */ + unsigned long rip; /* Program counter. 
*/ + unsigned long msr; + unsigned long ccr; + unsigned long lr; + unsigned long ctr; + unsigned long xeq; + unsigned long mq; + } ppc_regs; +#endif + +#if defined(__i386__) && defined(__GNUC__) + /* On x86 we provide an optimized version of the FRAME() macro, if the + * compiler supports a GCC-style asm() directive. This results in somewhat + * more accurate values for CPU registers. + */ + typedef struct Frame { + struct i386_regs uregs; + int errno_; + pid_t tid; + } Frame; + #define FRAME(f) Frame f; \ + do { \ + f.errno_ = errno; \ + f.tid = sys_gettid(); \ + __asm__ volatile ( \ + "push %%ebp\n" \ + "push %%ebx\n" \ + "mov %%ebx,0(%%eax)\n" \ + "mov %%ecx,4(%%eax)\n" \ + "mov %%edx,8(%%eax)\n" \ + "mov %%esi,12(%%eax)\n" \ + "mov %%edi,16(%%eax)\n" \ + "mov %%ebp,20(%%eax)\n" \ + "mov %%eax,24(%%eax)\n" \ + "mov %%ds,%%ebx\n" \ + "mov %%ebx,28(%%eax)\n" \ + "mov %%es,%%ebx\n" \ + "mov %%ebx,32(%%eax)\n" \ + "mov %%fs,%%ebx\n" \ + "mov %%ebx,36(%%eax)\n" \ + "mov %%gs,%%ebx\n" \ + "mov %%ebx, 40(%%eax)\n" \ + "call 0f\n" \ + "0:pop %%ebx\n" \ + "add $1f-0b,%%ebx\n" \ + "mov %%ebx,48(%%eax)\n" \ + "mov %%cs,%%ebx\n" \ + "mov %%ebx,52(%%eax)\n" \ + "pushf\n" \ + "pop %%ebx\n" \ + "mov %%ebx,56(%%eax)\n" \ + "mov %%esp,%%ebx\n" \ + "add $8,%%ebx\n" \ + "mov %%ebx,60(%%eax)\n" \ + "mov %%ss,%%ebx\n" \ + "mov %%ebx,64(%%eax)\n" \ + "pop %%ebx\n" \ + "pop %%ebp\n" \ + "1:" \ + : : "a" (&f) : "memory"); \ + } while (0) + #define SET_FRAME(f,r) \ + do { \ + errno = (f).errno_; \ + (r) = (f).uregs; \ + } while (0) +#elif defined(__x86_64__) && defined(__GNUC__) + /* The FRAME and SET_FRAME macros for x86_64. 
*/ + typedef struct Frame { + struct i386_regs uregs; + int errno_; + pid_t tid; + } Frame; + #define FRAME(f) Frame f; \ + do { \ + f.errno_ = errno; \ + f.tid = sys_gettid(); \ + __asm__ volatile ( \ + "push %%rbp\n" \ + "push %%rbx\n" \ + "mov %%r15,0(%%rax)\n" \ + "mov %%r14,8(%%rax)\n" \ + "mov %%r13,16(%%rax)\n" \ + "mov %%r12,24(%%rax)\n" \ + "mov %%rbp,32(%%rax)\n" \ + "mov %%rbx,40(%%rax)\n" \ + "mov %%r11,48(%%rax)\n" \ + "mov %%r10,56(%%rax)\n" \ + "mov %%r9,64(%%rax)\n" \ + "mov %%r8,72(%%rax)\n" \ + "mov %%rax,80(%%rax)\n" \ + "mov %%rcx,88(%%rax)\n" \ + "mov %%rdx,96(%%rax)\n" \ + "mov %%rsi,104(%%rax)\n" \ + "mov %%rdi,112(%%rax)\n" \ + "mov %%ds,%%rbx\n" \ + "mov %%rbx,184(%%rax)\n" \ + "mov %%es,%%rbx\n" \ + "mov %%rbx,192(%%rax)\n" \ + "mov %%fs,%%rbx\n" \ + "mov %%rbx,200(%%rax)\n" \ + "mov %%gs,%%rbx\n" \ + "mov %%rbx,208(%%rax)\n" \ + "call 0f\n" \ + "0:pop %%rbx\n" \ + "add $1f-0b,%%rbx\n" \ + "mov %%rbx,128(%%rax)\n" \ + "mov %%cs,%%rbx\n" \ + "mov %%rbx,136(%%rax)\n" \ + "pushf\n" \ + "pop %%rbx\n" \ + "mov %%rbx,144(%%rax)\n" \ + "mov %%rsp,%%rbx\n" \ + "add $16,%%ebx\n" \ + "mov %%rbx,152(%%rax)\n" \ + "mov %%ss,%%rbx\n" \ + "mov %%rbx,160(%%rax)\n" \ + "pop %%rbx\n" \ + "pop %%rbp\n" \ + "1:" \ + : : "a" (&f) : "memory"); \ + } while (0) + #define SET_FRAME(f,r) \ + do { \ + errno = (f).errno_; \ + (f).uregs.fs_base = (r).fs_base; \ + (f).uregs.gs_base = (r).gs_base; \ + (r) = (f).uregs; \ + } while (0) +#elif defined(__arm__) && defined(__GNUC__) + /* ARM calling conventions are a little more tricky. A little assembly + * helps in obtaining an accurate snapshot of all registers. 
+ */ + typedef struct Frame { + struct arm_regs arm; + int errno_; + pid_t tid; + } Frame; + #define FRAME(f) Frame f; \ + do { \ + long cpsr; \ + f.errno_ = errno; \ + f.tid = sys_gettid(); \ + __asm__ volatile( \ + "stmia %0, {r0-r15}\n" /* All integer regs */\ + : : "r"(&f.arm) : "memory"); \ + f.arm.uregs[16] = 0; \ + __asm__ volatile( \ + "mrs %0, cpsr\n" /* Condition code reg */\ + : "=r"(cpsr)); \ + f.arm.uregs[17] = cpsr; \ + } while (0) + #define SET_FRAME(f,r) \ + do { \ + /* Don't override the FPU status register. */\ + /* Use the value obtained from ptrace(). This*/\ + /* works, because our code does not perform */\ + /* any FPU operations, itself. */\ + long fps = (f).arm.uregs[16]; \ + errno = (f).errno_; \ + (r) = (f).arm; \ + (r).uregs[16] = fps; \ + } while (0) +#elif defined(__mips__) && defined(__GNUC__) + typedef struct Frame { + struct mips_regs mips_regs; + int errno_; + pid_t tid; + } Frame; + #define MIPSREG(n) ({ register unsigned long r __asm__("$"#n); r; }) + #define FRAME(f) Frame f = { 0 }; \ + do { \ + unsigned long hi, lo; \ + register unsigned long pc __asm__("$31"); \ + f.mips_regs.uregs[ 0] = MIPSREG( 0); \ + f.mips_regs.uregs[ 1] = MIPSREG( 1); \ + f.mips_regs.uregs[ 2] = MIPSREG( 2); \ + f.mips_regs.uregs[ 3] = MIPSREG( 3); \ + f.mips_regs.uregs[ 4] = MIPSREG( 4); \ + f.mips_regs.uregs[ 5] = MIPSREG( 5); \ + f.mips_regs.uregs[ 6] = MIPSREG( 6); \ + f.mips_regs.uregs[ 7] = MIPSREG( 7); \ + f.mips_regs.uregs[ 8] = MIPSREG( 8); \ + f.mips_regs.uregs[ 9] = MIPSREG( 9); \ + f.mips_regs.uregs[10] = MIPSREG(10); \ + f.mips_regs.uregs[11] = MIPSREG(11); \ + f.mips_regs.uregs[12] = MIPSREG(12); \ + f.mips_regs.uregs[13] = MIPSREG(13); \ + f.mips_regs.uregs[14] = MIPSREG(14); \ + f.mips_regs.uregs[15] = MIPSREG(15); \ + f.mips_regs.uregs[16] = MIPSREG(16); \ + f.mips_regs.uregs[17] = MIPSREG(17); \ + f.mips_regs.uregs[18] = MIPSREG(18); \ + f.mips_regs.uregs[19] = MIPSREG(19); \ + f.mips_regs.uregs[20] = MIPSREG(20); \ + 
f.mips_regs.uregs[21] = MIPSREG(21); \ + f.mips_regs.uregs[22] = MIPSREG(22); \ + f.mips_regs.uregs[23] = MIPSREG(23); \ + f.mips_regs.uregs[24] = MIPSREG(24); \ + f.mips_regs.uregs[25] = MIPSREG(25); \ + f.mips_regs.uregs[26] = MIPSREG(26); \ + f.mips_regs.uregs[27] = MIPSREG(27); \ + f.mips_regs.uregs[28] = MIPSREG(28); \ + f.mips_regs.uregs[29] = MIPSREG(29); \ + f.mips_regs.uregs[30] = MIPSREG(30); \ + f.mips_regs.uregs[31] = MIPSREG(31); \ + __asm__ volatile ("mfhi %0" : "=r"(hi)); \ + __asm__ volatile ("mflo %0" : "=r"(lo)); \ + __asm__ volatile ("jal 1f; 1:nop" : "=r"(pc)); \ + f.mips_regs.hi = hi; \ + f.mips_regs.lo = lo; \ + f.mips_regs.cp0_epc = pc; \ + f.errno_ = errno; \ + f.tid = sys_gettid(); \ + } while (0) + #define SET_FRAME(f,r) \ + do { \ + errno = (f).errno_; \ + memcpy((r).uregs, (f).mips_regs.uregs, \ + 32*sizeof(unsigned long)); \ + (r).hi = (f).mips_regs.hi; \ + (r).lo = (f).mips_regs.lo; \ + (r).cp0_epc = (f).mips_regs.cp0_epc; \ + } while (0) +#else + /* If we do not have a hand-optimized assembly version of the FRAME() + * macro, we cannot reliably unroll the stack. So, we show a few additional + * stack frames for the coredumper. + */ + typedef struct Frame { + pid_t tid; + } Frame; + #define FRAME(f) Frame f; do { f.tid = sys_gettid(); } while (0) + #define SET_FRAME(f,r) do { } while (0) +#endif + + +/* Internal function for generating a core file. This API can change without + * notice and is only supposed to be used internally by the core dumper. + * + * This function works for both single- and multi-threaded core + * dumps. If called as + * + * FRAME(frame); + * InternalGetCoreDump(&frame, 0, NULL, ap); + * + * it creates a core file that only contains information about the + * calling thread. + * + * Optionally, the caller can provide information about other threads + * by passing their process ids in "thread_pids". The process id of + * the caller should not be included in this array. 
All of the threads + * must have been attached to with ptrace(), prior to calling this + * function. They will be detached when "InternalGetCoreDump()" returns. + * + * This function either returns a file handle that can be read for obtaining + * a core dump, or "-1" in case of an error. In the latter case, "errno" + * will be set appropriately. + * + * While "InternalGetCoreDump()" is not technically async signal safe, you + * might be tempted to invoke it from a signal handler. The code goes to + * great lengths to make a best effort that this will actually work. But in + * any case, you must make sure that you preserve the value of "errno" + * yourself. It is guaranteed to be clobbered otherwise. + * + * Also, "InternalGetCoreDump" is not strictly speaking re-entrant. Again, + * it makes a best effort to behave reasonably when called in a multi- + * threaded environment, but it is ultimately the caller's responsibility + * to provide locking. + */ +int InternalGetCoreDump(void *frame, int num_threads, pid_t *thread_pids, + va_list ap + /* const struct CoreDumpParameters *params, + const char *file_name, + const char *PATH + */); + +#endif + +#ifdef __cplusplus +} +#endif +#endif /* _ELFCORE_H */ diff --git a/src/third_party/gperftools-2.7/src/base/googleinit.h b/src/third_party/gperftools-2.7/src/base/googleinit.h new file mode 100644 index 00000000000..3ea411a325a --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/googleinit.h @@ -0,0 +1,74 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Jacob Hoffman-Andrews + +#ifndef _GOOGLEINIT_H +#define _GOOGLEINIT_H + +#include "base/logging.h" + +class GoogleInitializer { + public: + typedef void (*VoidFunction)(void); + GoogleInitializer(const char* name, VoidFunction ctor, VoidFunction dtor) + : name_(name), destructor_(dtor) { + RAW_VLOG(10, "<GoogleModuleObject> constructing: %s\n", name_); + if (ctor) + ctor(); + } + ~GoogleInitializer() { + RAW_VLOG(10, "<GoogleModuleObject> destroying: %s\n", name_); + if (destructor_) + destructor_(); + } + + private: + const char* const name_; + const VoidFunction destructor_; +}; + +#define REGISTER_MODULE_INITIALIZER(name, body) \ + namespace { \ + static void google_init_module_##name () { body; } \ + GoogleInitializer google_initializer_module_##name(#name, \ + google_init_module_##name, NULL); \ + } + +#define REGISTER_MODULE_DESTRUCTOR(name, body) \ + namespace { \ + static void google_destruct_module_##name () { body; } \ + GoogleInitializer google_destructor_module_##name(#name, \ + NULL, google_destruct_module_##name); \ + } + + +#endif /* _GOOGLEINIT_H */ diff --git a/src/third_party/gperftools-2.7/src/base/linux_syscall_support.h b/src/third_party/gperftools-2.7/src/base/linux_syscall_support.h new file mode 100644 index 00000000000..13aa415e250 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/linux_syscall_support.h @@ -0,0 +1,2913 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2005-2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +/* This file includes Linux-specific support functions common to the + * coredumper and the thread lister; primarily, this is a collection + * of direct system calls, and a couple of symbols missing from + * standard header files. + * There are a few options that the including file can set to control + * the behavior of this file: + * + * SYS_CPLUSPLUS: + * The entire header file will normally be wrapped in 'extern "C" { }", + * making it suitable for compilation as both C and C++ source. If you + * do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit + * the wrapping. N.B. doing so will suppress inclusion of all prerequisite + * system header files, too. 
It is the caller's responsibility to provide + * the necessary definitions. + * + * SYS_ERRNO: + * All system calls will update "errno" unless overriden by setting the + * SYS_ERRNO macro prior to including this file. SYS_ERRNO should be + * an l-value. + * + * SYS_INLINE: + * New symbols will be defined "static inline", unless overridden by + * the SYS_INLINE macro. + * + * SYS_LINUX_SYSCALL_SUPPORT_H + * This macro is used to avoid multiple inclusions of this header file. + * If you need to include this file more than once, make sure to + * unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion. + * + * SYS_PREFIX: + * New system calls will have a prefix of "sys_" unless overridden by + * the SYS_PREFIX macro. Valid values for this macro are [0..9] which + * results in prefixes "sys[0..9]_". It is also possible to set this + * macro to -1, which avoids all prefixes. + * + * This file defines a few internal symbols that all start with "LSS_". + * Do not access these symbols from outside this file. They are not part + * of the supported API. + * + * NOTE: This is a stripped down version of the official opensource + * version of linux_syscall_support.h, which lives at + * http://code.google.com/p/linux-syscall-support/ + * It includes only the syscalls that are used in perftools, plus a + * few extra. 
Here's the breakdown: + * 1) Perftools uses these: grep -rho 'sys_[a-z0-9_A-Z]* *(' src | sort -u + * sys__exit( + * sys_clone( + * sys_close( + * sys_fcntl( + * sys_fstat( + * sys_futex( + * sys_getcpu( + * sys_getdents64( + * sys_getppid( + * sys_gettid( + * sys_lseek( + * sys_mmap( + * sys_mremap( + * sys_munmap( + * sys_open( + * sys_pipe( + * sys_prctl( + * sys_ptrace( + * sys_ptrace_detach( + * sys_read( + * sys_sched_yield( + * sys_sigaction( + * sys_sigaltstack( + * sys_sigdelset( + * sys_sigfillset( + * sys_sigprocmask( + * sys_socket( + * sys_stat( + * sys_waitpid( + * 2) These are used as subroutines of the above: + * sys_getpid -- gettid + * sys_kill -- ptrace_detach + * sys_restore -- sigaction + * sys_restore_rt -- sigaction + * sys_socketcall -- socket + * sys_wait4 -- waitpid + * 3) I left these in even though they're not used. They either + * complement the above (write vs read) or are variants (rt_sigaction): + * sys_fstat64 + * sys_llseek + * sys_mmap2 + * sys_openat + * sys_getdents + * sys_rt_sigaction + * sys_rt_sigprocmask + * sys_sigaddset + * sys_sigemptyset + * sys_stat64 + * sys_write + */ +#ifndef SYS_LINUX_SYSCALL_SUPPORT_H +#define SYS_LINUX_SYSCALL_SUPPORT_H + +/* We currently only support x86-32, x86-64, ARM, MIPS, PPC/PPC64, Aarch64, s390 and s390x + * on Linux. + * Porting to other related platforms should not be difficult. + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ + defined(__mips__) || defined(__PPC__) || \ + defined(__aarch64__) || defined(__s390__)) \ + && (defined(__linux)) + +#ifndef SYS_CPLUSPLUS +#ifdef __cplusplus +/* Some system header files in older versions of gcc neglect to properly + * handle being included from C++. As it appears to be harmless to have + * multiple nested 'extern "C"' blocks, just add another one here. 
+ */ +extern "C" { +#endif + +#include <errno.h> +#include <signal.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <sys/ptrace.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <syscall.h> +#include <unistd.h> +#include <linux/unistd.h> +#include <endian.h> +#include <fcntl.h> + +#ifdef __mips__ +/* Include definitions of the ABI currently in use. */ +#include <sgidefs.h> +#endif + +#endif + +/* As glibc often provides subtly incompatible data structures (and implicit + * wrapper functions that convert them), we provide our own kernel data + * structures for use by the system calls. + * These structures have been developed by using Linux 2.6.23 headers for + * reference. Note though, we do not care about exact API compatibility + * with the kernel, and in fact the kernel often does not have a single + * API that works across architectures. Instead, we try to mimic the glibc + * API where reasonable, and only guarantee ABI compatibility with the + * kernel headers. + * Most notably, here are a few changes that were made to the structures + * defined by kernel headers: + * + * - we only define structures, but not symbolic names for kernel data + * types. For the latter, we directly use the native C datatype + * (i.e. "unsigned" instead of "mode_t"). + * - in a few cases, it is possible to define identical structures for + * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by + * standardizing on the 64bit version of the data types. In particular, + * this means that we use "unsigned" where the 32bit headers say + * "unsigned long". + * - overall, we try to minimize the number of cases where we need to + * conditionally define different structures. + * - the "struct kernel_sigaction" class of structures have been + * modified to more closely mimic glibc's API by introducing an + * anonymous union for the function pointer. 
+ * - a small number of field names had to have an underscore appended to + * them, because glibc defines a global macro by the same name. + */ + +/* include/linux/dirent.h */ +struct kernel_dirent64 { + unsigned long long d_ino; + long long d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[256]; +}; + +/* include/linux/dirent.h */ +struct kernel_dirent { + long d_ino; + long d_off; + unsigned short d_reclen; + char d_name[256]; +}; + +/* include/linux/time.h */ +struct kernel_timespec { + long tv_sec; + long tv_nsec; +}; + +/* include/linux/time.h */ +struct kernel_timeval { + long tv_sec; + long tv_usec; +}; + +/* include/linux/resource.h */ +struct kernel_rusage { + struct kernel_timeval ru_utime; + struct kernel_timeval ru_stime; + long ru_maxrss; + long ru_ixrss; + long ru_idrss; + long ru_isrss; + long ru_minflt; + long ru_majflt; + long ru_nswap; + long ru_inblock; + long ru_oublock; + long ru_msgsnd; + long ru_msgrcv; + long ru_nsignals; + long ru_nvcsw; + long ru_nivcsw; +}; + +#if defined(__i386__) || defined(__arm__) \ + || defined(__PPC__) || (defined(__s390__) && !defined(__s390x__)) + +/* include/asm-{arm,i386,mips,ppc}/signal.h */ +struct kernel_old_sigaction { + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, siginfo_t *, void *); + }; + unsigned long sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +} __attribute__((packed,aligned(4))); +#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + #define kernel_old_sigaction kernel_sigaction +#elif defined(__aarch64__) + // No kernel_old_sigaction defined for arm64. +#endif + +/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the + * exactly match the size of the signal set, even though the API was + * intended to be extensible. We define our own KERNEL_NSIG to deal with + * this. + * Please note that glibc provides signals [1.._NSIG-1], whereas the + * kernel (and this header) provides the range [1..KERNEL_NSIG]. 
The + * actual number of signals is obviously the same, but the constants + * differ by one. + */ +#ifdef __mips__ +#define KERNEL_NSIG 128 +#else +#define KERNEL_NSIG 64 +#endif + +/* include/asm-{arm,i386,mips,x86_64}/signal.h */ +struct kernel_sigset_t { + unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/ + (8*sizeof(unsigned long))]; +}; + +/* include/asm-{arm,generic,i386,mips,x86_64,ppc}/signal.h */ +struct kernel_sigaction { +#ifdef __mips__ + unsigned long sa_flags; + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, siginfo_t *, void *); + }; + struct kernel_sigset_t sa_mask; +#else + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, siginfo_t *, void *); + }; + unsigned long sa_flags; + void (*sa_restorer)(void); + struct kernel_sigset_t sa_mask; +#endif +}; + +/* include/asm-{arm,i386,mips,ppc,s390}/stat.h */ +#ifdef __mips__ +#if (_MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32) +struct kernel_stat { +#else +struct kernel_stat64 { +#endif + unsigned st_dev; + unsigned __pad0[3]; + unsigned long long st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + unsigned __pad1[3]; + long long st_size; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned st_blksize; + unsigned __pad2; + unsigned long long st_blocks; +}; +#elif defined __PPC__ +struct kernel_stat64 { + unsigned long long st_dev; + unsigned long long st_ino; + unsigned st_nlink; + unsigned st_mode; + unsigned st_uid; + unsigned st_gid; + int __pad2; + unsigned long long st_rdev; + long long st_size; + long long st_blksize; + long long st_blocks; + kernel_timespec st_atim; + kernel_timespec st_mtim; + kernel_timespec st_ctim; + unsigned long __unused4; + unsigned long __unused5; + unsigned long __unused6; +}; +#else +struct kernel_stat64 { + unsigned long long st_dev; + unsigned char 
__pad0[4]; + unsigned __st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned long long st_rdev; + unsigned char __pad3[4]; + long long st_size; + unsigned st_blksize; + unsigned long long st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned long long st_ino; +}; +#endif + +/* include/asm-{arm,generic,i386,mips,x86_64,ppc,s390}/stat.h */ +#if defined(__i386__) || defined(__arm__) +struct kernel_stat { + /* The kernel headers suggest that st_dev and st_rdev should be 32bit + * quantities encoding 12bit major and 20bit minor numbers in an interleaved + * format. In reality, we do not see useful data in the top bits. So, + * we'll leave the padding in here, until we find a better solution. + */ + unsigned short st_dev; + short pad1; + unsigned st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + short pad2; + unsigned st_size; + unsigned st_blksize; + unsigned st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned __unused4; + unsigned __unused5; +}; +#elif defined(__x86_64__) +struct kernel_stat { + uint64_t st_dev; + uint64_t st_ino; + uint64_t st_nlink; + unsigned st_mode; + unsigned st_uid; + unsigned st_gid; + unsigned __pad0; + uint64_t st_rdev; + int64_t st_size; + int64_t st_blksize; + int64_t st_blocks; + uint64_t st_atime_; + uint64_t st_atime_nsec_; + uint64_t st_mtime_; + uint64_t st_mtime_nsec_; + uint64_t st_ctime_; + uint64_t st_ctime_nsec_; + int64_t __unused[3]; +}; +#elif defined(__PPC__) +struct kernel_stat { + unsigned long long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned long st_mode; + unsigned st_uid; + unsigned st_gid; + int __pad2; + unsigned long long st_rdev; + 
long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + kernel_timespec st_atim; + kernel_timespec st_mtim; + kernel_timespec st_ctim; + unsigned long __unused4; + unsigned long __unused5; + unsigned long __unused6; +}; +#elif defined(__mips__) \ + && !(_MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32) +struct kernel_stat { + unsigned st_dev; + int st_pad1[3]; + unsigned st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + int st_pad2[2]; + long st_size; + int st_pad3; + long st_atime_; + long st_atime_nsec_; + long st_mtime_; + long st_mtime_nsec_; + long st_ctime_; + long st_ctime_nsec_; + int st_blksize; + int st_blocks; + int st_pad4[14]; +}; +#elif defined(__aarch64__) +struct kernel_stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned long st_rdev; + unsigned long __pad1; + long st_size; + int st_blksize; + int __pad2; + long st_blocks; + long st_atime_; + unsigned long st_atime_nsec_; + long st_mtime_; + unsigned long st_mtime_nsec_; + long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned int __unused4; + unsigned int __unused5; +}; +#elif defined(__s390x__) +struct kernel_stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned int st_mode; + unsigned int st_uid; + unsigned int st_gid; + unsigned int __pad1; + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_atime_; + unsigned long st_atime_nsec_; + unsigned long st_mtime_; + unsigned long st_mtime_nsec_; + unsigned long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long st_blksize; + long st_blocks; + unsigned long __unused[3]; +}; +#elif defined(__s390__) +struct kernel_stat { + unsigned short st_dev; + unsigned short __pad1; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + 
unsigned short st_rdev; + unsigned short __pad2; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime_; + unsigned long st_atime_nsec_; + unsigned long st_mtime_; + unsigned long st_mtime_nsec_; + unsigned long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long __unused4; + unsigned long __unused5; +}; +#endif + + +/* Definitions missing from the standard header files */ +#ifndef O_DIRECTORY +#if defined(__arm__) +#define O_DIRECTORY 0040000 +#else +#define O_DIRECTORY 0200000 +#endif +#endif +#ifndef PR_GET_DUMPABLE +#define PR_GET_DUMPABLE 3 +#endif +#ifndef PR_SET_DUMPABLE +#define PR_SET_DUMPABLE 4 +#endif +#ifndef AT_FDCWD +#define AT_FDCWD (-100) +#endif +#ifndef AT_SYMLINK_NOFOLLOW +#define AT_SYMLINK_NOFOLLOW 0x100 +#endif +#ifndef AT_REMOVEDIR +#define AT_REMOVEDIR 0x200 +#endif +#ifndef MREMAP_FIXED +#define MREMAP_FIXED 2 +#endif +#ifndef SA_RESTORER +#define SA_RESTORER 0x04000000 +#endif + +#if defined(__i386__) +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#endif +#ifndef __NR_stat64 +#define __NR_stat64 195 +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 197 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 220 +#endif +#ifndef __NR_gettid +#define __NR_gettid 224 +#endif +#ifndef __NR_futex +#define __NR_futex 240 +#endif +#ifndef __NR_openat +#define __NR_openat 295 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 318 +#endif +/* End of i386 definitions */ +#elif defined(__arm__) +#ifndef __syscall +#if defined(__thumb__) || defined(__ARM_EABI__) +#define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name; +#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs +#define __syscall(name) "swi\t0" +#define __syscall_safe(name) \ + "push {r7}\n" \ + "mov r7,%[sysreg]\n" \ + __syscall(name)"\n" \ + "pop {r7}" +#else +#define __SYS_REG(name) +#define __SYS_REG_LIST(regs...) 
regs +#define __syscall(name) "swi\t" __sys1(__NR_##name) "" +#define __syscall_safe(name) __syscall(name) +#endif +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) +#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) +#endif +#ifndef __NR_stat64 +#define __NR_stat64 (__NR_SYSCALL_BASE + 195) +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_SYSCALL_BASE + 224) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_SYSCALL_BASE + 240) +#endif +/* End of ARM definitions */ +#elif defined(__x86_64__) +#ifndef __NR_gettid +#define __NR_gettid 186 +#endif +#ifndef __NR_futex +#define __NR_futex 202 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 217 +#endif +#ifndef __NR_openat +#define __NR_openat 257 +#endif +/* End of x86-64 definitions */ +#elif defined(__mips__) +#if _MIPS_SIM == _MIPS_SIM_ABI32 +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction (__NR_Linux + 194) +#define __NR_rt_sigprocmask (__NR_Linux + 195) +#endif +#ifndef __NR_stat64 +#define __NR_stat64 (__NR_Linux + 213) +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 (__NR_Linux + 215) +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 (__NR_Linux + 219) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 222) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 238) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 288) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 293) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 312) +#endif +/* End of MIPS (old 32bit API) definitions */ +#elif (_MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32) +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 178) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 194) +#endif +#ifndef __NR_openat +#define 
__NR_openat (__NR_Linux + 247) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 252) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 271) +#endif +/* End of MIPS (64bit API) definitions */ +#else +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 178) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 194) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 251) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 256) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 275) +#endif +/* End of MIPS (new 32bit API) definitions */ +#endif +/* End of MIPS definitions */ +#elif defined(__PPC__) +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 +#endif +#ifndef __NR_stat64 +#define __NR_stat64 195 +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 197 +#endif +#ifndef __NR_socket +#define __NR_socket 198 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 202 +#endif +#ifndef __NR_gettid +#define __NR_gettid 207 +#endif +#ifndef __NR_futex +#define __NR_futex 221 +#endif +#ifndef __NR_openat +#define __NR_openat 286 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 302 +#endif +/* End of powerpc defininitions */ +#elif defined(__aarch64__) +#ifndef __NR_fstatat +#define __NR_fstatat 79 +#endif +/* End of aarch64 defininitions */ +#elif defined(__s390__) +#ifndef __NR_quotactl +#define __NR_quotactl 131 +#endif +#ifndef __NR_rt_sigreturn +#define __NR_rt_sigreturn 173 +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction 174 +#endif +#ifndef __NR_rt_sigprocmask +#define __NR_rt_sigprocmask 175 +#endif +#ifndef __NR_rt_sigpending +#define __NR_rt_sigpending 176 +#endif +#ifndef __NR_rt_sigsuspend +#define __NR_rt_sigsuspend 179 +#endif +#ifndef __NR_pread64 +#define __NR_pread64 180 +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 181 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 220 +#endif +#ifndef __NR_readahead 
+#define __NR_readahead 222 +#endif +#ifndef __NR_setxattr +#define __NR_setxattr 224 +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr 225 +#endif +#ifndef __NR_getxattr +#define __NR_getxattr 227 +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr 228 +#endif +#ifndef __NR_listxattr +#define __NR_listxattr 230 +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr 231 +#endif +#ifndef __NR_gettid +#define __NR_gettid 236 +#endif +#ifndef __NR_tkill +#define __NR_tkill 237 +#endif +#ifndef __NR_futex +#define __NR_futex 238 +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 239 +#endif +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 240 +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address 252 +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime 260 +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres 261 +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 265 +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 266 +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set 282 +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get 283 +#endif +#ifndef __NR_openat +#define __NR_openat 288 +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat 294 +#endif +#ifndef __NR_move_pages +#define __NR_move_pages 310 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 311 +#endif +#ifndef __NR_fallocate +#define __NR_fallocate 314 +#endif +/* Some syscalls are named/numbered differently between s390 and s390x. 
*/ +#ifdef __s390x__ +# ifndef __NR_getrlimit +# define __NR_getrlimit 191 +# endif +# ifndef __NR_setresuid +# define __NR_setresuid 208 +# endif +# ifndef __NR_getresuid +# define __NR_getresuid 209 +# endif +# ifndef __NR_setresgid +# define __NR_setresgid 210 +# endif +# ifndef __NR_getresgid +# define __NR_getresgid 211 +# endif +# ifndef __NR_setfsuid +# define __NR_setfsuid 215 +# endif +# ifndef __NR_setfsgid +# define __NR_setfsgid 216 +# endif +# ifndef __NR_fadvise64 +# define __NR_fadvise64 253 +# endif +# ifndef __NR_newfstatat +# define __NR_newfstatat 293 +# endif +#else /* __s390x__ */ +# ifndef __NR_getrlimit +# define __NR_getrlimit 76 +# endif +# ifndef __NR_setfsuid +# define __NR_setfsuid 138 +# endif +# ifndef __NR_setfsgid +# define __NR_setfsgid 139 +# endif +# ifndef __NR_setresuid +# define __NR_setresuid 164 +# endif +# ifndef __NR_getresuid +# define __NR_getresuid 165 +# endif +# ifndef __NR_setresgid +# define __NR_setresgid 170 +# endif +# ifndef __NR_getresgid +# define __NR_getresgid 171 +# endif +# ifndef __NR_ugetrlimit +# define __NR_ugetrlimit 191 +# endif +# ifndef __NR_mmap2 +# define __NR_mmap2 192 +# endif +# ifndef __NR_setresuid32 +# define __NR_setresuid32 208 +# endif +# ifndef __NR_getresuid32 +# define __NR_getresuid32 209 +# endif +# ifndef __NR_setresgid32 +# define __NR_setresgid32 210 +# endif +# ifndef __NR_getresgid32 +# define __NR_getresgid32 211 +# endif +# ifndef __NR_setfsuid32 +# define __NR_setfsuid32 215 +# endif +# ifndef __NR_setfsgid32 +# define __NR_setfsgid32 216 +# endif +# ifndef __NR_fadvise64_64 +# define __NR_fadvise64_64 264 +# endif +# ifndef __NR_fstatat64 +# define __NR_fstatat64 293 +# endif +#endif /* __s390__ */ +/* End of s390/s390x definitions */ +#endif + + +/* After forking, we must make sure to only call system calls. 
*/ +#if __BOUNDED_POINTERS__ + #error "Need to port invocations of syscalls for bounded ptrs" +#else + /* The core dumper and the thread lister get executed after threads + * have been suspended. As a consequence, we cannot call any functions + * that acquire locks. Unfortunately, libc wraps most system calls + * (e.g. in order to implement pthread_atfork, and to make calls + * cancellable), which means we cannot call these functions. Instead, + * we have to call syscall() directly. + */ + #undef LSS_ERRNO + #ifdef SYS_ERRNO + /* Allow the including file to override the location of errno. This can + * be useful when using clone() with the CLONE_VM option. + */ + #define LSS_ERRNO SYS_ERRNO + #else + #define LSS_ERRNO errno + #endif + + #undef LSS_INLINE + #ifdef SYS_INLINE + #define LSS_INLINE SYS_INLINE + #else + #define LSS_INLINE static inline + #endif + + /* Allow the including file to override the prefix used for all new + * system calls. By default, it will be set to "sys_". + */ + #undef LSS_NAME + #ifndef SYS_PREFIX + #define LSS_NAME(name) sys_##name + #elif SYS_PREFIX < 0 + #define LSS_NAME(name) name + #elif SYS_PREFIX == 0 + #define LSS_NAME(name) sys0_##name + #elif SYS_PREFIX == 1 + #define LSS_NAME(name) sys1_##name + #elif SYS_PREFIX == 2 + #define LSS_NAME(name) sys2_##name + #elif SYS_PREFIX == 3 + #define LSS_NAME(name) sys3_##name + #elif SYS_PREFIX == 4 + #define LSS_NAME(name) sys4_##name + #elif SYS_PREFIX == 5 + #define LSS_NAME(name) sys5_##name + #elif SYS_PREFIX == 6 + #define LSS_NAME(name) sys6_##name + #elif SYS_PREFIX == 7 + #define LSS_NAME(name) sys7_##name + #elif SYS_PREFIX == 8 + #define LSS_NAME(name) sys8_##name + #elif SYS_PREFIX == 9 + #define LSS_NAME(name) sys9_##name + #endif + + #undef LSS_RETURN + #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ + defined(__aarch64__) || defined(__s390__)) + /* Failing system calls return a negative result in the range of + * -1..-4095. 
These are "errno" values with the sign inverted. + */ + #define LSS_RETURN(type, res) \ + do { \ + if ((unsigned long)(res) >= (unsigned long)(-4095)) { \ + LSS_ERRNO = -(res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #elif defined(__mips__) + /* On MIPS, failing system calls return -1, and set errno in a + * separate CPU register. + */ + #define LSS_RETURN(type, res, err) \ + do { \ + if (err) { \ + LSS_ERRNO = (res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #elif defined(__PPC__) + /* On PPC, failing system calls return -1, and set errno in a + * separate CPU register. See linux/unistd.h. + */ + #define LSS_RETURN(type, res, err) \ + do { \ + if (err & 0x10000000 ) { \ + LSS_ERRNO = (res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #endif + #if defined(__i386__) + #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404) + /* This only works for GCC-4.4 and above -- the first version to use + .cfi directives for dwarf unwind info. */ + #define CFI_ADJUST_CFA_OFFSET(adjust) \ + ".cfi_adjust_cfa_offset " #adjust "\n" + #else + #define CFI_ADJUST_CFA_OFFSET(adjust) /**/ + #endif + + /* In PIC mode (e.g. when building shared libraries), gcc for i386 + * reserves ebx. Unfortunately, most distribution ship with implementations + * of _syscallX() which clobber ebx. + * Also, most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. + * So, we just have to redefine all of the _syscallX() macros. + */ + #undef LSS_BODY + #define LSS_BODY(type,args...) 
\ + long __res; \ + __asm__ __volatile__("push %%ebx\n" \ + CFI_ADJUST_CFA_OFFSET(4) \ + "movl %2,%%ebx\n" \ + "int $0x80\n" \ + "pop %%ebx\n" \ + CFI_ADJUST_CFA_OFFSET(-4) \ + args \ + : "esp", "memory"); \ + LSS_RETURN(type,__res) + #undef _syscall0 + #define _syscall0(type,name) \ + type LSS_NAME(name)(void) { \ + long __res; \ + __asm__ volatile("int $0x80" \ + : "=a" (__res) \ + : "0" (__NR_##name) \ + : "memory"); \ + LSS_RETURN(type,__res); \ + } + #undef _syscall1 + #define _syscall1(type,name,type1,arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1))); \ + } + #undef _syscall2 + #define _syscall2(type,name,type1,arg1,type2,arg2) \ + type LSS_NAME(name)(type1 arg1,type2 arg2) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2))); \ + } + #undef _syscall3 + #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ + type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ + "d" ((long)(arg3))); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ + "d" ((long)(arg3)),"S" ((long)(arg4))); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + long __res; \ + __asm__ __volatile__("push %%ebx\n" \ + "movl %2,%%ebx\n" \ + "movl %1,%%eax\n" \ + "int $0x80\n" \ + "pop %%ebx" \ + : "=a" (__res) \ + : "i" (__NR_##name), "ri" ((long)(arg1)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ + : "esp", "memory"); \ + 
LSS_RETURN(type,__res); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + long __res; \ + struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 }; \ + __asm__ __volatile__("push %%ebp\n" \ + "push %%ebx\n" \ + "movl 4(%2),%%ebp\n" \ + "movl 0(%2), %%ebx\n" \ + "movl %1,%%eax\n" \ + "int $0x80\n" \ + "pop %%ebx\n" \ + "pop %%ebp" \ + : "=a" (__res) \ + : "i" (__NR_##name), "0" ((long)(&__s)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ + : "esp", "memory"); \ + LSS_RETURN(type,__res); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __res; + __asm__ __volatile__(/* if (fn == NULL) + * return -EINVAL; + */ + "movl %3,%%ecx\n" + "jecxz 1f\n" + + /* if (child_stack == NULL) + * return -EINVAL; + */ + "movl %4,%%ecx\n" + "jecxz 1f\n" + + /* Set up alignment of the child stack: + * child_stack = (child_stack & ~0xF) - 20; + */ + "andl $-16,%%ecx\n" + "subl $20,%%ecx\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "movl %6,%%eax\n" + "movl %%eax,4(%%ecx)\n" + "movl %3,%%eax\n" + "movl %%eax,(%%ecx)\n" + + /* %eax = syscall(%eax = __NR_clone, + * %ebx = flags, + * %ecx = child_stack, + * %edx = parent_tidptr, + * %esi = newtls, + * %edi = child_tidptr) + * Also, make sure that %ebx gets preserved as it is + * used in PIC mode. + */ + "movl %8,%%esi\n" + "movl %7,%%edx\n" + "movl %5,%%eax\n" + "movl %9,%%edi\n" + "pushl %%ebx\n" + "movl %%eax,%%ebx\n" + "movl %2,%%eax\n" + "int $0x80\n" + + /* In the parent: restore %ebx + * In the child: move "fn" into %ebx + */ + "popl %%ebx\n" + + /* if (%eax != 0) + * return %eax; + */ + "test %%eax,%%eax\n" + "jnz 1f\n" + + /* In the child, now. 
Terminate frame pointer chain. + */ + "movl $0,%%ebp\n" + + /* Call "fn". "arg" is already on the stack. + */ + "call *%%ebx\n" + + /* Call _exit(%ebx). Unfortunately older versions + * of gcc restrict the number of arguments that can + * be passed to asm(). So, we need to hard-code the + * system call number. + */ + "movl %%eax,%%ebx\n" + "movl $1,%%eax\n" + "int $0x80\n" + + /* Return to parent. + */ + "1:\n" + : "=a" (__res) + : "0"(-EINVAL), "i"(__NR_clone), + "m"(fn), "m"(child_stack), "m"(flags), "m"(arg), + "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) + : "esp", "memory", "ecx", "edx", "esi", "edi"); + LSS_RETURN(int, __res); + } + + LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* On i386, the kernel does not know how to return from a signal + * handler. Instead, it relies on user space to provide a + * restorer function that calls the {rt_,}sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:movl %1,%%eax\n" + "int $0x80\n" + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_rt_sigreturn)); + return res; + } + LSS_INLINE void (*LSS_NAME(restore)(void))(void) { + /* On i386, the kernel does not know how to return from a signal + * handler. Instead, it relies on user space to provide a + * restorer function that calls the {rt_,}sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. 
+ */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:pop %%eax\n" + "movl %1,%%eax\n" + "int $0x80\n" + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_sigreturn)); + return res; + } + #elif defined(__x86_64__) + /* There are no known problems with any of the _syscallX() macros + * currently shipping for x86_64, but we still need to be able to define + * our own version so that we can override the location of the errno + * location (e.g. when using the clone() system call with the CLONE_VM + * option). + */ + #undef LSS_ENTRYPOINT + #define LSS_ENTRYPOINT "syscall\n" + + /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit. + * We need to explicitly cast to an unsigned 64 bit type to avoid implicit + * sign extension. We can't cast pointers directly because those are + * 32 bits, and gcc will dump ugly warnings about casting from a pointer + * to an integer of a different size. + */ + #undef LSS_SYSCALL_ARG + #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a)) + #undef _LSS_RETURN + #define _LSS_RETURN(type, res, cast) \ + do { \ + if ((uint64_t)(res) >= (uint64_t)(-4095)) { \ + LSS_ERRNO = -(res); \ + res = -1; \ + } \ + return (type)(cast)(res); \ + } while (0) + #undef LSS_RETURN + #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t) + + #undef _LSS_BODY + #define _LSS_BODY(nr, type, name, cast, ...) \ + long long __res; \ + __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT \ + : "=a" (__res) \ + : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__) \ + : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory"); \ + _LSS_RETURN(type, __res, cast) + #undef LSS_BODY + #define LSS_BODY(nr, type, name, args...) 
\ + _LSS_BODY(nr, type, name, uintptr_t, ## args) + + #undef LSS_BODY_ASM0 + #undef LSS_BODY_ASM1 + #undef LSS_BODY_ASM2 + #undef LSS_BODY_ASM3 + #undef LSS_BODY_ASM4 + #undef LSS_BODY_ASM5 + #undef LSS_BODY_ASM6 + #define LSS_BODY_ASM0 + #define LSS_BODY_ASM1 LSS_BODY_ASM0 + #define LSS_BODY_ASM2 LSS_BODY_ASM1 + #define LSS_BODY_ASM3 LSS_BODY_ASM2 + #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;" + #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;" + #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;" + + #undef LSS_BODY_CLOBBER0 + #undef LSS_BODY_CLOBBER1 + #undef LSS_BODY_CLOBBER2 + #undef LSS_BODY_CLOBBER3 + #undef LSS_BODY_CLOBBER4 + #undef LSS_BODY_CLOBBER5 + #undef LSS_BODY_CLOBBER6 + #define LSS_BODY_CLOBBER0 + #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0 + #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1 + #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2 + #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10", + #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8", + #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9", + + #undef LSS_BODY_ARG0 + #undef LSS_BODY_ARG1 + #undef LSS_BODY_ARG2 + #undef LSS_BODY_ARG3 + #undef LSS_BODY_ARG4 + #undef LSS_BODY_ARG5 + #undef LSS_BODY_ARG6 + #define LSS_BODY_ARG0() + #define LSS_BODY_ARG1(arg1) \ + LSS_BODY_ARG0(), "D" (arg1) + #define LSS_BODY_ARG2(arg1, arg2) \ + LSS_BODY_ARG1(arg1), "S" (arg2) + #define LSS_BODY_ARG3(arg1, arg2, arg3) \ + LSS_BODY_ARG2(arg1, arg2), "d" (arg3) + #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \ + LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4) + #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \ + LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5) + #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \ + LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6) + + #undef _syscall0 + #define _syscall0(type,name) \ + type LSS_NAME(name)() { \ + LSS_BODY(0, type, name); \ + } + #undef _syscall1 + #define _syscall1(type,name,type1,arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(1, type, 
name, LSS_SYSCALL_ARG(arg1)); \ + } + #undef _syscall2 + #define _syscall2(type,name,type1,arg1,type2,arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\ + } + #undef _syscall3 + #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \ + LSS_SYSCALL_ARG(arg5)); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \ + LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long long __res; + { + __asm__ __volatile__(/* if (fn == NULL) + * return -EINVAL; + */ + "testq %4,%4\n" + "jz 1f\n" + + /* if (child_stack == NULL) + * return -EINVAL; + */ + "testq %5,%5\n" + "jz 1f\n" + + /* Set up alignment of the child stack: + * child_stack = (child_stack & ~0xF) - 16; + 
*/ + "andq $-16,%5\n" + "subq $16,%5\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "movq %7,8(%5)\n" + "movq %4,0(%5)\n" + + /* %rax = syscall(%rax = __NR_clone, + * %rdi = flags, + * %rsi = child_stack, + * %rdx = parent_tidptr, + * %r8 = new_tls, + * %r10 = child_tidptr) + */ + "movq %2,%%rax\n" + "movq %9,%%r8\n" + "movq %10,%%r10\n" + "syscall\n" + + /* if (%rax != 0) + * return; + */ + "testq %%rax,%%rax\n" + "jnz 1f\n" + + /* In the child. Terminate frame pointer chain. + */ + "xorq %%rbp,%%rbp\n" + + /* Call "fn(arg)". + */ + "popq %%rax\n" + "popq %%rdi\n" + "call *%%rax\n" + + /* Call _exit(%ebx). + */ + "movq %%rax,%%rdi\n" + "movq %3,%%rax\n" + "syscall\n" + + /* Return to parent. + */ + "1:\n" + : "=a" (__res) + : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), + "r"(LSS_SYSCALL_ARG(fn)), + "S"(LSS_SYSCALL_ARG(child_stack)), + "D"(LSS_SYSCALL_ARG(flags)), + "r"(LSS_SYSCALL_ARG(arg)), + "d"(LSS_SYSCALL_ARG(parent_tidptr)), + "r"(LSS_SYSCALL_ARG(newtls)), + "r"(LSS_SYSCALL_ARG(child_tidptr)) + : "rsp", "memory", "r8", "r10", "r11", "rcx"); + } + LSS_RETURN(int, __res); + } + + LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* On x86-64, the kernel does not know how to return from + * a signal handler. Instead, it relies on user space to provide a + * restorer function that calls the rt_sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + long long res; + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:movq %1,%%rax\n" + "syscall\n" + "2:popq %0\n" + "addq $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_rt_sigreturn)); + return (void (*)(void))(uintptr_t)res; + } + #elif defined(__arm__) + /* Most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. 
+ * So, we just have to redefine all fo the _syscallX() macros. + */ + #undef LSS_REG + #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a + + /* r0..r3 are scratch registers and not preserved across function + * calls. We need to first evaluate the first 4 syscall arguments + * and store them on stack. They must be loaded into r0..r3 after + * all function calls to avoid r0..r3 being clobbered. + */ + #undef LSS_SAVE_ARG + #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a + #undef LSS_LOAD_ARG + #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r + + #undef LSS_BODY + #define LSS_BODY(type, name, args...) \ + register long __res_r0 __asm__("r0"); \ + long __res; \ + __SYS_REG(name) \ + __asm__ __volatile__ (__syscall_safe(name) \ + : "=r"(__res_r0) \ + : __SYS_REG_LIST(args) \ + : "lr", "memory"); \ + __res = __res_r0; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)() { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + /* There is no need for using a volatile temp. 
*/ \ + LSS_REG(0, arg1); \ + LSS_BODY(type, name, "r"(__r0)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ + } + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ + } + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + LSS_SAVE_ARG(3, arg4); \ + LSS_REG(4, arg5); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4)); \ + } + #undef _syscall6 + #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_SAVE_ARG(0, arg1); \ + LSS_SAVE_ARG(1, arg2); \ + LSS_SAVE_ARG(2, arg3); \ + 
LSS_SAVE_ARG(3, arg4); \ + LSS_REG(4, arg5); \ + LSS_REG(5, arg6); \ + LSS_LOAD_ARG(0); \ + LSS_LOAD_ARG(1); \ + LSS_LOAD_ARG(2); \ + LSS_LOAD_ARG(3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4), "r"(__r5)); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + register long __res __asm__("r5"); + { + if (fn == NULL || child_stack == NULL) { + __res = -EINVAL; + goto clone_exit; + } + + /* stash first 4 arguments on stack first because we can only load + * them after all function calls. + */ + int tmp_flags = flags; + int * tmp_stack = (int*) child_stack; + void * tmp_ptid = parent_tidptr; + void * tmp_tls = newtls; + + register int *__ctid __asm__("r4") = child_tidptr; + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + *(--tmp_stack) = (int) arg; + *(--tmp_stack) = (int) fn; + + /* We must load r0..r3 last after all possible function calls. */ + register int __flags __asm__("r0") = tmp_flags; + register void *__stack __asm__("r1") = tmp_stack; + register void *__ptid __asm__("r2") = tmp_ptid; + register void *__tls __asm__("r3") = tmp_tls; + + /* %r0 = syscall(%r0 = flags, + * %r1 = child_stack, + * %r2 = parent_tidptr, + * %r3 = newtls, + * %r4 = child_tidptr) + */ + __SYS_REG(clone) + __asm__ __volatile__(/* %r0 = syscall(%r0 = flags, + * %r1 = child_stack, + * %r2 = parent_tidptr, + * %r3 = newtls, + * %r4 = child_tidptr) + */ + "push {r7}\n" + "mov r7,%1\n" + __syscall(clone)"\n" + + /* if (%r0 != 0) + * return %r0; + */ + "movs %0,r0\n" + "bne 1f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ldr r0,[sp, #4]\n" + "mov lr,pc\n" + "ldr pc,[sp]\n" + + /* Call _exit(%r0), which never returns. We only + * need to set r7 for EABI syscall ABI but we do + * this always to simplify code sharing between + * old and new syscall ABIs. 
+ */ + "mov r7,%2\n" + __syscall(exit)"\n" + + /* Pop r7 from the stack only in the parent. + */ + "1: pop {r7}\n" + : "=r" (__res) + : "r"(__sysreg), + "i"(__NR_exit), "r"(__stack), "r"(__flags), + "r"(__ptid), "r"(__tls), "r"(__ctid) + : "cc", "lr", "memory"); + } + clone_exit: + LSS_RETURN(int, __res); + } + #elif defined(__mips__) + #undef LSS_REG + #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \ + (unsigned long)(a) + + #if _MIPS_SIM == _MIPS_SIM_ABI32 + // See http://sources.redhat.com/ml/libc-alpha/2004-10/msg00050.html + // or http://www.linux-mips.org/archives/linux-mips/2004-10/msg00142.html + #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10", "$11", "$12",\ + "$13", "$14", "$15", "$24", "$25", "memory" + #else + #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12", "$13", \ + "$14", "$15", "$24", "$25", "memory" + #endif + + #undef LSS_BODY + #define LSS_BODY(type,name,r7,...) \ + register unsigned long __v0 __asm__("$2") = __NR_##name; \ + __asm__ __volatile__ ("syscall\n" \ + : "=&r"(__v0), r7 (__r7) \ + : "0"(__v0), ##__VA_ARGS__ \ + : MIPS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)() { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_BODY(type, name, "=r"); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); LSS_REG(5, arg2); \ + LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + register unsigned long 
__r7 __asm__("$7"); \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \ + } + #undef _syscall5 + #if _MIPS_SIM == _MIPS_SIM_ABI32 + /* The old 32bit MIPS system call API passes the fifth and sixth argument + * on the stack, whereas the new APIs use registers "r8" and "r9". + */ + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + register unsigned long __v0 __asm__("$2"); \ + __asm__ __volatile__ (".set noreorder\n" \ + "lw $2, %6\n" \ + "subu $29, 32\n" \ + "sw $2, 16($29)\n" \ + "li $2, %2\n" \ + "syscall\n" \ + "addiu $29, 32\n" \ + ".set reorder\n" \ + : "=&r"(__v0), "+r" (__r7) \ + : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ + "r"(__r6), "m" ((unsigned long)arg5) \ + : MIPS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7); \ + } + #else + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); LSS_REG(8, arg5); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ + "r"(__r8)); \ + } + #endif + #undef _syscall6 + #if _MIPS_SIM == _MIPS_SIM_ABI32 + /* The old 32bit MIPS system call API passes the fifth and sixth argument + * on the stack, whereas the new APIs use registers "r8" and "r9". 
+ */ + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + register unsigned long __v0 __asm__("$2"); \ + __asm__ __volatile__ (".set noreorder\n" \ + "lw $2, %6\n" \ + "lw $8, %7\n" \ + "subu $29, 32\n" \ + "sw $2, 16($29)\n" \ + "sw $8, 20($29)\n" \ + "li $2, %2\n" \ + "syscall\n" \ + "addiu $29, 32\n" \ + ".set reorder\n" \ + : "=&r"(__v0), "+r" (__r7) \ + : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ + "r"(__r6), "m" ((unsigned long)arg5), \ + "m" ((unsigned long)arg6) \ + : MIPS_SYSCALL_CLOBBERS); \ + LSS_RETURN(type, __v0, __r7); \ + } + #else + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5,type6 arg6) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ + "r"(__r8), "r"(__r9)); \ + } + #endif + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + register unsigned long __v0 __asm__("$2"); + register unsigned long __r7 __asm__("$7") = (unsigned long)newtls; + { + register int __flags __asm__("$4") = flags; + register void *__stack __asm__("$5") = child_stack; + register void *__ptid __asm__("$6") = parent_tidptr; + register int *__ctid __asm__("$8") = child_tidptr; + __asm__ __volatile__( + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "subu $29,24\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "sub $29,16\n" + #else + "dsubu $29,16\n" + #endif + + /* if (fn == NULL || child_stack == NULL) + * return -EINVAL; + */ + "li %0,%2\n" + "beqz %5,1f\n" + "beqz 
%6,1f\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "subu %6,32\n" + "sw %5,0(%6)\n" + "sw %8,4(%6)\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "sub %6,32\n" + "sw %5,0(%6)\n" + "sw %8,8(%6)\n" + #else + "dsubu %6,32\n" + "sd %5,0(%6)\n" + "sd %8,8(%6)\n" + #endif + + /* $7 = syscall($4 = flags, + * $5 = child_stack, + * $6 = parent_tidptr, + * $7 = newtls, + * $8 = child_tidptr) + */ + "li $2,%3\n" + "syscall\n" + + /* if ($7 != 0) + * return $2; + */ + "bnez $7,1f\n" + "bnez $2,1f\n" + + /* In the child, now. Call "fn(arg)". + */ + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "lw $25,0($29)\n" + "lw $4,4($29)\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "lw $25,0($29)\n" + "lw $4,8($29)\n" + #else + "ld $25,0($29)\n" + "ld $4,8($29)\n" + #endif + "jalr $25\n" + + /* Call _exit($2) + */ + "move $4,$2\n" + "li $2,%4\n" + "syscall\n" + + "1:\n" + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "addu $29, 24\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "add $29, 16\n" + #else + "daddu $29,16\n" + #endif + : "=&r" (__v0), "=r" (__r7) + : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), + "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), + "r"(__ptid), "r"(__r7), "r"(__ctid) + : "$9", "$10", "$11", "$12", "$13", "$14", "$15", + "$24", "memory"); + } + LSS_RETURN(int, __v0, __r7); + } + #elif defined (__PPC__) + #undef LSS_LOADARGS_0 + #define LSS_LOADARGS_0(name, dummy...) 
\ + __sc_0 = __NR_##name + #undef LSS_LOADARGS_1 + #define LSS_LOADARGS_1(name, arg1) \ + LSS_LOADARGS_0(name); \ + __sc_3 = (unsigned long) (arg1) + #undef LSS_LOADARGS_2 + #define LSS_LOADARGS_2(name, arg1, arg2) \ + LSS_LOADARGS_1(name, arg1); \ + __sc_4 = (unsigned long) (arg2) + #undef LSS_LOADARGS_3 + #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \ + LSS_LOADARGS_2(name, arg1, arg2); \ + __sc_5 = (unsigned long) (arg3) + #undef LSS_LOADARGS_4 + #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \ + LSS_LOADARGS_3(name, arg1, arg2, arg3); \ + __sc_6 = (unsigned long) (arg4) + #undef LSS_LOADARGS_5 + #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \ + LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \ + __sc_7 = (unsigned long) (arg5) + #undef LSS_LOADARGS_6 + #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ + LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \ + __sc_8 = (unsigned long) (arg6) + #undef LSS_ASMINPUT_0 + #define LSS_ASMINPUT_0 "0" (__sc_0) + #undef LSS_ASMINPUT_1 + #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3) + #undef LSS_ASMINPUT_2 + #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4) + #undef LSS_ASMINPUT_3 + #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5) + #undef LSS_ASMINPUT_4 + #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6) + #undef LSS_ASMINPUT_5 + #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7) + #undef LSS_ASMINPUT_6 + #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8) + #undef LSS_BODY + #define LSS_BODY(nr, type, name, args...) 
\ + long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0"); \ + register unsigned long __sc_3 __asm__ ("r3"); \ + register unsigned long __sc_4 __asm__ ("r4"); \ + register unsigned long __sc_5 __asm__ ("r5"); \ + register unsigned long __sc_6 __asm__ ("r6"); \ + register unsigned long __sc_7 __asm__ ("r7"); \ + register unsigned long __sc_8 __asm__ ("r8"); \ + \ + LSS_LOADARGS_##nr(name, args); \ + __asm__ __volatile__ \ + ("sc\n\t" \ + "mfcr %0" \ + : "=&r" (__sc_0), \ + "=&r" (__sc_3), "=&r" (__sc_4), \ + "=&r" (__sc_5), "=&r" (__sc_6), \ + "=&r" (__sc_7), "=&r" (__sc_8) \ + : LSS_ASMINPUT_##nr \ + : "cr0", "ctr", "memory", \ + "r9", "r10", "r11", "r12"); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + LSS_RETURN(type, __sc_ret, __sc_err) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(0, type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(1, type, name, arg1); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(2, type, name, arg1, arg2); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(3, type, name, arg1, arg2, arg3); \ + } + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \ + } + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \ + } + #undef _syscall6 + #define _syscall6(type, name, type1, 
arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \ + } + /* clone function adapted from glibc 2.18 clone.S */ + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __ret, __err; + { +#if defined(__PPC64__) + +/* Stack frame offsets. */ +#if _CALL_ELF != 2 +#define FRAME_MIN_SIZE 112 +#define FRAME_TOC_SAVE 40 +#else +#define FRAME_MIN_SIZE 32 +#define FRAME_TOC_SAVE 24 +#endif + + + register int (*__fn)(void *) __asm__ ("r3") = fn; + register void *__cstack __asm__ ("r4") = child_stack; + register int __flags __asm__ ("r5") = flags; + register void * __arg __asm__ ("r6") = arg; + register int * __ptidptr __asm__ ("r7") = parent_tidptr; + register void * __newtls __asm__ ("r8") = newtls; + register int * __ctidptr __asm__ ("r9") = child_tidptr; + __asm__ __volatile__( + /* check for fn == NULL + * and child_stack == NULL + */ + "cmpdi cr0, %6, 0\n\t" + "cmpdi cr1, %7, 0\n\t" + "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t" + "beq- cr0, 1f\n\t" + + /* set up stack frame for child */ + "clrrdi %7, %7, 4\n\t" + "li 0, 0\n\t" + "stdu 0, -%13(%7)\n\t" + + /* fn, arg, child_stack are saved acrVoss the syscall */ + "mr 28, %6\n\t" + "mr 29, %7\n\t" + "mr 27, %9\n\t" + + /* syscall + r3 == flags + r4 == child_stack + r5 == parent_tidptr + r6 == newtls + r7 == child_tidptr */ + "mr 3, %8\n\t" + "mr 5, %10\n\t" + "mr 6, %11\n\t" + "mr 7, %12\n\t" + "li 0, %4\n\t" + "sc\n\t" + + /* Test if syscall was successful */ + "cmpdi cr1, 3, 0\n\t" + "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" + "bne- cr1, 1f\n\t" + + /* Do the function call */ + "std 2, %14(1)\n\t" +#if _CALL_ELF != 2 + "ld 0, 0(28)\n\t" + "ld 2, 8(28)\n\t" + "mtctr 0\n\t" +#else + "mr 12, 28\n\t" + "mtctr 12\n\t" +#endif + "mr 3, 
27\n\t" + "bctrl\n\t" + "ld 2, %14(1)\n\t" + + /* Call _exit(r3) */ + "li 0, %5\n\t" + "sc\n\t" + + /* Return to parent */ + "1:\n\t" + "mr %0, 3\n\t" + : "=r" (__ret), "=r" (__err) + : "0" (-1), "i" (EINVAL), + "i" (__NR_clone), "i" (__NR_exit), + "r" (__fn), "r" (__cstack), "r" (__flags), + "r" (__arg), "r" (__ptidptr), "r" (__newtls), + "r" (__ctidptr), "i" (FRAME_MIN_SIZE), "i" (FRAME_TOC_SAVE) + : "cr0", "cr1", "memory", "ctr", + "r0", "r29", "r27", "r28"); +#else + register int (*__fn)(void *) __asm__ ("r8") = fn; + register void *__cstack __asm__ ("r4") = child_stack; + register int __flags __asm__ ("r3") = flags; + register void * __arg __asm__ ("r9") = arg; + register int * __ptidptr __asm__ ("r5") = parent_tidptr; + register void * __newtls __asm__ ("r6") = newtls; + register int * __ctidptr __asm__ ("r7") = child_tidptr; + __asm__ __volatile__( + /* check for fn == NULL + * and child_stack == NULL + */ + "cmpwi cr0, %6, 0\n\t" + "cmpwi cr1, %7, 0\n\t" + "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t" + "beq- cr0, 1f\n\t" + + /* set up stack frame for child */ + "clrrwi %7, %7, 4\n\t" + "li 0, 0\n\t" + "stwu 0, -16(%7)\n\t" + + /* fn, arg, child_stack are saved across the syscall: r28-30 */ + "mr 28, %6\n\t" + "mr 29, %7\n\t" + "mr 27, %9\n\t" + + /* syscall */ + "li 0, %4\n\t" + /* flags already in r3 + * child_stack already in r4 + * ptidptr already in r5 + * newtls already in r6 + * ctidptr already in r7 + */ + "sc\n\t" + + /* Test if syscall was successful */ + "cmpwi cr1, 3, 0\n\t" + "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" + "bne- cr1, 1f\n\t" + + /* Do the function call */ + "mtctr 28\n\t" + "mr 3, 27\n\t" + "bctrl\n\t" + + /* Call _exit(r3) */ + "li 0, %5\n\t" + "sc\n\t" + + /* Return to parent */ + "1:\n" + "mfcr %1\n\t" + "mr %0, 3\n\t" + : "=r" (__ret), "=r" (__err) + : "0" (-1), "1" (EINVAL), + "i" (__NR_clone), "i" (__NR_exit), + "r" (__fn), "r" (__cstack), "r" (__flags), + "r" (__arg), "r" (__ptidptr), "r" (__newtls), + "r" (__ctidptr) + : "cr0", 
"cr1", "memory", "ctr", + "r0", "r29", "r27", "r28"); + +#endif + } + LSS_RETURN(int, __ret, __err); + } + #elif defined(__aarch64__) + #undef LSS_REG + #define LSS_REG(r,a) register long __x##r __asm__("x"#r) = (long)a + #undef LSS_BODY + #define LSS_BODY(type,name,args...) \ + register long __res_x0 __asm__("x0"); \ + long __res; \ + __asm__ __volatile__ ("mov x8, %1\n" \ + "svc 0x0\n" \ + : "=r"(__res_x0) \ + : "i"(__NR_##name) , ## args \ + : "memory"); \ + __res = __res_x0; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__x0)); \ + } + #undef _syscall2 + #define _syscall2_long(type, name, svc, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); \ + LSS_BODY(type, svc, "r"(__x0), "r"(__x1)); \ + } + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + _syscall2_long(type, name, name, type1, arg1, type2, arg2) + #undef _syscall3 + #define _syscall3_long(type, name, svc, type1, arg1, type2, arg2, \ + type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_BODY(type, svc, "r"(__x0), "r"(__x1), "r"(__x2)); \ + } + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + _syscall3_long(type, name, name, type1, arg1, type2, arg2, \ + type3, arg3) + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); \ + LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3)); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ 
+ type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); \ + LSS_BODY(type, name, "r"(__x0), "r"(__x1), "r"(__x2), "r"(__x3), \ + "r"(__x4)); \ + } + #undef _syscall6 + #define _syscall6_long(type,name,svc,type1,arg1,type2,arg2,type3,arg3, \ + type4,arg4,type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ + LSS_BODY(type, svc, "r"(__x0), "r"(__x1), "x"(__x2), "r"(__x3), \ + "r"(__x4), "r"(__x5)); \ + } + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + _syscall6_long(type,name,name,type1,arg1,type2,arg2,type3,arg3, \ + type4,arg4,type5,arg5,type6,arg6) + /* clone function adapted from glibc 2.18 clone.S */ + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __res; + { + register int (*__fn)(void *) __asm__("x0") = fn; + register void *__stack __asm__("x1") = child_stack; + register int __flags __asm__("x2") = flags; + register void *__arg __asm__("x3") = arg; + register int *__ptid __asm__("x4") = parent_tidptr; + register void *__tls __asm__("x5") = newtls; + register int *__ctid __asm__("x6") = child_tidptr; + __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL) + * return -EINVAL; + */ + "cbz x0,1f\n" + "cbz x1,1f\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. 
+ */ + "stp x0,x3, [x1, #-16]!\n" + + "mov x0,x2\n" /* flags */ + "mov x2,x4\n" /* ptid */ + "mov x3,x5\n" /* tls */ + "mov x4,x6\n" /* ctid */ + "mov x8,%9\n" /* clone */ + + "svc 0x0\n" + + /* if (%r0 != 0) + * return %r0; + */ + "cmp x0, #0\n" + "bne 2f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ldp x1, x0, [sp], #16\n" + "blr x1\n" + + /* Call _exit(%r0). + */ + "mov x8, %10\n" + "svc 0x0\n" + "1:\n" + "mov x8, %1\n" + "2:\n" + : "=r" (__res) + : "i"(-EINVAL), + "r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg), + "r"(__ptid), "r"(__tls), "r"(__ctid), + "i"(__NR_clone), "i"(__NR_exit) + : "x30", "memory"); + } + LSS_RETURN(int, __res); + } + #elif defined(__s390__) + #undef LSS_REG + #define LSS_REG(r, a) register unsigned long __r##r __asm__("r"#r) = (unsigned long) a + #undef LSS_BODY + #define LSS_BODY(type, name, args...) \ + register unsigned long __nr __asm__("r1") \ + = (unsigned long)(__NR_##name); \ + register long __res_r2 __asm__("r2"); \ + long __res; \ + __asm__ __volatile__ \ + ("svc 0\n\t" \ + : "=d"(__res_r2) \ + : "d"(__nr), ## args \ + : "memory"); \ + __res = __res_r2; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(2, arg1); \ + LSS_BODY(type, name, "0"(__r2)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4)); \ + } + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, 
arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ + type4 arg4) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ + LSS_REG(5, arg4); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), \ + "d"(__r5)); \ + } + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ + type4 arg4, type5 arg5) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ + LSS_REG(5, arg4); LSS_REG(6, arg5); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), \ + "d"(__r5), "d"(__r6)); \ + } + #undef _syscall6 + #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, \ + type4 arg4, type5 arg5, type6 arg6) { \ + LSS_REG(2, arg1); LSS_REG(3, arg2); LSS_REG(4, arg3); \ + LSS_REG(5, arg4); LSS_REG(6, arg5); LSS_REG(7, arg6); \ + LSS_BODY(type, name, "0"(__r2), "d"(__r3), "d"(__r4), \ + "d"(__r5), "d"(__r6), "d"(__r7)); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __ret; + { + register int (*__fn)(void *) __asm__ ("r1") = fn; + register void *__cstack __asm__ ("r2") = child_stack; + register int __flags __asm__ ("r3") = flags; + register void *__arg __asm__ ("r0") = arg; + register int *__ptidptr __asm__ ("r4") = parent_tidptr; + register void *__newtls __asm__ ("r6") = newtls; + register int *__ctidptr __asm__ ("r5") = child_tidptr; + __asm__ __volatile__ ( + #ifndef __s390x__ + /* arg already in r0 */ + "ltr %4, %4\n\t" /* check fn, which is already in r1 */ + "jz 1f\n\t" /* NULL function pointer, return -EINVAL */ + "ltr %5, %5\n\t" /* check child_stack, which is already in r2 */ + "jz 1f\n\t" /* NULL stack pointer, return -EINVAL */ + /* flags already in r3 */ 
+ /* parent_tidptr already in r4 */ + /* child_tidptr already in r5 */ + /* newtls already in r6 */ + "svc %2\n\t" /* invoke clone syscall */ + "ltr %0,%%r2\n\t" /* load return code into __ret and test */ + "jnz 1f\n\t" /* return to parent if non-zero */ + /* start child thread */ + "lr %%r2, %7\n\t" /* set first parameter to void *arg */ + "ahi %%r15, -96\n\t" /* make room on the stack for the save area */ + "xc 0(4,%%r15), 0(%%r15)\n\t" + "basr %%r14, %4\n\t" /* jump to fn */ + "svc %3\n" /* invoke exit syscall */ + "1:\n" + #else + /* arg already in r0 */ + "ltgr %4, %4\n\t" /* check fn, which is already in r1 */ + "jz 1f\n\t" /* NULL function pointer, return -EINVAL */ + "ltgr %5, %5\n\t" /* check child_stack, which is already in r2 */ + "jz 1f\n\t" /* NULL stack pointer, return -EINVAL */ + /* flags already in r3 */ + /* parent_tidptr already in r4 */ + /* child_tidptr already in r5 */ + /* newtls already in r6 */ + "svc %2\n\t" /* invoke clone syscall */ + "ltgr %0, %%r2\n\t" /* load return code into __ret and test */ + "jnz 1f\n\t" /* return to parent if non-zero */ + /* start child thread */ + "lgr %%r2, %7\n\t" /* set first parameter to void *arg */ + "aghi %%r15, -160\n\t" /* make room on the stack for the save area */ + "xc 0(8,%%r15), 0(%%r15)\n\t" + "basr %%r14, %4\n\t" /* jump to fn */ + "svc %3\n" /* invoke exit syscall */ + "1:\n" + #endif + : "=r" (__ret) + : "0" (-EINVAL), "i" (__NR_clone), "i" (__NR_exit), + "d" (__fn), "d" (__cstack), "d" (__flags), "d" (__arg), + "d" (__ptidptr), "d" (__newtls), "d" (__ctidptr) + : "cc", "r14", "memory" + ); + } + LSS_RETURN(int, __ret); + } + #endif + #define __NR__exit __NR_exit + #define __NR__gettid __NR_gettid + #define __NR__mremap __NR_mremap + LSS_INLINE _syscall1(int, close, int, f) + LSS_INLINE _syscall1(int, _exit, int, e) +#if defined(__aarch64__) && defined (__ILP32__) + /* aarch64_ilp32 uses fcntl64 for sys_fcntl() */ + LSS_INLINE _syscall3_long(int, fcntl, fcntl64, int, f, + int, c, long, a) 
+#else + LSS_INLINE _syscall3(int, fcntl, int, f, + int, c, long, a) +#endif +#if defined(__aarch64__) && defined (__ILP32__) + /* aarch64_ilp32 uses fstat64 for sys_fstat() */ + LSS_INLINE _syscall2_long(int, fstat, fstat64, int, f, + struct kernel_stat*, b) +#else + LSS_INLINE _syscall2(int, fstat, int, f, + struct kernel_stat*, b) +#endif + LSS_INLINE _syscall6(int, futex, int*, a, + int, o, int, v, + struct kernel_timespec*, t, + int*, a2, + int, v3) +#ifdef __NR_getdents64 + LSS_INLINE _syscall3(int, getdents64, int, f, + struct kernel_dirent64*, d, int, c) +#define KERNEL_DIRENT kernel_dirent64 +#define GETDENTS sys_getdents64 +#else + LSS_INLINE _syscall3(int, getdents, int, f, + struct kernel_dirent*, d, int, c) +#define KERNEL_DIRENT kernel_dirent +#define GETDENTS sys_getdents +#endif + LSS_INLINE _syscall0(pid_t, getpid) + LSS_INLINE _syscall0(pid_t, getppid) + LSS_INLINE _syscall0(pid_t, _gettid) + LSS_INLINE _syscall2(int, kill, pid_t, p, + int, s) + #if defined(__x86_64__) + /* Need to make sure off_t isn't truncated to 32-bits under x32. 
*/ + LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) { + _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o), + LSS_SYSCALL_ARG(w)); + } + #elif defined(__aarch64__) && defined (__ILP32__) + /* aarch64_ilp32 uses llseek for sys_lseek() */ + LSS_INLINE _syscall3_long(off_t, lseek, llseek, int, f, + off_t, o, int, w) + #else + LSS_INLINE _syscall3(off_t, lseek, int, f, + off_t, o, int, w) + #endif + LSS_INLINE _syscall2(int, munmap, void*, s, + size_t, l) + LSS_INLINE _syscall5(void*, _mremap, void*, o, + size_t, os, size_t, ns, + unsigned long, f, void *, a) + LSS_INLINE _syscall2(int, prctl, int, o, + long, a) + LSS_INLINE _syscall4(long, ptrace, int, r, + pid_t, p, void *, a, void *, d) + LSS_INLINE _syscall3(ssize_t, read, int, f, + void *, b, size_t, c) + LSS_INLINE _syscall4(int, rt_sigaction, int, s, + const struct kernel_sigaction*, a, + struct kernel_sigaction*, o, size_t, c) + LSS_INLINE _syscall4(int, rt_sigprocmask, int, h, + const struct kernel_sigset_t*, s, + struct kernel_sigset_t*, o, size_t, c); + LSS_INLINE _syscall0(int, sched_yield) + LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s, + const stack_t*, o) + #if defined(__NR_fstatat) + LSS_INLINE _syscall4(int, fstatat, int, d, const char *, p, + struct kernel_stat*, b, int, flags) + LSS_INLINE int LSS_NAME(stat)(const char* p, struct kernel_stat* b) { + return LSS_NAME(fstatat)(AT_FDCWD,p,b,0); + } + #else + LSS_INLINE _syscall2(int, stat, const char*, f, + struct kernel_stat*, b) + #endif + LSS_INLINE _syscall3(ssize_t, write, int, f, + const void *, b, size_t, c) + #if defined(__NR_getcpu) + LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu, + unsigned *, node, void *, unused); + #endif + #if defined(__x86_64__) || defined(__aarch64__) || \ + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) + LSS_INLINE _syscall3(int, socket, int, d, + int, t, int, p) + #endif + #if defined(__x86_64__) || defined(__s390x__) + LSS_INLINE int LSS_NAME(sigaction)(int signum, + 
const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + #if defined(__x86_64__) + /* On x86_64, the kernel requires us to always set our own + * SA_RESTORER in order to be able to return from a signal handler. + * This function must have a "magic" signature that the "gdb" + * (and maybe the kernel?) can recognize. + */ + if (act != NULL && !(act->sa_flags & SA_RESTORER)) { + struct kernel_sigaction a = *act; + a.sa_flags |= SA_RESTORER; + a.sa_restorer = LSS_NAME(restore_rt)(); + return LSS_NAME(rt_sigaction)(signum, &a, oldact, + (KERNEL_NSIG+7)/8); + } else + #endif + return LSS_NAME(rt_sigaction)(signum, act, oldact, + (KERNEL_NSIG+7)/8); + } + + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); + } + #endif + #if (defined(__aarch64__)) || \ + (defined(__mips__) \ + && (_MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32)) + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + return LSS_NAME(rt_sigaction)(signum, act, oldact, (KERNEL_NSIG+7)/8); + + } + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); + } + #endif + #ifdef __NR_wait4 + LSS_INLINE _syscall4(pid_t, wait4, pid_t, p, + int*, s, int, o, + struct kernel_rusage*, r) + LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){ + return LSS_NAME(wait4)(pid, status, options, 0); + } + #else + LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p, + int*, s, int, o) + #endif + #ifdef __NR_openat + LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m) + LSS_INLINE int LSS_NAME(open)(const char* p, int f, int m) { + return LSS_NAME(openat)(AT_FDCWD,p,f,m ); + } + #else + LSS_INLINE _syscall3(int, open, 
const char*, p, + int, f, int, m) + #endif + LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) { + memset(&set->sig, 0, sizeof(set->sig)); + return 0; + } + + LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) { + memset(&set->sig, -1, sizeof(set->sig)); + return 0; + } + + LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] + |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0]))); + return 0; + } + } + + LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] + &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0])))); + return 0; + } + } + + #if defined(__i386__) || \ + defined(__arm__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ + defined(__PPC__) || \ + (defined(__s390__) && !defined(__s390x__)) + #define __NR__sigaction __NR_sigaction + #define __NR__sigprocmask __NR_sigprocmask + LSS_INLINE _syscall2(int, fstat64, int, f, + struct kernel_stat64 *, b) + LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, + loff_t *, res, uint, wh) +#if defined(__s390__) && !defined(__s390x__) + /* On s390, mmap2() arguments are passed in memory. 
*/ + LSS_INLINE void* LSS_NAME(_mmap2)(void *s, size_t l, int p, int f, int d, + off_t o) { + unsigned long buf[6] = { (unsigned long) s, (unsigned long) l, + (unsigned long) p, (unsigned long) f, + (unsigned long) d, (unsigned long) o }; + LSS_REG(2, buf); + LSS_BODY(void*, mmap2, "0"(__r2)); + } +#elif !defined(__PPC64__) + #define __NR__mmap2 __NR_mmap2 + LSS_INLINE _syscall6(void*, _mmap2, void*, s, + size_t, l, int, p, + int, f, int, d, + off_t, o) +#endif + LSS_INLINE _syscall3(int, _sigaction, int, s, + const struct kernel_old_sigaction*, a, + struct kernel_old_sigaction*, o) + LSS_INLINE _syscall3(int, _sigprocmask, int, h, + const unsigned long*, s, + unsigned long*, o) + LSS_INLINE _syscall2(int, stat64, const char *, p, + struct kernel_stat64 *, b) + + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + int old_errno = LSS_ERRNO; + int rc; + struct kernel_sigaction a; + if (act != NULL) { + a = *act; + #ifdef __i386__ + /* On i386, the kernel requires us to always set our own + * SA_RESTORER when using realtime signals. Otherwise, it does not + * know how to return from a signal handler. This function must have + * a "magic" signature that the "gdb" (and maybe the kernel?) can + * recognize. + * Apparently, a SA_RESTORER is implicitly set by the kernel, when + * using non-realtime signals. + * + * TODO: Test whether ARM needs a restorer + */ + if (!(a.sa_flags & SA_RESTORER)) { + a.sa_flags |= SA_RESTORER; + a.sa_restorer = (a.sa_flags & SA_SIGINFO) + ? LSS_NAME(restore_rt)() : LSS_NAME(restore)(); + } + #endif + } + rc = LSS_NAME(rt_sigaction)(signum, act ? 
&a : act, oldact, + (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa; + if (!act) { + ptr_a = NULL; + } else { + oa.sa_handler_ = act->sa_handler_; + memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask)); + #ifndef __mips__ + oa.sa_restorer = act->sa_restorer; + #endif + oa.sa_flags = act->sa_flags; + } + if (!oldact) { + ptr_oa = NULL; + } + LSS_ERRNO = old_errno; + rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa); + if (rc == 0 && oldact) { + if (act) { + memcpy(oldact, act, sizeof(*act)); + } else { + memset(oldact, 0, sizeof(*oldact)); + } + oldact->sa_handler_ = ptr_oa->sa_handler_; + oldact->sa_flags = ptr_oa->sa_flags; + memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask)); + #ifndef __mips__ + oldact->sa_restorer = ptr_oa->sa_restorer; + #endif + } + } + return rc; + } + + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + int olderrno = LSS_ERRNO; + int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + LSS_ERRNO = olderrno; + if (oldset) { + LSS_NAME(sigemptyset)(oldset); + } + rc = LSS_NAME(_sigprocmask)(how, + set ? &set->sig[0] : NULL, + oldset ? &oldset->sig[0] : NULL); + } + return rc; + } + #endif + #if defined(__i386__) || \ + defined(__ARM_ARCH_3__) || defined(__ARM_EABI__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ + (defined(__PPC__) && !defined(__PPC64__)) || \ + (defined(__s390__) && !defined(__s390x__)) + /* On these architectures, implement mmap() with mmap2(). */ + LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, + int64_t o) { + if (o % 4096) { + LSS_ERRNO = EINVAL; + return (void *) -1; + } + return LSS_NAME(_mmap2)(s, l, p, f, d, (o / 4096)); + } + #elif defined(__s390x__) + /* On s390x, mmap() arguments are passed in memory. 
*/ + LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, + int64_t o) { + unsigned long buf[6] = { (unsigned long) s, (unsigned long) l, + (unsigned long) p, (unsigned long) f, + (unsigned long) d, (unsigned long) o }; + LSS_REG(2, buf); + LSS_BODY(void*, mmap, "0"(__r2)); + } + #elif defined(__x86_64__) + /* Need to make sure __off64_t isn't truncated to 32-bits under x32. */ + LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, + int64_t o) { + LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l), + LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f), + LSS_SYSCALL_ARG(d), (uint64_t)(o)); + } + #elif defined(__aarch64__) && defined (__ILP32__) + /* aarch64_ilp32 uses mmap2 for sys_mmap() */ + LSS_INLINE _syscall6_long(void*, mmap, mmap2, void*, addr, size_t, length, + int, prot, int, flags, int, fd, int64_t, offset) + #else + /* Remaining 64-bit architectures. */ + LSS_INLINE _syscall6(void*, mmap, void*, addr, size_t, length, int, prot, + int, flags, int, fd, int64_t, offset) + #endif + #if defined(__i386__) || \ + defined(__PPC__) || \ + (defined(__arm__) && !defined(__ARM_EABI__)) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || \ + defined(__s390__) + + /* See sys_socketcall in net/socket.c in kernel source. + * It de-multiplexes on its first arg and unpacks the arglist + * array in its second arg. + */ + LSS_INLINE _syscall2(int, socketcall, int, c, unsigned long*, a) + + LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { + unsigned long args[3] = { + (unsigned long) domain, + (unsigned long) type, + (unsigned long) protocol + }; + return LSS_NAME(socketcall)(1, args); + } + #elif defined(__ARM_EABI__) + LSS_INLINE _syscall3(int, socket, int, d, + int, t, int, p) + #endif + #if defined(__mips__) + /* sys_pipe() on MIPS has non-standard calling conventions, as it returns + * both file handles through CPU registers. 
+ */ + LSS_INLINE int LSS_NAME(pipe)(int *p) { + register unsigned long __v0 __asm__("$2") = __NR_pipe; + register unsigned long __v1 __asm__("$3"); + register unsigned long __r7 __asm__("$7"); + __asm__ __volatile__ ("syscall\n" + : "=&r"(__v0), "=&r"(__v1), "+r" (__r7) + : "0"(__v0) + : "$8", "$9", "$10", "$11", "$12", + "$13", "$14", "$15", "$24", "memory"); + if (__r7) { + LSS_ERRNO = __v0; + return -1; + } else { + p[0] = __v0; + p[1] = __v1; + return 0; + } + } + #elif defined(__NR_pipe2) + LSS_INLINE _syscall2(int, pipe2, int *, p, + int, f ) + LSS_INLINE int LSS_NAME(pipe)( int * p) { + return LSS_NAME(pipe2)(p, 0); + } + #else + LSS_INLINE _syscall1(int, pipe, int *, p) + #endif + + LSS_INLINE pid_t LSS_NAME(gettid)() { + pid_t tid = LSS_NAME(_gettid)(); + if (tid != -1) { + return tid; + } + return LSS_NAME(getpid)(); + } + + LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size, + size_t new_size, int flags, ...) { + va_list ap; + void *new_address, *rc; + va_start(ap, flags); + new_address = va_arg(ap, void *); + rc = LSS_NAME(_mremap)(old_address, old_size, new_size, + flags, new_address); + va_end(ap); + return rc; + } + + LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) { + /* PTRACE_DETACH can sometimes forget to wake up the tracee and it + * then sends job control signals to the real parent, rather than to + * the tracer. We reduce the risk of this happening by starting a + * whole new time slice, and then quickly sending a SIGCONT signal + * right after detaching from the tracee. 
+ */ + int rc, err; + LSS_NAME(sched_yield)(); + rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0); + err = LSS_ERRNO; + LSS_NAME(kill)(pid, SIGCONT); + LSS_ERRNO = err; + return rc; + } +#endif + +#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS) +} +#endif + +#endif +#endif diff --git a/src/third_party/gperftools-2.7/src/base/linuxthreads.cc b/src/third_party/gperftools-2.7/src/base/linuxthreads.cc new file mode 100644 index 00000000000..891e70c88c4 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/linuxthreads.cc @@ -0,0 +1,707 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2005-2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +#include "base/linuxthreads.h" + +#ifdef THREADS +#ifdef __cplusplus +extern "C" { +#endif + +#include <sched.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <sys/prctl.h> +#include <semaphore.h> + +#include "base/linux_syscall_support.h" +#include "base/thread_lister.h" + +#ifndef CLONE_UNTRACED +#define CLONE_UNTRACED 0x00800000 +#endif + + +/* Synchronous signals that should not be blocked while in the lister thread. + */ +static const int sync_signals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS, + SIGXCPU, SIGXFSZ }; + +/* itoa() is not a standard function, and we cannot safely call printf() + * after suspending threads. So, we just implement our own copy. A + * recursive approach is the easiest here. + */ +static char *local_itoa(char *buf, int i) { + if (i < 0) { + *buf++ = '-'; + return local_itoa(buf, -i); + } else { + if (i >= 10) + buf = local_itoa(buf, i/10); + *buf++ = (i%10) + '0'; + *buf = '\000'; + return buf; + } +} + + +/* Wrapper around clone() that runs "fn" on the same stack as the + * caller! Unlike fork(), the cloned thread shares the same address space. + * The caller must be careful to use only minimal amounts of stack until + * the cloned thread has returned. + * There is a good chance that the cloned thread and the caller will share + * the same copy of errno! 
+ */ +#ifdef __GNUC__ +#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3 +/* Try to force this function into a separate stack frame, and make sure + * that arguments are passed on the stack. + */ +static int local_clone (int (*fn)(void *), void *arg, ...) + __attribute__ ((noinline)); +#endif +#endif + +/* To avoid the gap cross page boundaries, increase by the large parge + * size mostly PowerPC system uses. */ +#ifdef __PPC64__ +#define CLONE_STACK_SIZE 65536 +#else +#define CLONE_STACK_SIZE 4096 +#endif + +static int local_clone (int (*fn)(void *), void *arg, ...) { + /* Leave 4kB of gap between the callers stack and the new clone. This + * should be more than sufficient for the caller to call waitpid() until + * the cloned thread terminates. + * + * It is important that we set the CLONE_UNTRACED flag, because newer + * versions of "gdb" otherwise attempt to attach to our thread, and will + * attempt to reap its status codes. This subsequently results in the + * caller hanging indefinitely in waitpid(), waiting for a change in + * status that will never happen. By setting the CLONE_UNTRACED flag, we + * prevent "gdb" from stealing events, but we still expect the thread + * lister to fail, because it cannot PTRACE_ATTACH to the process that + * is being debugged. This is OK and the error code will be reported + * correctly. + */ + return sys_clone(fn, (char *)&arg - CLONE_STACK_SIZE, + CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_UNTRACED, arg, 0, 0, 0); +} + + +/* Local substitute for the atoi() function, which is not necessarily safe + * to call once threads are suspended (depending on whether libc looks up + * locale information, when executing atoi()). + */ +static int local_atoi(const char *s) { + int n = 0; + int neg = *s == '-'; + if (neg) + s++; + while (*s >= '0' && *s <= '9') + n = 10*n + (*s++ - '0'); + return neg ? 
-n : n; +} + + +/* Re-runs fn until it doesn't cause EINTR + */ +#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + + +/* Wrap a class around system calls, in order to give us access to + * a private copy of errno. This only works in C++, but it has the + * advantage of not needing nested functions, which are a non-standard + * language extension. + */ +#ifdef __cplusplus +namespace { + class SysCalls { + public: + #define SYS_CPLUSPLUS + #define SYS_ERRNO my_errno + #define SYS_INLINE inline + #define SYS_PREFIX -1 + #undef SYS_LINUX_SYSCALL_SUPPORT_H + #include "linux_syscall_support.h" + SysCalls() : my_errno(0) { } + int my_errno; + }; +} +#define ERRNO sys.my_errno +#else +#define ERRNO my_errno +#endif + + +/* Wrapper for open() which is guaranteed to never return EINTR. + */ +static int c_open(const char *fname, int flags, int mode) { + ssize_t rc; + NO_INTR(rc = sys_open(fname, flags, mode)); + return rc; +} + + +/* abort() is not safely reentrant, and changes it's behavior each time + * it is called. This means, if the main application ever called abort() + * we cannot safely call it again. This would happen if we were called + * from a SIGABRT signal handler in the main application. So, document + * that calling SIGABRT from the thread lister makes it not signal safe + * (and vice-versa). + * Also, since we share address space with the main application, we + * cannot call abort() from the callback and expect the main application + * to behave correctly afterwards. In fact, the only thing we can do, is + * to terminate the main application with extreme prejudice (aka + * PTRACE_KILL). + * We set up our own SIGABRT handler to do this. + * In order to find the main application from the signal handler, we + * need to store information about it in global variables. This is + * safe, because the main application should be suspended at this + * time. 
If the callback ever called TCMalloc_ResumeAllProcessThreads(), then + * we are running a higher risk, though. So, try to avoid calling + * abort() after calling TCMalloc_ResumeAllProcessThreads. + */ +static volatile int *sig_pids, sig_num_threads, sig_proc, sig_marker; + + +/* Signal handler to help us recover from dying while we are attached to + * other threads. + */ +static void SignalHandler(int signum, siginfo_t *si, void *data) { + if (sig_pids != NULL) { + if (signum == SIGABRT) { + while (sig_num_threads-- > 0) { + /* Not sure if sched_yield is really necessary here, but it does not */ + /* hurt, and it might be necessary for the same reasons that we have */ + /* to do so in sys_ptrace_detach(). */ + sys_sched_yield(); + sys_ptrace(PTRACE_KILL, sig_pids[sig_num_threads], 0, 0); + } + } else if (sig_num_threads > 0) { + TCMalloc_ResumeAllProcessThreads(sig_num_threads, (int *)sig_pids); + } + } + sig_pids = NULL; + if (sig_marker >= 0) + NO_INTR(sys_close(sig_marker)); + sig_marker = -1; + if (sig_proc >= 0) + NO_INTR(sys_close(sig_proc)); + sig_proc = -1; + + sys__exit(signum == SIGABRT ? 1 : 2); +} + + +/* Try to dirty the stack, and hope that the compiler is not smart enough + * to optimize this function away. Or worse, the compiler could inline the + * function and permanently allocate the data on the stack. + */ +static void DirtyStack(size_t amount) { + char buf[amount]; + memset(buf, 0, amount); + sys_read(-1, buf, amount); +} + + +/* Data structure for passing arguments to the lister thread. 
+ */ +#define ALT_STACKSIZE (MINSIGSTKSZ + 4096) + +struct ListerParams { + int result, err; + char *altstack_mem; + ListAllProcessThreadsCallBack callback; + void *parameter; + va_list ap; + sem_t *lock; +}; + + +static void ListerThread(struct ListerParams *args) { + int found_parent = 0; + pid_t clone_pid = sys_gettid(), ppid = sys_getppid(); + char proc_self_task[80], marker_name[48], *marker_path; + const char *proc_paths[3]; + const char *const *proc_path = proc_paths; + int proc = -1, marker = -1, num_threads = 0; + int max_threads = 0, sig; + struct kernel_stat marker_sb, proc_sb; + stack_t altstack; + + /* Wait for parent thread to set appropriate permissions + * to allow ptrace activity + */ + if (sem_wait(args->lock) < 0) { + goto failure; + } + + /* Create "marker" that we can use to detect threads sharing the same + * address space and the same file handles. By setting the FD_CLOEXEC flag + * we minimize the risk of misidentifying child processes as threads; + * and since there is still a race condition, we will filter those out + * later, anyway. 
+ */ + if ((marker = sys_socket(PF_LOCAL, SOCK_DGRAM, 0)) < 0 || + sys_fcntl(marker, F_SETFD, FD_CLOEXEC) < 0) { + failure: + args->result = -1; + args->err = errno; + if (marker >= 0) + NO_INTR(sys_close(marker)); + sig_marker = marker = -1; + if (proc >= 0) + NO_INTR(sys_close(proc)); + sig_proc = proc = -1; + sys__exit(1); + } + + /* Compute search paths for finding thread directories in /proc */ + local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'), ppid); + strcpy(marker_name, proc_self_task); + marker_path = marker_name + strlen(marker_name); + strcat(proc_self_task, "/task/"); + proc_paths[0] = proc_self_task; /* /proc/$$/task/ */ + proc_paths[1] = "/proc/"; /* /proc/ */ + proc_paths[2] = NULL; + + /* Compute path for marker socket in /proc */ + local_itoa(strcpy(marker_path, "/fd/") + 4, marker); + if (sys_stat(marker_name, &marker_sb) < 0) { + goto failure; + } + + /* Catch signals on an alternate pre-allocated stack. This way, we can + * safely execute the signal handler even if we ran out of memory. + */ + memset(&altstack, 0, sizeof(altstack)); + altstack.ss_sp = args->altstack_mem; + altstack.ss_flags = 0; + altstack.ss_size = ALT_STACKSIZE; + sys_sigaltstack(&altstack, (const stack_t *)NULL); + + /* Some kernels forget to wake up traced processes, when the + * tracer dies. So, intercept synchronous signals and make sure + * that we wake up our tracees before dying. It is the caller's + * responsibility to ensure that asynchronous signals do not + * interfere with this function. + */ + sig_marker = marker; + sig_proc = -1; + for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) { + struct kernel_sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction_ = SignalHandler; + sys_sigfillset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK|SA_SIGINFO|SA_RESETHAND; + sys_sigaction(sync_signals[sig], &sa, (struct kernel_sigaction *)NULL); + } + + /* Read process directories in /proc/... 
*/ + for (;;) { + /* Some kernels know about threads, and hide them in "/proc" + * (although they are still there, if you know the process + * id). Threads are moved into a separate "task" directory. We + * check there first, and then fall back on the older naming + * convention if necessary. + */ + if ((sig_proc = proc = c_open(*proc_path, O_RDONLY|O_DIRECTORY, 0)) < 0) { + if (*++proc_path != NULL) + continue; + goto failure; + } + if (sys_fstat(proc, &proc_sb) < 0) + goto failure; + + /* Since we are suspending threads, we cannot call any libc + * functions that might acquire locks. Most notably, we cannot + * call malloc(). So, we have to allocate memory on the stack, + * instead. Since we do not know how much memory we need, we + * make a best guess. And if we guessed incorrectly we retry on + * a second iteration (by jumping to "detach_threads"). + * + * Unless the number of threads is increasing very rapidly, we + * should never need to do so, though, as our guestimate is very + * conservative. + */ + if (max_threads < proc_sb.st_nlink + 100) + max_threads = proc_sb.st_nlink + 100; + + /* scope */ { + pid_t pids[max_threads]; + int added_entries = 0; + sig_num_threads = num_threads; + sig_pids = pids; + for (;;) { + struct KERNEL_DIRENT *entry; + char buf[4096]; + ssize_t nbytes = GETDENTS(proc, (struct KERNEL_DIRENT *)buf, + sizeof(buf)); + if (nbytes < 0) + goto failure; + else if (nbytes == 0) { + if (added_entries) { + /* Need to keep iterating over "/proc" in multiple + * passes until we no longer find any more threads. This + * algorithm eventually completes, when all threads have + * been suspended. 
+ */ + added_entries = 0; + sys_lseek(proc, 0, SEEK_SET); + continue; + } + break; + } + for (entry = (struct KERNEL_DIRENT *)buf; + entry < (struct KERNEL_DIRENT *)&buf[nbytes]; + entry = (struct KERNEL_DIRENT *)((char *)entry+entry->d_reclen)) { + if (entry->d_ino != 0) { + const char *ptr = entry->d_name; + pid_t pid; + + /* Some kernels hide threads by preceding the pid with a '.' */ + if (*ptr == '.') + ptr++; + + /* If the directory is not numeric, it cannot be a + * process/thread + */ + if (*ptr < '0' || *ptr > '9') + continue; + pid = local_atoi(ptr); + + /* Attach (and suspend) all threads */ + if (pid && pid != clone_pid) { + struct kernel_stat tmp_sb; + char fname[entry->d_reclen + 48]; + strcat(strcat(strcpy(fname, "/proc/"), + entry->d_name), marker_path); + + /* Check if the marker is identical to the one we created */ + if (sys_stat(fname, &tmp_sb) >= 0 && + marker_sb.st_ino == tmp_sb.st_ino) { + long i, j; + + /* Found one of our threads, make sure it is no duplicate */ + for (i = 0; i < num_threads; i++) { + /* Linear search is slow, but should not matter much for + * the typically small number of threads. + */ + if (pids[i] == pid) { + /* Found a duplicate; most likely on second pass */ + goto next_entry; + } + } + + /* Check whether data structure needs growing */ + if (num_threads >= max_threads) { + /* Back to square one, this time with more memory */ + NO_INTR(sys_close(proc)); + goto detach_threads; + } + + /* Attaching to thread suspends it */ + pids[num_threads++] = pid; + sig_num_threads = num_threads; + if (sys_ptrace(PTRACE_ATTACH, pid, (void *)0, + (void *)0) < 0) { + /* If operation failed, ignore thread. Maybe it + * just died? There might also be a race + * condition with a concurrent core dumper or + * with a debugger. In that case, we will just + * make a best effort, rather than failing + * entirely. 
+ */ + num_threads--; + sig_num_threads = num_threads; + goto next_entry; + } + while (sys_waitpid(pid, (int *)0, __WALL) < 0) { + if (errno != EINTR) { + sys_ptrace_detach(pid); + num_threads--; + sig_num_threads = num_threads; + goto next_entry; + } + } + + if (sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i++ != j || + sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i != j) { + /* Address spaces are distinct, even though both + * processes show the "marker". This is probably + * a forked child process rather than a thread. + */ + sys_ptrace_detach(pid); + num_threads--; + sig_num_threads = num_threads; + } else { + found_parent |= pid == ppid; + added_entries++; + } + } + } + } + next_entry:; + } + } + NO_INTR(sys_close(proc)); + sig_proc = proc = -1; + + /* If we failed to find any threads, try looking somewhere else in + * /proc. Maybe, threads are reported differently on this system. + */ + if (num_threads > 1 || !*++proc_path) { + NO_INTR(sys_close(marker)); + sig_marker = marker = -1; + + /* If we never found the parent process, something is very wrong. + * Most likely, we are running in debugger. Any attempt to operate + * on the threads would be very incomplete. Let's just report an + * error to the caller. + */ + if (!found_parent) { + TCMalloc_ResumeAllProcessThreads(num_threads, pids); + sys__exit(3); + } + + /* Now we are ready to call the callback, + * which takes care of resuming the threads for us. 
+ */ + args->result = args->callback(args->parameter, num_threads, + pids, args->ap); + args->err = errno; + + /* Callback should have resumed threads, but better safe than sorry */ + if (TCMalloc_ResumeAllProcessThreads(num_threads, pids)) { + /* Callback forgot to resume at least one thread, report error */ + args->err = EINVAL; + args->result = -1; + } + + sys__exit(0); + } + detach_threads: + /* Resume all threads prior to retrying the operation */ + TCMalloc_ResumeAllProcessThreads(num_threads, pids); + sig_pids = NULL; + num_threads = 0; + sig_num_threads = num_threads; + max_threads += 100; + } + } +} + + +/* This function gets the list of all linux threads of the current process + * passes them to the 'callback' along with the 'parameter' pointer; at the + * call back call time all the threads are paused via + * PTRACE_ATTACH. + * The callback is executed from a separate thread which shares only the + * address space, the filesystem, and the filehandles with the caller. Most + * notably, it does not share the same pid and ppid; and if it terminates, + * the rest of the application is still there. 'callback' is supposed to do + * or arrange for TCMalloc_ResumeAllProcessThreads. This happens automatically, if + * the thread raises a synchronous signal (e.g. SIGSEGV); asynchronous + * signals are blocked. If the 'callback' decides to unblock them, it must + * ensure that they cannot terminate the application, or that + * TCMalloc_ResumeAllProcessThreads will get called. + * It is an error for the 'callback' to make any library calls that could + * acquire locks. Most notably, this means that most system calls have to + * avoid going through libc. Also, this means that it is not legal to call + * exit() or abort(). + * We return -1 on error and the return value of 'callback' on success. + */ +int TCMalloc_ListAllProcessThreads(void *parameter, + ListAllProcessThreadsCallBack callback, ...) 
{ + char altstack_mem[ALT_STACKSIZE]; + struct ListerParams args; + pid_t clone_pid; + int dumpable = 1, sig; + struct kernel_sigset_t sig_blocked, sig_old; + sem_t lock; + + va_start(args.ap, callback); + + /* If we are short on virtual memory, initializing the alternate stack + * might trigger a SIGSEGV. Let's do this early, before it could get us + * into more trouble (i.e. before signal handlers try to use the alternate + * stack, and before we attach to other threads). + */ + memset(altstack_mem, 0, sizeof(altstack_mem)); + + /* Some of our cleanup functions could conceivable use more stack space. + * Try to touch the stack right now. This could be defeated by the compiler + * being too smart for it's own good, so try really hard. + */ + DirtyStack(32768); + + /* Make this process "dumpable". This is necessary in order to ptrace() + * after having called setuid(). + */ + dumpable = sys_prctl(PR_GET_DUMPABLE, 0); + if (!dumpable) + sys_prctl(PR_SET_DUMPABLE, 1); + + /* Fill in argument block for dumper thread */ + args.result = -1; + args.err = 0; + args.altstack_mem = altstack_mem; + args.parameter = parameter; + args.callback = callback; + args.lock = &lock; + + /* Before cloning the thread lister, block all asynchronous signals, as we */ + /* are not prepared to handle them. */ + sys_sigfillset(&sig_blocked); + for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) { + sys_sigdelset(&sig_blocked, sync_signals[sig]); + } + if (sys_sigprocmask(SIG_BLOCK, &sig_blocked, &sig_old)) { + args.err = errno; + args.result = -1; + goto failed; + } + + /* scope */ { + /* After cloning, both the parent and the child share the same instance + * of errno. We must make sure that at least one of these processes + * (in our case, the parent) uses modified syscall macros that update + * a local copy of errno, instead. 
+ */ + #ifdef __cplusplus + #define sys0_sigprocmask sys.sigprocmask + #define sys0_waitpid sys.waitpid + SysCalls sys; + #else + int my_errno; + #define SYS_ERRNO my_errno + #define SYS_INLINE inline + #define SYS_PREFIX 0 + #undef SYS_LINUX_SYSCALL_SUPPORT_H + #include "linux_syscall_support.h" + #endif + + /* Lock before clone so that parent can set + * ptrace permissions (if necessary) prior + * to ListerThread actually executing + */ + if (sem_init(&lock, 0, 0) == 0) { + + int clone_errno; + clone_pid = local_clone((int (*)(void *))ListerThread, &args); + clone_errno = errno; + + sys_sigprocmask(SIG_SETMASK, &sig_old, &sig_old); + + if (clone_pid >= 0) { +#ifdef PR_SET_PTRACER + /* In newer versions of glibc permission must explicitly + * be given to allow for ptrace. + */ + prctl(PR_SET_PTRACER, clone_pid, 0, 0, 0); +#endif + /* Releasing the lock here allows the + * ListerThread to execute and ptrace us. + */ + sem_post(&lock); + int status, rc; + while ((rc = sys0_waitpid(clone_pid, &status, __WALL)) < 0 && + ERRNO == EINTR) { + /* Keep waiting */ + } + if (rc < 0) { + args.err = ERRNO; + args.result = -1; + } else if (WIFEXITED(status)) { + switch (WEXITSTATUS(status)) { + case 0: break; /* Normal process termination */ + case 2: args.err = EFAULT; /* Some fault (e.g. 
SIGSEGV) detected */ + args.result = -1; + break; + case 3: args.err = EPERM; /* Process is already being traced */ + args.result = -1; + break; + default:args.err = ECHILD; /* Child died unexpectedly */ + args.result = -1; + break; + } + } else if (!WIFEXITED(status)) { + args.err = EFAULT; /* Terminated due to an unhandled signal*/ + args.result = -1; + } + sem_destroy(&lock); + } else { + args.result = -1; + args.err = clone_errno; + } + } else { + args.result = -1; + args.err = errno; + } + } + + /* Restore the "dumpable" state of the process */ +failed: + if (!dumpable) + sys_prctl(PR_SET_DUMPABLE, dumpable); + + va_end(args.ap); + + errno = args.err; + return args.result; +} + +/* This function resumes the list of all linux threads that + * TCMalloc_ListAllProcessThreads pauses before giving to its callback. + * The function returns non-zero if at least one thread was + * suspended and has now been resumed. + */ +int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) { + int detached_at_least_one = 0; + while (num_threads-- > 0) { + detached_at_least_one |= sys_ptrace_detach(thread_pids[num_threads]) >= 0; + } + return detached_at_least_one; +} + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/third_party/gperftools-2.7/src/base/linuxthreads.h b/src/third_party/gperftools-2.7/src/base/linuxthreads.h new file mode 100644 index 00000000000..09ce45fc13f --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/linuxthreads.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2005-2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +#ifndef _LINUXTHREADS_H +#define _LINUXTHREADS_H + +/* Include thread_lister.h to get the interface that we implement for linux. + */ + +/* We currently only support certain platforms on Linux. Porting to other + * related platforms should not be difficult. + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ + defined(__mips__) || defined(__PPC__) || defined(__aarch64__) || \ + defined(__s390__)) && defined(__linux) + +/* Define the THREADS symbol to make sure that there is exactly one core dumper + * built into the library. 
+ */ +#define THREADS "Linux /proc" + +#endif + +#endif /* _LINUXTHREADS_H */ diff --git a/src/third_party/gperftools-2.7/src/base/logging.cc b/src/third_party/gperftools-2.7/src/base/logging.cc new file mode 100644 index 00000000000..761c2fd582e --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/logging.cc @@ -0,0 +1,108 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// This file just provides storage for FLAGS_verbose. + +#include <config.h> +#include "base/logging.h" +#include "base/commandlineflags.h" + +DEFINE_int32(verbose, EnvToInt("PERFTOOLS_VERBOSE", 0), + "Set to numbers >0 for more verbose output, or <0 for less. " + "--verbose == -4 means we log fatal errors only."); + + +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) + +// While windows does have a POSIX-compatible API +// (_open/_write/_close), it acquires memory. Using this lower-level +// windows API is the closest we can get to being "raw". +RawFD RawOpenForWriting(const char* filename) { + // CreateFile allocates memory if file_name isn't absolute, so if + // that ever becomes a problem then we ought to compute the absolute + // path on its behalf (perhaps the ntdll/kernel function isn't aware + // of the working directory?) + RawFD fd = CreateFileA(filename, GENERIC_WRITE, 0, NULL, + CREATE_ALWAYS, 0, NULL); + if (fd != kIllegalRawFD && GetLastError() == ERROR_ALREADY_EXISTS) + SetEndOfFile(fd); // truncate the existing file + return fd; +} + +void RawWrite(RawFD handle, const char* buf, size_t len) { + while (len > 0) { + DWORD wrote; + BOOL ok = WriteFile(handle, buf, len, &wrote, NULL); + // We do not use an asynchronous file handle, so ok==false means an error + if (!ok) break; + buf += wrote; + len -= wrote; + } +} + +void RawClose(RawFD handle) { + CloseHandle(handle); +} + +#else // _WIN32 || __CYGWIN__ || __CYGWIN32__ + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif + +// Re-run fn until it doesn't cause EINTR. 
+#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +RawFD RawOpenForWriting(const char* filename) { + return open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0664); +} + +void RawWrite(RawFD fd, const char* buf, size_t len) { + while (len > 0) { + ssize_t r; + NO_INTR(r = write(fd, buf, len)); + if (r <= 0) break; + buf += r; + len -= r; + } +} + +void RawClose(RawFD fd) { + NO_INTR(close(fd)); +} + +#endif // _WIN32 || __CYGWIN__ || __CYGWIN32__ diff --git a/src/third_party/gperftools-2.7/src/base/logging.h b/src/third_party/gperftools-2.7/src/base/logging.h new file mode 100644 index 00000000000..a1afe4dca6e --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/logging.h @@ -0,0 +1,259 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// This file contains #include information about logging-related stuff. +// Pretty much everybody needs to #include this file so that they can +// log various happenings. +// +#ifndef _LOGGING_H_ +#define _LOGGING_H_ + +#include <config.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for write() +#endif +#include <string.h> // for strlen(), strcmp() +#include <assert.h> +#include <errno.h> // for errno +#include "base/commandlineflags.h" + +// On some systems (like freebsd), we can't call write() at all in a +// global constructor, perhaps because errno hasn't been set up. +// (In windows, we can't call it because it might call malloc.) +// Calling the write syscall is safer (it doesn't set errno), so we +// prefer that. Note we don't care about errno for logging: we just +// do logging on a best-effort basis. +#if defined(_MSC_VER) +#define WRITE_TO_STDERR(buf, len) WriteToStderr(buf, len); // in port.cc +#elif defined(HAVE_SYS_SYSCALL_H) +#include <sys/syscall.h> +#define WRITE_TO_STDERR(buf, len) syscall(SYS_write, STDERR_FILENO, buf, len) +#else +#define WRITE_TO_STDERR(buf, len) write(STDERR_FILENO, buf, len) +#endif + +// MSVC and mingw define their own, safe version of vnsprintf (the +// windows one in broken) in port.cc. Everyone else can use the +// version here. We had to give it a unique name for windows. 
+#ifndef _WIN32 +# define perftools_vsnprintf vsnprintf +#endif + + +// We log all messages at this log-level and below. +// INFO == -1, WARNING == -2, ERROR == -3, FATAL == -4 +DECLARE_int32(verbose); + +// CHECK dies with a fatal error if condition is not true. It is *not* +// controlled by NDEBUG, so the check will be executed regardless of +// compilation mode. Therefore, it is safe to do things like: +// CHECK(fp->Write(x) == 4) +// Note we use write instead of printf/puts to avoid the risk we'll +// call malloc(). +#define CHECK(condition) \ + do { \ + if (!(condition)) { \ + WRITE_TO_STDERR("Check failed: " #condition "\n", \ + sizeof("Check failed: " #condition "\n")-1); \ + abort(); \ + } \ + } while (0) + +// This takes a message to print. The name is historical. +#define RAW_CHECK(condition, message) \ + do { \ + if (!(condition)) { \ + WRITE_TO_STDERR("Check failed: " #condition ": " message "\n", \ + sizeof("Check failed: " #condition ": " message "\n")-1);\ + abort(); \ + } \ + } while (0) + +// This is like RAW_CHECK, but only in debug-mode +#ifdef NDEBUG +enum { DEBUG_MODE = 0 }; +#define RAW_DCHECK(condition, message) +#else +enum { DEBUG_MODE = 1 }; +#define RAW_DCHECK(condition, message) RAW_CHECK(condition, message) +#endif + +// This prints errno as well. Note we use write instead of printf/puts to +// avoid the risk we'll call malloc(). +#define PCHECK(condition) \ + do { \ + if (!(condition)) { \ + const int err_no = errno; \ + WRITE_TO_STDERR("Check failed: " #condition ": ", \ + sizeof("Check failed: " #condition ": ")-1); \ + WRITE_TO_STDERR(strerror(err_no), strlen(strerror(err_no))); \ + WRITE_TO_STDERR("\n", sizeof("\n")-1); \ + abort(); \ + } \ + } while (0) + +// Helper macro for binary operators; prints the two values on error +// Don't use this macro directly in your code, use CHECK_EQ et al below + +// WARNING: These don't compile correctly if one of the arguments is a pointer +// and the other is NULL. 
To work around this, simply static_cast NULL to the +// type of the desired pointer. + +// TODO(jandrews): Also print the values in case of failure. Requires some +// sort of type-sensitive ToString() function. +#define CHECK_OP(op, val1, val2) \ + do { \ + if (!((val1) op (val2))) { \ + fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2); \ + abort(); \ + } \ + } while (0) + +#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2) +#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2) +#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2) +#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2) +#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2) +#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2) + +// Synonyms for CHECK_* that are used in some unittests. +#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2) +#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2) +#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2) +#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2) +#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2) +#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2) +#define ASSERT_EQ(val1, val2) EXPECT_EQ(val1, val2) +#define ASSERT_NE(val1, val2) EXPECT_NE(val1, val2) +#define ASSERT_LE(val1, val2) EXPECT_LE(val1, val2) +#define ASSERT_LT(val1, val2) EXPECT_LT(val1, val2) +#define ASSERT_GE(val1, val2) EXPECT_GE(val1, val2) +#define ASSERT_GT(val1, val2) EXPECT_GT(val1, val2) +// As are these variants. 
+#define EXPECT_TRUE(cond) CHECK(cond) +#define EXPECT_FALSE(cond) CHECK(!(cond)) +#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0) +#define ASSERT_TRUE(cond) EXPECT_TRUE(cond) +#define ASSERT_FALSE(cond) EXPECT_FALSE(cond) +#define ASSERT_STREQ(a, b) EXPECT_STREQ(a, b) + +// Used for (libc) functions that return -1 and set errno +#define CHECK_ERR(invocation) PCHECK((invocation) != -1) + +// A few more checks that only happen in debug mode +#ifdef NDEBUG +#define DCHECK_EQ(val1, val2) +#define DCHECK_NE(val1, val2) +#define DCHECK_LE(val1, val2) +#define DCHECK_LT(val1, val2) +#define DCHECK_GE(val1, val2) +#define DCHECK_GT(val1, val2) +#else +#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2) +#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2) +#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2) +#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2) +#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2) +#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2) +#endif + + +#ifdef ERROR +#undef ERROR // may conflict with ERROR macro on windows +#endif +enum LogSeverity {INFO = -1, WARNING = -2, ERROR = -3, FATAL = -4}; + +// NOTE: we add a newline to the end of the output if it's not there already +inline void LogPrintf(int severity, const char* pat, va_list ap) { + // We write directly to the stderr file descriptor and avoid FILE + // buffering because that may invoke malloc() + char buf[600]; + perftools_vsnprintf(buf, sizeof(buf)-1, pat, ap); + if (buf[0] != '\0' && buf[strlen(buf)-1] != '\n') { + assert(strlen(buf)+1 < sizeof(buf)); + strcat(buf, "\n"); + } + WRITE_TO_STDERR(buf, strlen(buf)); + if ((severity) == FATAL) + abort(); // LOG(FATAL) indicates a big problem, so don't run atexit() calls +} + +// Note that since the order of global constructors is unspecified, +// global code that calls RAW_LOG may execute before FLAGS_verbose is set. +// Such code will run with verbosity == 0 no matter what. 
+#define VLOG_IS_ON(severity) (FLAGS_verbose >= severity) + +// In a better world, we'd use __VA_ARGS__, but VC++ 7 doesn't support it. +#define LOG_PRINTF(severity, pat) do { \ + if (VLOG_IS_ON(severity)) { \ + va_list ap; \ + va_start(ap, pat); \ + LogPrintf(severity, pat, ap); \ + va_end(ap); \ + } \ +} while (0) + +// RAW_LOG is the main function; some synonyms are used in unittests. +inline void RAW_LOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } +inline void RAW_VLOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } +inline void LOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } +inline void VLOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } +inline void LOG_IF(int lvl, bool cond, const char* pat, ...) { + if (cond) LOG_PRINTF(lvl, pat); +} + +// This isn't technically logging, but it's also IO and also is an +// attempt to be "raw" -- that is, to not use any higher-level libc +// routines that might allocate memory or (ideally) try to allocate +// locks. We use an opaque file handle (not necessarily an int) +// to allow even more low-level stuff in the future. +// Like other "raw" routines, these functions are best effort, and +// thus don't return error codes (except RawOpenForWriting()). 
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) +#ifndef NOMINMAX +#define NOMINMAX // @#!$& windows +#endif +#include <windows.h> +typedef HANDLE RawFD; +const RawFD kIllegalRawFD = INVALID_HANDLE_VALUE; +#else +typedef int RawFD; +const RawFD kIllegalRawFD = -1; // what open returns if it fails +#endif // defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) + +RawFD RawOpenForWriting(const char* filename); // uses default permissions +void RawWrite(RawFD fd, const char* buf, size_t len); +void RawClose(RawFD fd); + +#endif // _LOGGING_H_ diff --git a/src/third_party/gperftools-2.7/src/base/low_level_alloc.cc b/src/third_party/gperftools-2.7/src/base/low_level_alloc.cc new file mode 100644 index 00000000000..6b467cff123 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/low_level_alloc.cc @@ -0,0 +1,582 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// A low-level allocator that can be used by other low-level +// modules without introducing dependency cycles. +// This allocator is slow and wasteful of memory; +// it should not be used when performance is key. + +#include "base/low_level_alloc.h" +#include "base/dynamic_annotations.h" +#include "base/spinlock.h" +#include "base/logging.h" +#include "malloc_hook-inl.h" +#include <gperftools/malloc_hook.h> +#include <errno.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include <new> // for placement-new + +// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old +// form of the name instead. +#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +// A first-fit allocator with amortized logarithmic free() time. + +LowLevelAlloc::PagesAllocator::~PagesAllocator() { +} + +// --------------------------------------------------------------------------- +static const int kMaxLevel = 30; + +// We put this class-only struct in a namespace to avoid polluting the +// global namespace with this struct name (thus risking an ODR violation). +namespace low_level_alloc_internal { + // This struct describes one allocated block, or one free block. + struct AllocList { + struct Header { + intptr_t size; // size of entire region, including this field. Must be + // first. 
Valid in both allocated and unallocated blocks + intptr_t magic; // kMagicAllocated or kMagicUnallocated xor this + LowLevelAlloc::Arena *arena; // pointer to parent arena + void *dummy_for_alignment; // aligns regions to 0 mod 2*sizeof(void*) + } header; + + // Next two fields: in unallocated blocks: freelist skiplist data + // in allocated blocks: overlaps with client data + int levels; // levels in skiplist used + AllocList *next[kMaxLevel]; // actually has levels elements. + // The AllocList node may not have room for + // all kMaxLevel entries. See max_fit in + // LLA_SkiplistLevels() + }; +} +using low_level_alloc_internal::AllocList; + + +// --------------------------------------------------------------------------- +// A trivial skiplist implementation. This is used to keep the freelist +// in address order while taking only logarithmic time per insert and delete. + +// An integer approximation of log2(size/base) +// Requires size >= base. +static int IntLog2(size_t size, size_t base) { + int result = 0; + for (size_t i = size; i > base; i >>= 1) { // i == floor(size/2**result) + result++; + } + // floor(size / 2**result) <= base < floor(size / 2**(result-1)) + // => log2(size/(base+1)) <= result < 1+log2(size/base) + // => result ~= log2(size/base) + return result; +} + +// Return a random integer n: p(n)=1/(2**n) if 1 <= n; p(n)=0 if n < 1. +static int Random() { + static uint32 r = 1; // no locking---it's not critical + ANNOTATE_BENIGN_RACE(&r, "benign race, not critical."); + int result = 1; + while ((((r = r*1103515245 + 12345) >> 30) & 1) == 0) { + result++; + } + return result; +} + +// Return a number of skiplist levels for a node of size bytes, where +// base is the minimum node size. Compute level=log2(size / base)+n +// where n is 1 if random is false and otherwise a random number generated with +// the standard distribution for a skiplist: See Random() above. 
+// Bigger nodes tend to have more skiplist levels due to the log2(size / base) +// term, so first-fit searches touch fewer nodes. "level" is clipped so +// level<kMaxLevel and next[level-1] will fit in the node. +// 0 < LLA_SkiplistLevels(x,y,false) <= LLA_SkiplistLevels(x,y,true) < kMaxLevel +static int LLA_SkiplistLevels(size_t size, size_t base, bool random) { + // max_fit is the maximum number of levels that will fit in a node for the + // given size. We can't return more than max_fit, no matter what the + // random number generator says. + int max_fit = (size-OFFSETOF_MEMBER(AllocList, next)) / sizeof (AllocList *); + int level = IntLog2(size, base) + (random? Random() : 1); + if (level > max_fit) level = max_fit; + if (level > kMaxLevel-1) level = kMaxLevel - 1; + RAW_CHECK(level >= 1, "block not big enough for even one level"); + return level; +} + +// Return "atleast", the first element of AllocList *head s.t. *atleast >= *e. +// For 0 <= i < head->levels, set prev[i] to "no_greater", where no_greater +// points to the last element at level i in the AllocList less than *e, or is +// head if no such element exists. +static AllocList *LLA_SkiplistSearch(AllocList *head, + AllocList *e, AllocList **prev) { + AllocList *p = head; + for (int level = head->levels - 1; level >= 0; level--) { + for (AllocList *n; (n = p->next[level]) != 0 && n < e; p = n) { + } + prev[level] = p; + } + return (head->levels == 0) ? 0 : prev[0]->next[0]; +} + +// Insert element *e into AllocList *head. Set prev[] as LLA_SkiplistSearch. 
+// Requires that e->levels be previously set by the caller (using +// LLA_SkiplistLevels()) +static void LLA_SkiplistInsert(AllocList *head, AllocList *e, + AllocList **prev) { + LLA_SkiplistSearch(head, e, prev); + for (; head->levels < e->levels; head->levels++) { // extend prev pointers + prev[head->levels] = head; // to all *e's levels + } + for (int i = 0; i != e->levels; i++) { // add element to list + e->next[i] = prev[i]->next[i]; + prev[i]->next[i] = e; + } +} + +// Remove element *e from AllocList *head. Set prev[] as LLA_SkiplistSearch(). +// Requires that e->levels be previous set by the caller (using +// LLA_SkiplistLevels()) +static void LLA_SkiplistDelete(AllocList *head, AllocList *e, + AllocList **prev) { + AllocList *found = LLA_SkiplistSearch(head, e, prev); + RAW_CHECK(e == found, "element not in freelist"); + for (int i = 0; i != e->levels && prev[i]->next[i] == e; i++) { + prev[i]->next[i] = e->next[i]; + } + while (head->levels > 0 && head->next[head->levels - 1] == 0) { + head->levels--; // reduce head->levels if level unused + } +} + +// --------------------------------------------------------------------------- +// Arena implementation + +struct LowLevelAlloc::Arena { + Arena() : mu(SpinLock::LINKER_INITIALIZED) {} // does nothing; for static init + explicit Arena(int) : pagesize(0) {} // set pagesize to zero explicitly + // for non-static init + + SpinLock mu; // protects freelist, allocation_count, + // pagesize, roundup, min_size + AllocList freelist; // head of free list; sorted by addr (under mu) + int32 allocation_count; // count of allocated blocks (under mu) + int32 flags; // flags passed to NewArena (ro after init) + size_t pagesize; // ==getpagesize() (init under mu, then ro) + size_t roundup; // lowest power of 2 >= max(16,sizeof (AllocList)) + // (init under mu, then ro) + size_t min_size; // smallest allocation block size + // (init under mu, then ro) + PagesAllocator *allocator; +}; + +// The default arena, which is used 
when 0 is passed instead of an Arena +// pointer. +static struct LowLevelAlloc::Arena default_arena; + +// Non-malloc-hooked arenas: used only to allocate metadata for arenas that +// do not want malloc hook reporting, so that for them there's no malloc hook +// reporting even during arena creation. +static struct LowLevelAlloc::Arena unhooked_arena; +static struct LowLevelAlloc::Arena unhooked_async_sig_safe_arena; + +namespace { + + class DefaultPagesAllocator : public LowLevelAlloc::PagesAllocator { + public: + virtual ~DefaultPagesAllocator() {}; + virtual void *MapPages(int32 flags, size_t size); + virtual void UnMapPages(int32 flags, void *addr, size_t size); + }; + +} + +// magic numbers to identify allocated and unallocated blocks +static const intptr_t kMagicAllocated = 0x4c833e95; +static const intptr_t kMagicUnallocated = ~kMagicAllocated; + +namespace { + class SCOPED_LOCKABLE ArenaLock { + public: + explicit ArenaLock(LowLevelAlloc::Arena *arena) + EXCLUSIVE_LOCK_FUNCTION(arena->mu) + : left_(false), mask_valid_(false), arena_(arena) { + if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { + // We've decided not to support async-signal-safe arena use until + // there a demonstrated need. Here's how one could do it though + // (would need to be made more portable). 
+#if 0 + sigset_t all; + sigfillset(&all); + this->mask_valid_ = + (pthread_sigmask(SIG_BLOCK, &all, &this->mask_) == 0); +#else + RAW_CHECK(false, "We do not yet support async-signal-safe arena."); +#endif + } + this->arena_->mu.Lock(); + } + ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); } + void Leave() /*UNLOCK_FUNCTION()*/ { + this->arena_->mu.Unlock(); +#if 0 + if (this->mask_valid_) { + pthread_sigmask(SIG_SETMASK, &this->mask_, 0); + } +#endif + this->left_ = true; + } + private: + bool left_; // whether left region + bool mask_valid_; +#if 0 + sigset_t mask_; // old mask of blocked signals +#endif + LowLevelAlloc::Arena *arena_; + DISALLOW_COPY_AND_ASSIGN(ArenaLock); + }; +} // anonymous namespace + +// create an appropriate magic number for an object at "ptr" +// "magic" should be kMagicAllocated or kMagicUnallocated +inline static intptr_t Magic(intptr_t magic, AllocList::Header *ptr) { + return magic ^ reinterpret_cast<intptr_t>(ptr); +} + +// Initialize the fields of an Arena +static void ArenaInit(LowLevelAlloc::Arena *arena) { + if (arena->pagesize == 0) { + arena->pagesize = getpagesize(); + // Round up block sizes to a power of two close to the header size. + arena->roundup = 16; + while (arena->roundup < sizeof (arena->freelist.header)) { + arena->roundup += arena->roundup; + } + // Don't allocate blocks less than twice the roundup size to avoid tiny + // free blocks. + arena->min_size = 2 * arena->roundup; + arena->freelist.header.size = 0; + arena->freelist.header.magic = + Magic(kMagicUnallocated, &arena->freelist.header); + arena->freelist.header.arena = arena; + arena->freelist.levels = 0; + memset(arena->freelist.next, 0, sizeof (arena->freelist.next)); + arena->allocation_count = 0; + if (arena == &default_arena) { + // Default arena should be hooked, e.g. for heap-checker to trace + // pointer chains through objects in the default arena. 
+ arena->flags = LowLevelAlloc::kCallMallocHook; + } else if (arena == &unhooked_async_sig_safe_arena) { + arena->flags = LowLevelAlloc::kAsyncSignalSafe; + } else { + arena->flags = 0; // other arenas' flags may be overridden by client, + // but unhooked_arena will have 0 in 'flags'. + } + arena->allocator = LowLevelAlloc::GetDefaultPagesAllocator(); + } +} + +// L < meta_data_arena->mu +LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags, + Arena *meta_data_arena) { + return NewArenaWithCustomAlloc(flags, meta_data_arena, NULL); +} + +// L < meta_data_arena->mu +LowLevelAlloc::Arena *LowLevelAlloc::NewArenaWithCustomAlloc(int32 flags, + Arena *meta_data_arena, + PagesAllocator *allocator) { + RAW_CHECK(meta_data_arena != 0, "must pass a valid arena"); + if (meta_data_arena == &default_arena) { + if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { + meta_data_arena = &unhooked_async_sig_safe_arena; + } else if ((flags & LowLevelAlloc::kCallMallocHook) == 0) { + meta_data_arena = &unhooked_arena; + } + } + // Arena(0) uses the constructor for non-static contexts + Arena *result = + new (AllocWithArena(sizeof (*result), meta_data_arena)) Arena(0); + ArenaInit(result); + result->flags = flags; + if (allocator) { + result->allocator = allocator; + } + return result; +} + +// L < arena->mu, L < arena->arena->mu +bool LowLevelAlloc::DeleteArena(Arena *arena) { + RAW_CHECK(arena != 0 && arena != &default_arena && arena != &unhooked_arena, + "may not delete default arena"); + ArenaLock section(arena); + bool empty = (arena->allocation_count == 0); + section.Leave(); + if (empty) { + while (arena->freelist.next[0] != 0) { + AllocList *region = arena->freelist.next[0]; + size_t size = region->header.size; + arena->freelist.next[0] = region->next[0]; + RAW_CHECK(region->header.magic == + Magic(kMagicUnallocated, ®ion->header), + "bad magic number in DeleteArena()"); + RAW_CHECK(region->header.arena == arena, + "bad arena pointer in DeleteArena()"); + RAW_CHECK(size % 
arena->pagesize == 0, + "empty arena has non-page-aligned block size"); + RAW_CHECK(reinterpret_cast<intptr_t>(region) % arena->pagesize == 0, + "empty arena has non-page-aligned block"); + int munmap_result; + if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) == 0) { + munmap_result = munmap(region, size); + } else { + munmap_result = MallocHook::UnhookedMUnmap(region, size); + } + RAW_CHECK(munmap_result == 0, + "LowLevelAlloc::DeleteArena: munmap failed address"); + } + Free(arena); + } + return empty; +} + +// --------------------------------------------------------------------------- + +// Return value rounded up to next multiple of align. +// align must be a power of two. +static intptr_t RoundUp(intptr_t addr, intptr_t align) { + return (addr + align - 1) & ~(align - 1); +} + +// Equivalent to "return prev->next[i]" but with sanity checking +// that the freelist is in the correct order, that it +// consists of regions marked "unallocated", and that no two regions +// are adjacent in memory (they should have been coalesced). +// L < arena->mu +static AllocList *Next(int i, AllocList *prev, LowLevelAlloc::Arena *arena) { + RAW_CHECK(i < prev->levels, "too few levels in Next()"); + AllocList *next = prev->next[i]; + if (next != 0) { + RAW_CHECK(next->header.magic == Magic(kMagicUnallocated, &next->header), + "bad magic number in Next()"); + RAW_CHECK(next->header.arena == arena, + "bad arena pointer in Next()"); + if (prev != &arena->freelist) { + RAW_CHECK(prev < next, "unordered freelist"); + RAW_CHECK(reinterpret_cast<char *>(prev) + prev->header.size < + reinterpret_cast<char *>(next), "malformed freelist"); + } + } + return next; +} + +// Coalesce list item "a" with its successor if they are adjacent. 
static void Coalesce(AllocList *a) {
  AllocList *n = a->next[0];   // successor of "a" on the level-0 (address-ordered) list
  // Merge only when "n" begins exactly where "a" ends in memory.
  if (n != 0 && reinterpret_cast<char *>(a) + a->header.size ==
      reinterpret_cast<char *>(n)) {
    LowLevelAlloc::Arena *arena = a->header.arena;
    a->header.size += n->header.size;   // "a" absorbs "n"
    // Scrub "n"'s header so any stale reference to it fails the
    // magic/arena sanity checks elsewhere in this file.
    n->header.magic = 0;
    n->header.arena = 0;
    AllocList *prev[kMaxLevel];   // scratch: per-level predecessors for the skiplist ops
    LLA_SkiplistDelete(&arena->freelist, n, prev);
    // "a"'s size changed, so its skiplist level count may change too;
    // delete and reinsert it rather than patching it in place.
    LLA_SkiplistDelete(&arena->freelist, a, prev);
    a->levels = LLA_SkiplistLevels(a->header.size, arena->min_size, true);
    LLA_SkiplistInsert(&arena->freelist, a, prev);
  }
}

// Adds block at location "v" to the free list
// L >= arena->mu
static void AddToFreelist(void *v, LowLevelAlloc::Arena *arena) {
  // The block header sits immediately before the user pointer "v".
  AllocList *f = reinterpret_cast<AllocList *>(
                        reinterpret_cast<char *>(v) - sizeof (f->header));
  RAW_CHECK(f->header.magic == Magic(kMagicAllocated, &f->header),
            "bad magic number in AddToFreelist()");
  RAW_CHECK(f->header.arena == arena,
            "bad arena pointer in AddToFreelist()");
  f->levels = LLA_SkiplistLevels(f->header.size, arena->min_size, true);
  AllocList *prev[kMaxLevel];
  LLA_SkiplistInsert(&arena->freelist, f, prev);
  // Flip the magic to "unallocated" only after insertion; the magic value
  // is derived from the header's own address.
  f->header.magic = Magic(kMagicUnallocated, &f->header);
  Coalesce(f);                  // maybe coalesce with successor
  Coalesce(prev[0]);            // maybe coalesce with predecessor
}

// Frees storage allocated by LowLevelAlloc::Alloc().
+// L < arena->mu +void LowLevelAlloc::Free(void *v) { + if (v != 0) { + AllocList *f = reinterpret_cast<AllocList *>( + reinterpret_cast<char *>(v) - sizeof (f->header)); + RAW_CHECK(f->header.magic == Magic(kMagicAllocated, &f->header), + "bad magic number in Free()"); + LowLevelAlloc::Arena *arena = f->header.arena; + if ((arena->flags & kCallMallocHook) != 0) { + MallocHook::InvokeDeleteHook(v); + } + ArenaLock section(arena); + AddToFreelist(v, arena); + RAW_CHECK(arena->allocation_count > 0, "nothing in arena to free"); + arena->allocation_count--; + section.Leave(); + } +} + +// allocates and returns a block of size bytes, to be freed with Free() +// L < arena->mu +static void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) { + void *result = 0; + if (request != 0) { + AllocList *s; // will point to region that satisfies request + ArenaLock section(arena); + ArenaInit(arena); + // round up with header + size_t req_rnd = RoundUp(request + sizeof (s->header), arena->roundup); + for (;;) { // loop until we find a suitable region + // find the minimum levels that a block of this size must have + int i = LLA_SkiplistLevels(req_rnd, arena->min_size, false) - 1; + if (i < arena->freelist.levels) { // potential blocks exist + AllocList *before = &arena->freelist; // predecessor of s + while ((s = Next(i, before, arena)) != 0 && s->header.size < req_rnd) { + before = s; + } + if (s != 0) { // we found a region + break; + } + } + // we unlock before mmap() both because mmap() may call a callback hook, + // and because it may be slow. + arena->mu.Unlock(); + // mmap generous 64K chunks to decrease + // the chances/impact of fragmentation: + size_t new_pages_size = RoundUp(req_rnd, arena->pagesize * 16); + void *new_pages = arena->allocator->MapPages(arena->flags, new_pages_size); + arena->mu.Lock(); + s = reinterpret_cast<AllocList *>(new_pages); + s->header.size = new_pages_size; + // Pretend the block is allocated; call AddToFreelist() to free it. 
+ s->header.magic = Magic(kMagicAllocated, &s->header); + s->header.arena = arena; + AddToFreelist(&s->levels, arena); // insert new region into free list + } + AllocList *prev[kMaxLevel]; + LLA_SkiplistDelete(&arena->freelist, s, prev); // remove from free list + // s points to the first free region that's big enough + if (req_rnd + arena->min_size <= s->header.size) { // big enough to split + AllocList *n = reinterpret_cast<AllocList *> + (req_rnd + reinterpret_cast<char *>(s)); + n->header.size = s->header.size - req_rnd; + n->header.magic = Magic(kMagicAllocated, &n->header); + n->header.arena = arena; + s->header.size = req_rnd; + AddToFreelist(&n->levels, arena); + } + s->header.magic = Magic(kMagicAllocated, &s->header); + RAW_CHECK(s->header.arena == arena, ""); + arena->allocation_count++; + section.Leave(); + result = &s->levels; + } + ANNOTATE_NEW_MEMORY(result, request); + return result; +} + +void *LowLevelAlloc::Alloc(size_t request) { + void *result = DoAllocWithArena(request, &default_arena); + if ((default_arena.flags & kCallMallocHook) != 0) { + // this call must be directly in the user-called allocator function + // for MallocHook::GetCallerStackTrace to work properly + MallocHook::InvokeNewHook(result, request); + } + return result; +} + +void *LowLevelAlloc::AllocWithArena(size_t request, Arena *arena) { + RAW_CHECK(arena != 0, "must pass a valid arena"); + void *result = DoAllocWithArena(request, arena); + if ((arena->flags & kCallMallocHook) != 0) { + // this call must be directly in the user-called allocator function + // for MallocHook::GetCallerStackTrace to work properly + MallocHook::InvokeNewHook(result, request); + } + return result; +} + +LowLevelAlloc::Arena *LowLevelAlloc::DefaultArena() { + return &default_arena; +} + +static DefaultPagesAllocator *default_pages_allocator; +static union { + char chars[sizeof(DefaultPagesAllocator)]; + void *ptr; +} debug_pages_allocator_space; + +LowLevelAlloc::PagesAllocator 
*LowLevelAlloc::GetDefaultPagesAllocator(void) { + if (default_pages_allocator) { + return default_pages_allocator; + } + default_pages_allocator = new (debug_pages_allocator_space.chars) DefaultPagesAllocator(); + return default_pages_allocator; +} + +void *DefaultPagesAllocator::MapPages(int32 flags, size_t size) { + void *new_pages; + if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { + new_pages = MallocHook::UnhookedMMap(0, size, + PROT_WRITE|PROT_READ, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + } else { + new_pages = mmap(0, size, + PROT_WRITE|PROT_READ, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + } + RAW_CHECK(new_pages != MAP_FAILED, "mmap error"); + + return new_pages; +} + +void DefaultPagesAllocator::UnMapPages(int32 flags, void *region, size_t size) { + int munmap_result; + if ((flags & LowLevelAlloc::kAsyncSignalSafe) == 0) { + munmap_result = munmap(region, size); + } else { + munmap_result = MallocHook::UnhookedMUnmap(region, size); + } + RAW_CHECK(munmap_result == 0, + "LowLevelAlloc::DeleteArena: munmap failed address"); +} diff --git a/src/third_party/gperftools-2.7/src/base/low_level_alloc.h b/src/third_party/gperftools-2.7/src/base/low_level_alloc.h new file mode 100644 index 00000000000..d8dfc8f3929 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/low_level_alloc.h @@ -0,0 +1,120 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if !defined(_BASE_LOW_LEVEL_ALLOC_H_) +#define _BASE_LOW_LEVEL_ALLOC_H_ + +// A simple thread-safe memory allocator that does not depend on +// mutexes or thread-specific data. It is intended to be used +// sparingly, and only when malloc() would introduce an unwanted +// dependency, such as inside the heap-checker. + +#include <config.h> +#include <stddef.h> // for size_t +#include "base/basictypes.h" + +class LowLevelAlloc { + public: + class PagesAllocator { + public: + virtual ~PagesAllocator(); + virtual void *MapPages(int32 flags, size_t size) = 0; + virtual void UnMapPages(int32 flags, void *addr, size_t size) = 0; + }; + + static PagesAllocator *GetDefaultPagesAllocator(void); + + struct Arena; // an arena from which memory may be allocated + + // Returns a pointer to a block of at least "request" bytes + // that have been newly allocated from the specific arena. + // for Alloc() call the DefaultArena() is used. + // Returns 0 if passed request==0. 
+ // Does not return 0 under other circumstances; it crashes if memory + // is not available. + static void *Alloc(size_t request) + ATTRIBUTE_SECTION(malloc_hook); + static void *AllocWithArena(size_t request, Arena *arena) + ATTRIBUTE_SECTION(malloc_hook); + + // Deallocates a region of memory that was previously allocated with + // Alloc(). Does nothing if passed 0. "s" must be either 0, + // or must have been returned from a call to Alloc() and not yet passed to + // Free() since that call to Alloc(). The space is returned to the arena + // from which it was allocated. + static void Free(void *s) ATTRIBUTE_SECTION(malloc_hook); + + // ATTRIBUTE_SECTION(malloc_hook) for Alloc* and Free + // are to put all callers of MallocHook::Invoke* in this module + // into special section, + // so that MallocHook::GetCallerStackTrace can function accurately. + + // Create a new arena. + // The root metadata for the new arena is allocated in the + // meta_data_arena; the DefaultArena() can be passed for meta_data_arena. + // These values may be ored into flags: + enum { + // Report calls to Alloc() and Free() via the MallocHook interface. + // Set in the DefaultArena. + kCallMallocHook = 0x0001, + + // Make calls to Alloc(), Free() be async-signal-safe. Not set in + // DefaultArena(). + kAsyncSignalSafe = 0x0002, + + // When used with DefaultArena(), the NewArena() and DeleteArena() calls + // obey the flags given explicitly in the NewArena() call, even if those + // flags differ from the settings in DefaultArena(). So the call + // NewArena(kAsyncSignalSafe, DefaultArena()) is itself async-signal-safe, + // as well as generatating an arena that provides async-signal-safe + // Alloc/Free. 
+ }; + static Arena *NewArena(int32 flags, Arena *meta_data_arena); + + // note: pages allocator will never be destroyed and allocated pages will never be freed + // When allocator is NULL, it's same as NewArena + static Arena *NewArenaWithCustomAlloc(int32 flags, Arena *meta_data_arena, PagesAllocator *allocator); + + // Destroys an arena allocated by NewArena and returns true, + // provided no allocated blocks remain in the arena. + // If allocated blocks remain in the arena, does nothing and + // returns false. + // It is illegal to attempt to destroy the DefaultArena(). + static bool DeleteArena(Arena *arena); + + // The default arena that always exists. + static Arena *DefaultArena(); + + private: + LowLevelAlloc(); // no instances +}; + +#endif diff --git a/src/third_party/gperftools-2.7/src/base/simple_mutex.h b/src/third_party/gperftools-2.7/src/base/simple_mutex.h new file mode 100644 index 00000000000..a1886e46f31 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/simple_mutex.h @@ -0,0 +1,332 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Craig Silverstein. +// +// A simple mutex wrapper, supporting locks and read-write locks. +// You should assume the locks are *not* re-entrant. +// +// To use: you should define the following macros in your configure.ac: +// ACX_PTHREAD +// AC_RWLOCK +// The latter is defined in ../autoconf. +// +// This class is meant to be internal-only and should be wrapped by an +// internal namespace. Before you use this module, please give the +// name of your internal namespace for this module. Or, if you want +// to expose it, you'll want to move it to the Google namespace. We +// cannot put this class in global namespace because there can be some +// problems when we have multiple versions of Mutex in each shared object. +// +// NOTE: TryLock() is broken for NO_THREADS mode, at least in NDEBUG +// mode. +// +// CYGWIN NOTE: Cygwin support for rwlock seems to be buggy: +// http://www.cygwin.com/ml/cygwin/2008-12/msg00017.html +// Because of that, we might as well use windows locks for +// cygwin. They seem to be more reliable than the cygwin pthreads layer. 
+// +// TRICKY IMPLEMENTATION NOTE: +// This class is designed to be safe to use during +// dynamic-initialization -- that is, by global constructors that are +// run before main() starts. The issue in this case is that +// dynamic-initialization happens in an unpredictable order, and it +// could be that someone else's dynamic initializer could call a +// function that tries to acquire this mutex -- but that all happens +// before this mutex's constructor has run. (This can happen even if +// the mutex and the function that uses the mutex are in the same .cc +// file.) Basically, because Mutex does non-trivial work in its +// constructor, it's not, in the naive implementation, safe to use +// before dynamic initialization has run on it. +// +// The solution used here is to pair the actual mutex primitive with a +// bool that is set to true when the mutex is dynamically initialized. +// (Before that it's false.) Then we modify all mutex routines to +// look at the bool, and not try to lock/unlock until the bool makes +// it to true (which happens after the Mutex constructor has run.) +// +// This works because before main() starts -- particularly, during +// dynamic initialization -- there are no threads, so a) it's ok that +// the mutex operations are a no-op, since we don't need locking then +// anyway; and b) we can be quite confident our bool won't change +// state between a call to Lock() and a call to Unlock() (that would +// require a global constructor in one translation unit to call Lock() +// and another global constructor in another translation unit to call +// Unlock() later, which is pretty perverse). +// +// That said, it's tricky, and can conceivably fail; it's safest to +// avoid trying to acquire a mutex in a global constructor, if you +// can. One way it can fail is that a really smart compiler might +// initialize the bool to true at static-initialization time (too +// early) rather than at dynamic-initialization time. 
To discourage +// that, we set is_safe_ to true in code (not the constructor +// colon-initializer) and set it to true via a function that always +// evaluates to true, but that the compiler can't know always +// evaluates to true. This should be good enough. +// +// A related issue is code that could try to access the mutex +// after it's been destroyed in the global destructors (because +// the Mutex global destructor runs before some other global +// destructor, that tries to acquire the mutex). The way we +// deal with this is by taking a constructor arg that global +// mutexes should pass in, that causes the destructor to do no +// work. We still depend on the compiler not doing anything +// weird to a Mutex's memory after it is destroyed, but for a +// static global variable, that's pretty safe. + +#ifndef GOOGLE_MUTEX_H_ +#define GOOGLE_MUTEX_H_ + +#include <config.h> + +#if defined(NO_THREADS) + typedef int MutexType; // to keep a lock-count +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN // We only need minimal includes +# endif + // We need Windows NT or later for TryEnterCriticalSection(). If you + // don't need that functionality, you can remove these _WIN32_WINNT + // lines, and change TryLock() to assert(0) or something. +# ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0400 +# endif +# include <windows.h> + typedef CRITICAL_SECTION MutexType; +#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK) + // Needed for pthread_rwlock_*. If it causes problems, you could take it + // out, but then you'd have to unset HAVE_RWLOCK (at least on linux -- it + // *does* cause problems for FreeBSD, or MacOSX, but isn't needed + // for locking there.) 
+# ifdef __linux__ +# define _XOPEN_SOURCE 500 // may be needed to get the rwlock calls +# endif +# include <pthread.h> + typedef pthread_rwlock_t MutexType; +#elif defined(HAVE_PTHREAD) +# include <pthread.h> + typedef pthread_mutex_t MutexType; +#else +# error Need to implement mutex.h for your architecture, or #define NO_THREADS +#endif + +#include <assert.h> +#include <stdlib.h> // for abort() + +#define MUTEX_NAMESPACE perftools_mutex_namespace + +namespace MUTEX_NAMESPACE { + +class Mutex { + public: + // This is used for the single-arg constructor + enum LinkerInitialized { LINKER_INITIALIZED }; + + // Create a Mutex that is not held by anybody. This constructor is + // typically used for Mutexes allocated on the heap or the stack. + inline Mutex(); + // This constructor should be used for global, static Mutex objects. + // It inhibits work being done by the destructor, which makes it + // safer for code that tries to acqiure this mutex in their global + // destructor. + inline Mutex(LinkerInitialized); + + // Destructor + inline ~Mutex(); + + inline void Lock(); // Block if needed until free then acquire exclusively + inline void Unlock(); // Release a lock acquired via Lock() + inline bool TryLock(); // If free, Lock() and return true, else return false + // Note that on systems that don't support read-write locks, these may + // be implemented as synonyms to Lock() and Unlock(). So you can use + // these for efficiency, but don't use them anyplace where being able + // to do shared reads is necessary to avoid deadlock. 
+ inline void ReaderLock(); // Block until free or shared then acquire a share + inline void ReaderUnlock(); // Release a read share of this Mutex + inline void WriterLock() { Lock(); } // Acquire an exclusive lock + inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock() + + private: + MutexType mutex_; + // We want to make sure that the compiler sets is_safe_ to true only + // when we tell it to, and never makes assumptions is_safe_ is + // always true. volatile is the most reliable way to do that. + volatile bool is_safe_; + // This indicates which constructor was called. + bool destroy_; + + inline void SetIsSafe() { is_safe_ = true; } + + // Catch the error of writing Mutex when intending MutexLock. + Mutex(Mutex* /*ignored*/) {} + // Disallow "evil" constructors + Mutex(const Mutex&); + void operator=(const Mutex&); +}; + +// Now the implementation of Mutex for various systems +#if defined(NO_THREADS) + +// When we don't have threads, we can be either reading or writing, +// but not both. We can have lots of readers at once (in no-threads +// mode, that's most likely to happen in recursive function calls), +// but only one writer. We represent this by having mutex_ be -1 when +// writing and a number > 0 when reading (and 0 when no lock is held). +// +// In debug mode, we assert these invariants, while in non-debug mode +// we do nothing, for efficiency. That's why everything is in an +// assert. 
+ +Mutex::Mutex() : mutex_(0) { } +Mutex::Mutex(Mutex::LinkerInitialized) : mutex_(0) { } +Mutex::~Mutex() { assert(mutex_ == 0); } +void Mutex::Lock() { assert(--mutex_ == -1); } +void Mutex::Unlock() { assert(mutex_++ == -1); } +bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; } +void Mutex::ReaderLock() { assert(++mutex_ > 0); } +void Mutex::ReaderUnlock() { assert(mutex_-- > 0); } + +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) + +Mutex::Mutex() : destroy_(true) { + InitializeCriticalSection(&mutex_); + SetIsSafe(); +} +Mutex::Mutex(LinkerInitialized) : destroy_(false) { + InitializeCriticalSection(&mutex_); + SetIsSafe(); +} +Mutex::~Mutex() { if (destroy_) DeleteCriticalSection(&mutex_); } +void Mutex::Lock() { if (is_safe_) EnterCriticalSection(&mutex_); } +void Mutex::Unlock() { if (is_safe_) LeaveCriticalSection(&mutex_); } +bool Mutex::TryLock() { return is_safe_ ? + TryEnterCriticalSection(&mutex_) != 0 : true; } +void Mutex::ReaderLock() { Lock(); } // we don't have read-write locks +void Mutex::ReaderUnlock() { Unlock(); } + +#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK) + +#define SAFE_PTHREAD(fncall) do { /* run fncall if is_safe_ is true */ \ + if (is_safe_ && fncall(&mutex_) != 0) abort(); \ +} while (0) + +Mutex::Mutex() : destroy_(true) { + SetIsSafe(); + if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort(); +} +Mutex::Mutex(Mutex::LinkerInitialized) : destroy_(false) { + SetIsSafe(); + if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort(); +} +Mutex::~Mutex() { if (destroy_) SAFE_PTHREAD(pthread_rwlock_destroy); } +void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock); } +void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock); } +bool Mutex::TryLock() { return is_safe_ ? 
+ pthread_rwlock_trywrlock(&mutex_) == 0 : true; } +void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock); } +void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock); } +#undef SAFE_PTHREAD + +#elif defined(HAVE_PTHREAD) + +#define SAFE_PTHREAD(fncall) do { /* run fncall if is_safe_ is true */ \ + if (is_safe_ && fncall(&mutex_) != 0) abort(); \ +} while (0) + +Mutex::Mutex() : destroy_(true) { + SetIsSafe(); + if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort(); +} +Mutex::Mutex(Mutex::LinkerInitialized) : destroy_(false) { + SetIsSafe(); + if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort(); +} +Mutex::~Mutex() { if (destroy_) SAFE_PTHREAD(pthread_mutex_destroy); } +void Mutex::Lock() { SAFE_PTHREAD(pthread_mutex_lock); } +void Mutex::Unlock() { SAFE_PTHREAD(pthread_mutex_unlock); } +bool Mutex::TryLock() { return is_safe_ ? + pthread_mutex_trylock(&mutex_) == 0 : true; } +void Mutex::ReaderLock() { Lock(); } +void Mutex::ReaderUnlock() { Unlock(); } +#undef SAFE_PTHREAD + +#endif + +// -------------------------------------------------------------------------- +// Some helper classes + +// MutexLock(mu) acquires mu when constructed and releases it when destroyed. 
+class MutexLock { + public: + explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); } + ~MutexLock() { mu_->Unlock(); } + private: + Mutex * const mu_; + // Disallow "evil" constructors + MutexLock(const MutexLock&); + void operator=(const MutexLock&); +}; + +// ReaderMutexLock and WriterMutexLock do the same, for rwlocks +class ReaderMutexLock { + public: + explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); } + ~ReaderMutexLock() { mu_->ReaderUnlock(); } + private: + Mutex * const mu_; + // Disallow "evil" constructors + ReaderMutexLock(const ReaderMutexLock&); + void operator=(const ReaderMutexLock&); +}; + +class WriterMutexLock { + public: + explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); } + ~WriterMutexLock() { mu_->WriterUnlock(); } + private: + Mutex * const mu_; + // Disallow "evil" constructors + WriterMutexLock(const WriterMutexLock&); + void operator=(const WriterMutexLock&); +}; + +// Catch bug where variable name is omitted, e.g. MutexLock (&mu); +#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name) +#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name) +#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name) + +} // namespace MUTEX_NAMESPACE + +using namespace MUTEX_NAMESPACE; + +#undef MUTEX_NAMESPACE + +#endif /* #define GOOGLE_SIMPLE_MUTEX_H_ */ diff --git a/src/third_party/gperftools-2.7/src/base/spinlock.cc b/src/third_party/gperftools-2.7/src/base/spinlock.cc new file mode 100644 index 00000000000..85ff21ed404 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/spinlock.cc @@ -0,0 +1,129 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Sanjay Ghemawat + */ + +#include <config.h> +#include "base/spinlock.h" +#include "base/spinlock_internal.h" +#include "base/sysinfo.h" /* for GetSystemCPUsCount() */ + +// NOTE on the Lock-state values: +// +// kSpinLockFree represents the unlocked state +// kSpinLockHeld represents the locked state with no waiters +// kSpinLockSleeper represents the locked state with waiters + +static int adaptive_spin_count = 0; + +const base::LinkerInitialized SpinLock::LINKER_INITIALIZED = + base::LINKER_INITIALIZED; + +namespace { +struct SpinLock_InitHelper { + SpinLock_InitHelper() { + // On multi-cpu machines, spin for longer before yielding + // the processor or sleeping. Reduces idle time significantly. + if (GetSystemCPUsCount() > 1) { + adaptive_spin_count = 1000; + } + } +}; + +// Hook into global constructor execution: +// We do not do adaptive spinning before that, +// but nothing lock-intensive should be going on at that time. +static SpinLock_InitHelper init_helper; + +inline void SpinlockPause(void) { +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + __asm__ __volatile__("rep; nop" : : ); +#endif +} + +} // unnamed namespace + +// Monitor the lock to see if its value changes within some time +// period (adaptive_spin_count loop iterations). The last value read +// from the lock is returned from the method. +Atomic32 SpinLock::SpinLoop() { + int c = adaptive_spin_count; + while (base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree && --c > 0) { + SpinlockPause(); + } + return base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree, + kSpinLockSleeper); +} + +void SpinLock::SlowLock() { + Atomic32 lock_value = SpinLoop(); + + int lock_wait_call_count = 0; + while (lock_value != kSpinLockFree) { + // If the lock is currently held, but not marked as having a sleeper, mark + // it as having a sleeper. + if (lock_value == kSpinLockHeld) { + // Here, just "mark" that the thread is going to sleep. 
Don't store the + // lock wait time in the lock as that will cause the current lock + // owner to think it experienced contention. + lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_, + kSpinLockHeld, + kSpinLockSleeper); + if (lock_value == kSpinLockHeld) { + // Successfully transitioned to kSpinLockSleeper. Pass + // kSpinLockSleeper to the SpinLockDelay routine to properly indicate + // the last lock_value observed. + lock_value = kSpinLockSleeper; + } else if (lock_value == kSpinLockFree) { + // Lock is free again, so try and acquire it before sleeping. The + // new lock state will be the number of cycles this thread waited if + // this thread obtains the lock. + lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_, + kSpinLockFree, + kSpinLockSleeper); + continue; // skip the delay at the end of the loop + } + } + + // Wait for an OS specific delay. + base::internal::SpinLockDelay(&lockword_, lock_value, + ++lock_wait_call_count); + // Spin again after returning from the wait routine to give this thread + // some chance of obtaining the lock. + lock_value = SpinLoop(); + } +} + +void SpinLock::SlowUnlock() { + // wake waiter if necessary + base::internal::SpinLockWake(&lockword_, false); +} diff --git a/src/third_party/gperftools-2.7/src/base/spinlock.h b/src/third_party/gperftools-2.7/src/base/spinlock.h new file mode 100644 index 00000000000..7243aeaaefd --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/spinlock.h @@ -0,0 +1,143 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + */ + +// SpinLock is async signal safe. +// If used within a signal handler, all lock holders +// should block the signal even outside the signal handler. + +#ifndef BASE_SPINLOCK_H_ +#define BASE_SPINLOCK_H_ + +#include <config.h> +#include "base/atomicops.h" +#include "base/basictypes.h" +#include "base/dynamic_annotations.h" +#include "base/thread_annotations.h" + +class LOCKABLE SpinLock { + public: + SpinLock() : lockword_(kSpinLockFree) { } + + // Special constructor for use with static SpinLock objects. E.g., + // + // static SpinLock lock(base::LINKER_INITIALIZED); + // + // When intialized using this constructor, we depend on the fact + // that the linker has already initialized the memory appropriately. 
+ // A SpinLock constructed like this can be freely used from global + // initializers without worrying about the order in which global + // initializers run. + explicit SpinLock(base::LinkerInitialized /*x*/) { + // Does nothing; lockword_ is already initialized + } + + // Acquire this SpinLock. + // TODO(csilvers): uncomment the annotation when we figure out how to + // support this macro with 0 args (see thread_annotations.h) + inline void Lock() /*EXCLUSIVE_LOCK_FUNCTION()*/ { + if (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree, + kSpinLockHeld) != kSpinLockFree) { + SlowLock(); + } + ANNOTATE_RWLOCK_ACQUIRED(this, 1); + } + + // Try to acquire this SpinLock without blocking and return true if the + // acquisition was successful. If the lock was not acquired, false is + // returned. If this SpinLock is free at the time of the call, TryLock + // will return true with high probability. + inline bool TryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true) { + bool res = + (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree, + kSpinLockHeld) == kSpinLockFree); + if (res) { + ANNOTATE_RWLOCK_ACQUIRED(this, 1); + } + return res; + } + + // Release this SpinLock, which must be held by the calling thread. + // TODO(csilvers): uncomment the annotation when we figure out how to + // support this macro with 0 args (see thread_annotations.h) + inline void Unlock() /*UNLOCK_FUNCTION()*/ { + ANNOTATE_RWLOCK_RELEASED(this, 1); + uint64 prev_value = static_cast<uint64>( + base::subtle::Release_AtomicExchange(&lockword_, kSpinLockFree)); + if (prev_value != kSpinLockHeld) { + // Speed the wakeup of any waiter. + SlowUnlock(); + } + } + + // Determine if the lock is held. When the lock is held by the invoking + // thread, true will always be returned. Intended to be used as + // CHECK(lock.IsHeld()). 
+ inline bool IsHeld() const { + return base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree; + } + + static const base::LinkerInitialized LINKER_INITIALIZED; // backwards compat + private: + enum { kSpinLockFree = 0 }; + enum { kSpinLockHeld = 1 }; + enum { kSpinLockSleeper = 2 }; + + volatile Atomic32 lockword_; + + void SlowLock(); + void SlowUnlock(); + Atomic32 SpinLoop(); + + DISALLOW_COPY_AND_ASSIGN(SpinLock); +}; + +// Corresponding locker object that arranges to acquire a spinlock for +// the duration of a C++ scope. +class SCOPED_LOCKABLE SpinLockHolder { + private: + SpinLock* lock_; + public: + inline explicit SpinLockHolder(SpinLock* l) EXCLUSIVE_LOCK_FUNCTION(l) + : lock_(l) { + l->Lock(); + } + // TODO(csilvers): uncomment the annotation when we figure out how to + // support this macro with 0 args (see thread_annotations.h) + inline ~SpinLockHolder() /*UNLOCK_FUNCTION()*/ { lock_->Unlock(); } +}; +// Catch bug where variable name is omitted, e.g. SpinLockHolder (&lock); +#define SpinLockHolder(x) COMPILE_ASSERT(0, spin_lock_decl_missing_var_name) + + +#endif // BASE_SPINLOCK_H_ diff --git a/src/third_party/gperftools-2.7/src/base/spinlock_internal.cc b/src/third_party/gperftools-2.7/src/base/spinlock_internal.cc new file mode 100644 index 00000000000..d9629717be1 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/spinlock_internal.cc @@ -0,0 +1,102 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2010, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// The OS-specific header included below must provide two calls:
+// base::internal::SpinLockDelay() and base::internal::SpinLockWake().
+// See spinlock_internal.h for the spec of SpinLockWake().
+
+// void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop)
+// SpinLockDelay() generates an appropriate spin delay on iteration "loop" of a
+// spin loop on location *w, whose previously observed value was "value".
+// SpinLockDelay() may do nothing, may yield the CPU, may sleep a clock tick,
+// or may wait for a delay that can be truncated by a call to SpinLockWake(w).
+// In all cases, it must return in bounded time even if SpinLockWake() is not
+// called.
+
+#include "base/spinlock_internal.h"
+
+// forward declaration for use by spinlock_*-inl.h
+namespace base { namespace internal { static int SuggestedDelayNS(int loop); }}
+
+#if defined(_WIN32)
+#include "base/spinlock_win32-inl.h"
+#elif defined(__linux__)
+#include "base/spinlock_linux-inl.h"
+#else
+#include "base/spinlock_posix-inl.h"
+#endif
+
+namespace base {
+namespace internal {
+
+// Return a suggested delay in nanoseconds for iteration number "loop"
+// The delay is pseudo-random so that many threads spinning on the same
+// lock do not all wake at once; its mean grows exponentially with "loop"
+// (capped at 32), topping out around 8ms -- see the per-branch comments.
+static int SuggestedDelayNS(int loop) {
+  // Weak pseudo-random number generator to get some spread between threads
+  // when many are spinning.
+  // NOTE(review): 'rand' is process-global and updated with NoBarrier
+  // loads/stores; concurrent callers may interleave updates.  That only
+  // perturbs the jitter, which appears intentional -- confirm against
+  // upstream gperftools if stronger guarantees are ever needed.
+#ifdef BASE_HAS_ATOMIC64
+  static base::subtle::Atomic64 rand;
+  uint64 r = base::subtle::NoBarrier_Load(&rand);
+  r = 0x5deece66dLL * r + 0xb; // numbers from nrand48()
+  base::subtle::NoBarrier_Store(&rand, r);
+
+  r <<= 16; // 48-bit random number now in top 48-bits.
+  if (loop < 0 || loop > 32) { // limit loop to 0..32
+    loop = 32;
+  }
+  // loop>>3 cannot exceed 4 because loop cannot exceed 32.
+  // Select top 20..24 bits of lower 48 bits,
+  // giving approximately 0ms to 16ms.
+  // Mean is exponential in loop for first 32 iterations, then 8ms.
+  // The futex path multiplies this by 16, since we expect explicit wakeups
+  // almost always on that path.
+  return r >> (44 - (loop >> 3));
+#else
+  static Atomic32 rand;
+  uint32 r = base::subtle::NoBarrier_Load(&rand);
+  r = 0x343fd * r + 0x269ec3; // numbers from MSVC++
+  base::subtle::NoBarrier_Store(&rand, r);
+
+  r <<= 1; // 31-bit random number now in top 31-bits.
+  if (loop < 0 || loop > 32) { // limit loop to 0..32
+    loop = 32;
+  }
+  // loop>>3 cannot exceed 4 because loop cannot exceed 32.
+  // Select top 20..24 bits of lower 31 bits,
+  // giving approximately 0ms to 16ms.
+  // Mean is exponential in loop for first 32 iterations, then 8ms.
+  // The futex path multiplies this by 16, since we expect explicit wakeups
+  // almost always on that path.
+  return r >> (12 - (loop >> 3));
+#endif
+}
+
+} // namespace internal
+} // namespace base
diff --git a/src/third_party/gperftools-2.7/src/base/spinlock_internal.h b/src/third_party/gperftools-2.7/src/base/spinlock_internal.h
new file mode 100644
index 00000000000..aa47e67d4e0
--- /dev/null
+++ b/src/third_party/gperftools-2.7/src/base/spinlock_internal.h
@@ -0,0 +1,51 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+/* Copyright (c) 2010, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * This file is an internal part spinlock.cc and once.cc + * It may not be used directly by code outside of //base. + */ + +#ifndef BASE_SPINLOCK_INTERNAL_H_ +#define BASE_SPINLOCK_INTERNAL_H_ + +#include <config.h> +#include "base/basictypes.h" +#include "base/atomicops.h" + +namespace base { +namespace internal { + +void SpinLockWake(volatile Atomic32 *w, bool all); +void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop); + +} // namespace internal +} // namespace base +#endif diff --git a/src/third_party/gperftools-2.7/src/base/spinlock_linux-inl.h b/src/third_party/gperftools-2.7/src/base/spinlock_linux-inl.h new file mode 100644 index 00000000000..aadf62a4b67 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/spinlock_linux-inl.h @@ -0,0 +1,101 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * This file is a Linux-specific part of spinlock_internal.cc + */ + +#include <errno.h> +#include <sched.h> +#include <time.h> +#include <limits.h> +#include "base/linux_syscall_support.h" + +#define FUTEX_WAIT 0 +#define FUTEX_WAKE 1 +#define FUTEX_PRIVATE_FLAG 128 + +static bool have_futex; +static int futex_private_flag = FUTEX_PRIVATE_FLAG; + +namespace { +static struct InitModule { + InitModule() { + int x = 0; + // futexes are ints, so we can use them only when + // that's the same size as the lockword_ in SpinLock. 
+ have_futex = (sizeof (Atomic32) == sizeof (int) && + sys_futex(&x, FUTEX_WAKE, 1, NULL, NULL, 0) >= 0); + if (have_futex && + sys_futex(&x, FUTEX_WAKE | futex_private_flag, 1, NULL, NULL, 0) < 0) { + futex_private_flag = 0; + } + } +} init_module; + +} // anonymous namespace + + +namespace base { +namespace internal { + +void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) { + if (loop != 0) { + int save_errno = errno; + struct timespec tm; + tm.tv_sec = 0; + if (have_futex) { + tm.tv_nsec = base::internal::SuggestedDelayNS(loop); + } else { + tm.tv_nsec = 2000001; // above 2ms so linux 2.4 doesn't spin + } + if (have_futex) { + tm.tv_nsec *= 16; // increase the delay; we expect explicit wakeups + sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)), + FUTEX_WAIT | futex_private_flag, + value, reinterpret_cast<struct kernel_timespec *>(&tm), + NULL, 0); + } else { + nanosleep(&tm, NULL); + } + errno = save_errno; + } +} + +void SpinLockWake(volatile Atomic32 *w, bool all) { + if (have_futex) { + sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)), + FUTEX_WAKE | futex_private_flag, all? INT_MAX : 1, + NULL, NULL, 0); + } +} + +} // namespace internal +} // namespace base diff --git a/src/third_party/gperftools-2.7/src/base/spinlock_posix-inl.h b/src/third_party/gperftools-2.7/src/base/spinlock_posix-inl.h new file mode 100644 index 00000000000..e73a30fb7d8 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/spinlock_posix-inl.h @@ -0,0 +1,63 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * This file is a Posix-specific part of spinlock_internal.cc + */ + +#include <config.h> +#include <errno.h> +#ifdef HAVE_SCHED_H +#include <sched.h> /* For sched_yield() */ +#endif +#include <time.h> /* For nanosleep() */ + +namespace base { +namespace internal { + +void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) { + int save_errno = errno; + if (loop == 0) { + } else if (loop == 1) { + sched_yield(); + } else { + struct timespec tm; + tm.tv_sec = 0; + tm.tv_nsec = base::internal::SuggestedDelayNS(loop); + nanosleep(&tm, NULL); + } + errno = save_errno; +} + +void SpinLockWake(volatile Atomic32 *w, bool all) { +} + +} // namespace internal +} // namespace base diff --git a/src/third_party/gperftools-2.7/src/base/spinlock_win32-inl.h b/src/third_party/gperftools-2.7/src/base/spinlock_win32-inl.h new file mode 100644 index 00000000000..956b9653e6d --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/spinlock_win32-inl.h @@ -0,0 +1,54 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * This file is a Win32-specific part of spinlock_internal.cc + */ + + +#include <windows.h> + +namespace base { +namespace internal { + +void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) { + if (loop == 0) { + } else if (loop == 1) { + Sleep(0); + } else { + Sleep(base::internal::SuggestedDelayNS(loop) / 1000000); + } +} + +void SpinLockWake(volatile Atomic32 *w, bool all) { +} + +} // namespace internal +} // namespace base diff --git a/src/third_party/gperftools-2.7/src/base/stl_allocator.h b/src/third_party/gperftools-2.7/src/base/stl_allocator.h new file mode 100644 index 00000000000..2345f463c24 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/stl_allocator.h @@ -0,0 +1,98 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Maxim Lifantsev + */ + + +#ifndef BASE_STL_ALLOCATOR_H_ +#define BASE_STL_ALLOCATOR_H_ + +#include <config.h> + +#include <stddef.h> // for ptrdiff_t +#include <limits> + +#include "base/logging.h" + +// Generic allocator class for STL objects +// that uses a given type-less allocator Alloc, which must provide: +// static void* Alloc::Allocate(size_t size); +// static void Alloc::Free(void* ptr, size_t size); +// +// STL_Allocator<T, MyAlloc> provides the same thread-safety +// guarantees as MyAlloc. +// +// Usage example: +// set<T, less<T>, STL_Allocator<T, MyAlloc> > my_set; +// CAVEAT: Parts of the code below are probably specific +// to the STL version(s) we are using. +// The code is simply lifted from what std::allocator<> provides. 
+template <typename T, class Alloc>
+class STL_Allocator {
+ public:
+  // Standard allocator member typedefs (C++03 allocator requirements).
+  typedef size_t size_type;
+  typedef ptrdiff_t difference_type;
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef T& reference;
+  typedef const T& const_reference;
+  typedef T value_type;
+
+  // rebind<T1>::other names this same allocator specialized for T1,
+  // as containers require for their internal node types.
+  template <class T1> struct rebind {
+    typedef STL_Allocator<T1, Alloc> other;
+  };
+
+  // The allocator is stateless, so all of these are no-ops.
+  STL_Allocator() { }
+  STL_Allocator(const STL_Allocator&) { }
+  template <class T1> STL_Allocator(const STL_Allocator<T1, Alloc>&) { }
+  ~STL_Allocator() { }
+
+  pointer address(reference x) const { return &x; }
+  const_pointer address(const_reference x) const { return &x; }
+
+  // Allocate uninitialized storage for n objects of T via the Alloc policy.
+  // The RAW_DCHECK guards against overflow in the n * sizeof(T) product.
+  pointer allocate(size_type n, const void* = 0) {
+    RAW_DCHECK((n * sizeof(T)) / sizeof(T) == n, "n is too big to allocate");
+    return static_cast<T*>(Alloc::Allocate(n * sizeof(T)));
+  }
+  void deallocate(pointer p, size_type n) { Alloc::Free(p, n * sizeof(T)); }
+
+  // Largest count that cannot overflow a size_t byte count.
+  size_type max_size() const { return size_t(-1) / sizeof(T); }
+
+  // Placement-new construction and explicit destructor invocation.
+  void construct(pointer p, const T& val) { ::new(p) T(val); }
+  void construct(pointer p) { ::new(p) T(); }
+  void destroy(pointer p) { p->~T(); }
+
+  // There's no state, so these allocators are always equal
+  bool operator==(const STL_Allocator&) const { return true; }
+};
+
+#endif // BASE_STL_ALLOCATOR_H_
diff --git a/src/third_party/gperftools-2.7/src/base/sysinfo.cc b/src/third_party/gperftools-2.7/src/base/sysinfo.cc
new file mode 100644
index 00000000000..36f706791aa
--- /dev/null
+++ b/src/third_party/gperftools-2.7/src/base/sysinfo.cc
@@ -0,0 +1,891 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <config.h> +#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32) +# define PLATFORM_WINDOWS 1 +#endif + +#include <ctype.h> // for isspace() +#include <stdlib.h> // for getenv() +#include <stdio.h> // for snprintf(), sscanf() +#include <string.h> // for memmove(), memchr(), etc. 
+#include <fcntl.h> // for open() +#include <errno.h> // for errno +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for read() +#endif +#if defined __MACH__ // Mac OS X, almost certainly +#include <mach-o/dyld.h> // for iterating over dll's in ProcMapsIter +#include <mach-o/loader.h> // for iterating over dll's in ProcMapsIter +#include <sys/types.h> +#include <sys/sysctl.h> // how we figure out numcpu's on OS X +#elif defined __FreeBSD__ +#include <sys/sysctl.h> +#elif defined __sun__ // Solaris +#include <procfs.h> // for, e.g., prmap_t +#elif defined(PLATFORM_WINDOWS) +#include <process.h> // for getpid() (actually, _getpid()) +#include <shlwapi.h> // for SHGetValueA() +#include <tlhelp32.h> // for Module32First() +#endif +#include "base/sysinfo.h" +#include "base/commandlineflags.h" +#include "base/dynamic_annotations.h" // for RunningOnValgrind +#include "base/logging.h" + +#ifdef PLATFORM_WINDOWS +#ifdef MODULEENTRY32 +// In a change from the usual W-A pattern, there is no A variant of +// MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A. +// In unicode mode, tlhelp32.h #defines MODULEENTRY32 to be +// MODULEENTRY32W. These #undefs are the only way I see to get back +// access to the original, ascii struct (and related functions). +#undef MODULEENTRY32 +#undef Module32First +#undef Module32Next +#undef PMODULEENTRY32 +#undef LPMODULEENTRY32 +#endif /* MODULEENTRY32 */ +// MinGW doesn't seem to define this, perhaps some windowsen don't either. +#ifndef TH32CS_SNAPMODULE32 +#define TH32CS_SNAPMODULE32 0 +#endif /* TH32CS_SNAPMODULE32 */ +#endif /* PLATFORM_WINDOWS */ + +// Re-run fn until it doesn't cause EINTR. +#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +// open/read/close can set errno, which may be illegal at this +// time, so prefer making the syscalls directly if we can. 
+#ifdef HAVE_SYS_SYSCALL_H +# include <sys/syscall.h> +#endif +#ifdef SYS_open // solaris 11, at least sometimes, only defines SYS_openat +# define safeopen(filename, mode) syscall(SYS_open, filename, mode) +#else +# define safeopen(filename, mode) open(filename, mode) +#endif +#ifdef SYS_read +# define saferead(fd, buffer, size) syscall(SYS_read, fd, buffer, size) +#else +# define saferead(fd, buffer, size) read(fd, buffer, size) +#endif +#ifdef SYS_close +# define safeclose(fd) syscall(SYS_close, fd) +#else +# define safeclose(fd) close(fd) +#endif + +// ---------------------------------------------------------------------- +// GetenvBeforeMain() +// GetUniquePathFromEnv() +// Some non-trivial getenv-related functions. +// ---------------------------------------------------------------------- + +// we reimplement memcmp and friends to avoid depending on any glibc +// calls too early in the process lifetime. This allows us to use +// GetenvBeforeMain from inside ifunc handler +static int slow_memcmp(const void *_a, const void *_b, size_t n) { + const uint8_t *a = reinterpret_cast<const uint8_t *>(_a); + const uint8_t *b = reinterpret_cast<const uint8_t *>(_b); + while (n-- != 0) { + uint8_t ac = *a++; + uint8_t bc = *b++; + if (ac != bc) { + if (ac < bc) { + return -1; + } + return 1; + } + } + return 0; +} + +static const char *slow_memchr(const char *s, int c, size_t n) { + uint8_t ch = static_cast<uint8_t>(c); + while (n--) { + if (*s++ == ch) { + return s - 1; + } + } + return 0; +} + +static size_t slow_strlen(const char *s) { + const char *s2 = slow_memchr(s, '\0', static_cast<size_t>(-1)); + return s2 - s; +} + +// It's not safe to call getenv() in the malloc hooks, because they +// might be called extremely early, before libc is done setting up +// correctly. In particular, the thread library may not be done +// setting up errno. 
So instead, we use the built-in __environ array +// if it exists, and otherwise read /proc/self/environ directly, using +// system calls to read the file, and thus avoid setting errno. +// /proc/self/environ has a limit of how much data it exports (around +// 8K), so it's not an ideal solution. +const char* GetenvBeforeMain(const char* name) { + const int namelen = slow_strlen(name); +#if defined(HAVE___ENVIRON) // if we have it, it's declared in unistd.h + if (__environ) { // can exist but be NULL, if statically linked + for (char** p = __environ; *p; p++) { + if (!slow_memcmp(*p, name, namelen) && (*p)[namelen] == '=') + return *p + namelen+1; + } + return NULL; + } +#endif +#if defined(PLATFORM_WINDOWS) + // TODO(mbelshe) - repeated calls to this function will overwrite the + // contents of the static buffer. + static char envvar_buf[1024]; // enough to hold any envvar we care about + if (!GetEnvironmentVariableA(name, envvar_buf, sizeof(envvar_buf)-1)) + return NULL; + return envvar_buf; +#endif + // static is ok because this function should only be called before + // main(), when we're single-threaded. + static char envbuf[16<<10]; + if (*envbuf == '\0') { // haven't read the environ yet + int fd = safeopen("/proc/self/environ", O_RDONLY); + // The -2 below guarantees the last two bytes of the buffer will be \0\0 + if (fd == -1 || // unable to open the file, fall back onto libc + saferead(fd, envbuf, sizeof(envbuf) - 2) < 0) { // error reading file + RAW_VLOG(1, "Unable to open /proc/self/environ, falling back " + "on getenv(\"%s\"), which may not work", name); + if (fd != -1) safeclose(fd); + return getenv(name); + } + safeclose(fd); + } + const char* p = envbuf; + while (*p != '\0') { // will happen at the \0\0 that terminates the buffer + // proc file has the format NAME=value\0NAME=value\0NAME=value\0... 
+ const char* endp = (char*)slow_memchr(p, '\0', + sizeof(envbuf) - (p - envbuf)); + if (endp == NULL) // this entry isn't NUL terminated + return NULL; + else if (!slow_memcmp(p, name, namelen) && p[namelen] == '=') // it's a match + return p + namelen+1; // point after = + p = endp + 1; + } + return NULL; // env var never found +} + +extern "C" { + const char* TCMallocGetenvSafe(const char* name) { + return GetenvBeforeMain(name); + } +} + +// This takes as an argument an environment-variable name (like +// CPUPROFILE) whose value is supposed to be a file-path, and sets +// path to that path, and returns true. If the env var doesn't exist, +// or is the empty string, leave path unchanged and returns false. +// The reason this is non-trivial is that this function handles munged +// pathnames. Here's why: +// +// If we're a child process of the 'main' process, we can't just use +// getenv("CPUPROFILE") -- the parent process will be using that path. +// Instead we append our pid to the pathname. How do we tell if we're a +// child process? Ideally we'd set an environment variable that all +// our children would inherit. But -- and this is seemingly a bug in +// gcc -- if you do a setenv() in a shared libarary in a global +// constructor, the environment setting is lost by the time main() is +// called. The only safe thing we can do in such a situation is to +// modify the existing envvar. So we do a hack: in the parent, we set +// the high bit of the 1st char of CPUPROFILE. In the child, we +// notice the high bit is set and append the pid(). This works +// assuming cpuprofile filenames don't normally have the high bit set +// in their first character! If that assumption is violated, we'll +// still get a profile, but one with an unexpected name. +// TODO(csilvers): set an envvar instead when we can do it reliably. 
+bool GetUniquePathFromEnv(const char* env_name, char* path) { + char* envval = getenv(env_name); + if (envval == NULL || *envval == '\0') + return false; + if (envval[0] & 128) { // high bit is set + snprintf(path, PATH_MAX, "%c%s_%u", // add pid and clear high bit + envval[0] & 127, envval+1, (unsigned int)(getpid())); + } else { + snprintf(path, PATH_MAX, "%s", envval); + envval[0] |= 128; // set high bit for kids to see + } + return true; +} + +void SleepForMilliseconds(int milliseconds) { +#ifdef PLATFORM_WINDOWS + _sleep(milliseconds); // Windows's _sleep takes milliseconds argument +#else + // Sleep for a few milliseconds + struct timespec sleep_time; + sleep_time.tv_sec = milliseconds / 1000; + sleep_time.tv_nsec = (milliseconds % 1000) * 1000000; + while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) + ; // Ignore signals and wait for the full interval to elapse. +#endif +} + +int GetSystemCPUsCount() +{ +#if defined(PLATFORM_WINDOWS) + // Get the number of processors. + SYSTEM_INFO info; + GetSystemInfo(&info); + return info.dwNumberOfProcessors; +#else + long rv = sysconf(_SC_NPROCESSORS_ONLN); + if (rv < 0) { + return 1; + } + return static_cast<int>(rv); +#endif +} + +// ---------------------------------------------------------------------- + +#if defined __linux__ || defined __FreeBSD__ || defined __sun__ || defined __CYGWIN__ || defined __CYGWIN32__ +static void ConstructFilename(const char* spec, pid_t pid, + char* buf, int buf_size) { + CHECK_LT(snprintf(buf, buf_size, + spec, + static_cast<int>(pid ? pid : getpid())), buf_size); +} +#endif + +// A templatized helper function instantiated for Mach (OS X) only. +// It can handle finding info for both 32 bits and 64 bits. +// Returns true if it successfully handled the hdr, false else. 
+#ifdef __MACH__ // Mac OS X, almost certainly +template<uint32_t kMagic, uint32_t kLCSegment, + typename MachHeader, typename SegmentCommand> +static bool NextExtMachHelper(const mach_header* hdr, + int current_image, int current_load_cmd, + uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename, + uint64 *file_mapping, uint64 *file_pages, + uint64 *anon_mapping, uint64 *anon_pages, + dev_t *dev) { + static char kDefaultPerms[5] = "r-xp"; + if (hdr->magic != kMagic) + return false; + const char* lc = (const char *)hdr + sizeof(MachHeader); + // TODO(csilvers): make this not-quadradic (increment and hold state) + for (int j = 0; j < current_load_cmd; j++) // advance to *our* load_cmd + lc += ((const load_command *)lc)->cmdsize; + if (((const load_command *)lc)->cmd == kLCSegment) { + const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image); + const SegmentCommand* sc = (const SegmentCommand *)lc; + if (start) *start = sc->vmaddr + dlloff; + if (end) *end = sc->vmaddr + sc->vmsize + dlloff; + if (flags) *flags = kDefaultPerms; // can we do better? + if (offset) *offset = sc->fileoff; + if (inode) *inode = 0; + if (filename) + *filename = const_cast<char*>(_dyld_get_image_name(current_image)); + if (file_mapping) *file_mapping = 0; + if (file_pages) *file_pages = 0; // could we use sc->filesize? + if (anon_mapping) *anon_mapping = 0; + if (anon_pages) *anon_pages = 0; + if (dev) *dev = 0; + return true; + } + + return false; +} +#endif + +// Finds |c| in |text|, and assign '\0' at the found position. +// The original character at the modified position should be |c|. +// A pointer to the modified position is stored in |endptr|. +// |endptr| should not be NULL. 
+static bool ExtractUntilChar(char *text, int c, char **endptr) { + CHECK_NE(text, NULL); + CHECK_NE(endptr, NULL); + char *found; + found = strchr(text, c); + if (found == NULL) { + *endptr = NULL; + return false; + } + + *endptr = found; + *found = '\0'; + return true; +} + +// Increments |*text_pointer| while it points a whitespace character. +// It is to follow sscanf's whilespace handling. +static void SkipWhileWhitespace(char **text_pointer, int c) { + if (isspace(c)) { + while (isspace(**text_pointer) && isspace(*((*text_pointer) + 1))) { + ++(*text_pointer); + } + } +} + +template<class T> +static T StringToInteger(char *text, char **endptr, int base) { + assert(false); + return T(); +} + +template<> +int StringToInteger<int>(char *text, char **endptr, int base) { + return strtol(text, endptr, base); +} + +template<> +int64 StringToInteger<int64>(char *text, char **endptr, int base) { + return strtoll(text, endptr, base); +} + +template<> +uint64 StringToInteger<uint64>(char *text, char **endptr, int base) { + return strtoull(text, endptr, base); +} + +template<typename T> +static T StringToIntegerUntilChar( + char *text, int base, int c, char **endptr_result) { + CHECK_NE(endptr_result, NULL); + *endptr_result = NULL; + + char *endptr_extract; + if (!ExtractUntilChar(text, c, &endptr_extract)) + return 0; + + T result; + char *endptr_strto; + result = StringToInteger<T>(text, &endptr_strto, base); + *endptr_extract = c; + + if (endptr_extract != endptr_strto) + return 0; + + *endptr_result = endptr_extract; + SkipWhileWhitespace(endptr_result, c); + + return result; +} + +static char *CopyStringUntilChar( + char *text, unsigned out_len, int c, char *out) { + char *endptr; + if (!ExtractUntilChar(text, c, &endptr)) + return NULL; + + strncpy(out, text, out_len); + out[out_len-1] = '\0'; + *endptr = c; + + SkipWhileWhitespace(&endptr, c); + return endptr; +} + +template<typename T> +static bool StringToIntegerUntilCharWithCheck( + T *outptr, char *text, int 
base, int c, char **endptr) { + *outptr = StringToIntegerUntilChar<T>(*endptr, base, c, endptr); + if (*endptr == NULL || **endptr == '\0') return false; + ++(*endptr); + return true; +} + +static bool ParseProcMapsLine(char *text, uint64 *start, uint64 *end, + char *flags, uint64 *offset, + int *major, int *minor, int64 *inode, + unsigned *filename_offset) { +#if defined(__linux__) + /* + * It's similar to: + * sscanf(text, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n", + * start, end, flags, offset, major, minor, inode, filename_offset) + */ + char *endptr = text; + if (endptr == NULL || *endptr == '\0') return false; + + if (!StringToIntegerUntilCharWithCheck(start, endptr, 16, '-', &endptr)) + return false; + + if (!StringToIntegerUntilCharWithCheck(end, endptr, 16, ' ', &endptr)) + return false; + + endptr = CopyStringUntilChar(endptr, 5, ' ', flags); + if (endptr == NULL || *endptr == '\0') return false; + ++endptr; + + if (!StringToIntegerUntilCharWithCheck(offset, endptr, 16, ' ', &endptr)) + return false; + + if (!StringToIntegerUntilCharWithCheck(major, endptr, 16, ':', &endptr)) + return false; + + if (!StringToIntegerUntilCharWithCheck(minor, endptr, 16, ' ', &endptr)) + return false; + + if (!StringToIntegerUntilCharWithCheck(inode, endptr, 10, ' ', &endptr)) + return false; + + *filename_offset = (endptr - text); + return true; +#else + return false; +#endif +} + +ProcMapsIterator::ProcMapsIterator(pid_t pid) { + Init(pid, NULL, false); +} + +ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer) { + Init(pid, buffer, false); +} + +ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer, + bool use_maps_backing) { + Init(pid, buffer, use_maps_backing); +} + +void ProcMapsIterator::Init(pid_t pid, Buffer *buffer, + bool use_maps_backing) { + pid_ = pid; + using_maps_backing_ = use_maps_backing; + dynamic_buffer_ = NULL; + if (!buffer) { + // If the user didn't pass in any buffer storage, allocate it + // now. 
This is the normal case; the signal handler passes in a + // static buffer. + buffer = dynamic_buffer_ = new Buffer; + } else { + dynamic_buffer_ = NULL; + } + + ibuf_ = buffer->buf_; + + stext_ = etext_ = nextline_ = ibuf_; + ebuf_ = ibuf_ + Buffer::kBufSize - 1; + nextline_ = ibuf_; + +#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) + if (use_maps_backing) { // don't bother with clever "self" stuff in this case + ConstructFilename("/proc/%d/maps_backing", pid, ibuf_, Buffer::kBufSize); + } else if (pid == 0) { + // We have to kludge a bit to deal with the args ConstructFilename + // expects. The 1 is never used -- it's only impt. that it's not 0. + ConstructFilename("/proc/self/maps", 1, ibuf_, Buffer::kBufSize); + } else { + ConstructFilename("/proc/%d/maps", pid, ibuf_, Buffer::kBufSize); + } + // No error logging since this can be called from the crash dump + // handler at awkward moments. Users should call Valid() before + // using. + NO_INTR(fd_ = open(ibuf_, O_RDONLY)); +#elif defined(__FreeBSD__) + // We don't support maps_backing on freebsd + if (pid == 0) { + ConstructFilename("/proc/curproc/map", 1, ibuf_, Buffer::kBufSize); + } else { + ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize); + } + NO_INTR(fd_ = open(ibuf_, O_RDONLY)); +#elif defined(__sun__) + if (pid == 0) { + ConstructFilename("/proc/self/map", 1, ibuf_, Buffer::kBufSize); + } else { + ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize); + } + NO_INTR(fd_ = open(ibuf_, O_RDONLY)); +#elif defined(__MACH__) + current_image_ = _dyld_image_count(); // count down from the top + current_load_cmd_ = -1; +#elif defined(PLATFORM_WINDOWS) + snapshot_ = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE | + TH32CS_SNAPMODULE32, + GetCurrentProcessId()); + memset(&module_, 0, sizeof(module_)); +#else + fd_ = -1; // so Valid() is always false +#endif + +} + +ProcMapsIterator::~ProcMapsIterator() { +#if defined(PLATFORM_WINDOWS) + if (snapshot_ != 
INVALID_HANDLE_VALUE) CloseHandle(snapshot_); +#elif defined(__MACH__) + // no cleanup necessary! +#else + if (fd_ >= 0) NO_INTR(close(fd_)); +#endif + delete dynamic_buffer_; +} + +bool ProcMapsIterator::Valid() const { +#if defined(PLATFORM_WINDOWS) + return snapshot_ != INVALID_HANDLE_VALUE; +#elif defined(__MACH__) + return 1; +#else + return fd_ != -1; +#endif +} + +bool ProcMapsIterator::Next(uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename) { + return NextExt(start, end, flags, offset, inode, filename, NULL, NULL, + NULL, NULL, NULL); +} + +// This has too many arguments. It should really be building +// a map object and returning it. The problem is that this is called +// when the memory allocator state is undefined, hence the arguments. +bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename, + uint64 *file_mapping, uint64 *file_pages, + uint64 *anon_mapping, uint64 *anon_pages, + dev_t *dev) { + +#if defined(__linux__) || defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__CYGWIN32__) + do { + // Advance to the start of the next line + stext_ = nextline_; + + // See if we have a complete line in the buffer already + nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ - stext_)); + if (!nextline_) { + // Shift/fill the buffer so we do have a line + int count = etext_ - stext_; + + // Move the current text to the start of the buffer + memmove(ibuf_, stext_, count); + stext_ = ibuf_; + etext_ = ibuf_ + count; + + int nread = 0; // fill up buffer with text + while (etext_ < ebuf_) { + NO_INTR(nread = read(fd_, etext_, ebuf_ - etext_)); + if (nread > 0) + etext_ += nread; + else + break; + } + + // Zero out remaining characters in buffer at EOF to avoid returning + // garbage from subsequent calls. 
+ if (etext_ != ebuf_ && nread == 0) { + memset(etext_, 0, ebuf_ - etext_); + } + *etext_ = '\n'; // sentinel; safe because ibuf extends 1 char beyond ebuf + nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ + 1 - stext_)); + } + *nextline_ = 0; // turn newline into nul + nextline_ += ((nextline_ < etext_)? 1 : 0); // skip nul if not end of text + // stext_ now points at a nul-terminated line + uint64 tmpstart, tmpend, tmpoffset; + int64 tmpinode; + int major, minor; + unsigned filename_offset = 0; +#if defined(__linux__) + // for now, assume all linuxes have the same format + if (!ParseProcMapsLine( + stext_, + start ? start : &tmpstart, + end ? end : &tmpend, + flags_, + offset ? offset : &tmpoffset, + &major, &minor, + inode ? inode : &tmpinode, &filename_offset)) continue; +#elif defined(__CYGWIN__) || defined(__CYGWIN32__) + // cygwin is like linux, except the third field is the "entry point" + // rather than the offset (see format_process_maps at + // http://cygwin.com/cgi-bin/cvsweb.cgi/src/winsup/cygwin/fhandler_process.cc?rev=1.89&content-type=text/x-cvsweb-markup&cvsroot=src + // Offset is always be 0 on cygwin: cygwin implements an mmap + // by loading the whole file and then calling NtMapViewOfSection. + // Cygwin also seems to set its flags kinda randomly; use windows default. + char tmpflags[5]; + if (offset) + *offset = 0; + strcpy(flags_, "r-xp"); + if (sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n", + start ? start : &tmpstart, + end ? end : &tmpend, + tmpflags, + &tmpoffset, + &major, &minor, + inode ? 
inode : &tmpinode, &filename_offset) != 7) continue; +#elif defined(__FreeBSD__) + // For the format, see http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/fs/procfs/procfs_map.c?rev=1.31&content-type=text/x-cvsweb-markup + tmpstart = tmpend = tmpoffset = 0; + tmpinode = 0; + major = minor = 0; // can't get this info in freebsd + if (inode) + *inode = 0; // nor this + if (offset) + *offset = 0; // seems like this should be in there, but maybe not + // start end resident privateresident obj(?) prot refcnt shadowcnt + // flags copy_on_write needs_copy type filename: + // 0x8048000 0x804a000 2 0 0xc104ce70 r-x 1 0 0x0 COW NC vnode /bin/cat + if (sscanf(stext_, "0x%" SCNx64 " 0x%" SCNx64 " %*d %*d %*p %3s %*d %*d 0x%*x %*s %*s %*s %n", + start ? start : &tmpstart, + end ? end : &tmpend, + flags_, + &filename_offset) != 3) continue; +#endif + + // Depending on the Linux kernel being used, there may or may not be a space + // after the inode if there is no filename. sscanf will in such situations + // nondeterministically either fill in filename_offset or not (the results + // differ on multiple calls in the same run even with identical arguments). + // We don't want to wander off somewhere beyond the end of the string. + size_t stext_length = strlen(stext_); + if (filename_offset == 0 || filename_offset > stext_length) + filename_offset = stext_length; + + // We found an entry + if (flags) *flags = flags_; + if (filename) *filename = stext_ + filename_offset; + if (dev) *dev = minor | (major << 8); + + if (using_maps_backing_) { + // Extract and parse physical page backing info. 
+ char *backing_ptr = stext_ + filename_offset + + strlen(stext_+filename_offset); + + // find the second '(' + int paren_count = 0; + while (--backing_ptr > stext_) { + if (*backing_ptr == '(') { + ++paren_count; + if (paren_count >= 2) { + uint64 tmp_file_mapping; + uint64 tmp_file_pages; + uint64 tmp_anon_mapping; + uint64 tmp_anon_pages; + + sscanf(backing_ptr+1, "F %" SCNx64 " %" SCNd64 ") (A %" SCNx64 " %" SCNd64 ")", + file_mapping ? file_mapping : &tmp_file_mapping, + file_pages ? file_pages : &tmp_file_pages, + anon_mapping ? anon_mapping : &tmp_anon_mapping, + anon_pages ? anon_pages : &tmp_anon_pages); + // null terminate the file name (there is a space + // before the first (. + backing_ptr[-1] = 0; + break; + } + } + } + } + + return true; + } while (etext_ > ibuf_); +#elif defined(__sun__) + // This is based on MA_READ == 4, MA_WRITE == 2, MA_EXEC == 1 + static char kPerms[8][4] = { "---", "--x", "-w-", "-wx", + "r--", "r-x", "rw-", "rwx" }; + COMPILE_ASSERT(MA_READ == 4, solaris_ma_read_must_equal_4); + COMPILE_ASSERT(MA_WRITE == 2, solaris_ma_write_must_equal_2); + COMPILE_ASSERT(MA_EXEC == 1, solaris_ma_exec_must_equal_1); + Buffer object_path; + int nread = 0; // fill up buffer with text + NO_INTR(nread = read(fd_, ibuf_, sizeof(prmap_t))); + if (nread == sizeof(prmap_t)) { + long inode_from_mapname = 0; + prmap_t* mapinfo = reinterpret_cast<prmap_t*>(ibuf_); + // Best-effort attempt to get the inode from the filename. I think the + // two middle ints are major and minor device numbers, but I'm not sure. 
+ sscanf(mapinfo->pr_mapname, "ufs.%*d.%*d.%ld", &inode_from_mapname); + + if (pid_ == 0) { + CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize, + "/proc/self/path/%s", mapinfo->pr_mapname), + Buffer::kBufSize); + } else { + CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize, + "/proc/%d/path/%s", + static_cast<int>(pid_), mapinfo->pr_mapname), + Buffer::kBufSize); + } + ssize_t len = readlink(object_path.buf_, current_filename_, PATH_MAX); + CHECK_LT(len, PATH_MAX); + if (len < 0) + len = 0; + current_filename_[len] = '\0'; + + if (start) *start = mapinfo->pr_vaddr; + if (end) *end = mapinfo->pr_vaddr + mapinfo->pr_size; + if (flags) *flags = kPerms[mapinfo->pr_mflags & 7]; + if (offset) *offset = mapinfo->pr_offset; + if (inode) *inode = inode_from_mapname; + if (filename) *filename = current_filename_; + if (file_mapping) *file_mapping = 0; + if (file_pages) *file_pages = 0; + if (anon_mapping) *anon_mapping = 0; + if (anon_pages) *anon_pages = 0; + if (dev) *dev = 0; + return true; + } +#elif defined(__MACH__) + // We return a separate entry for each segment in the DLL. (TODO(csilvers): + // can we do better?) A DLL ("image") has load-commands, some of which + // talk about segment boundaries. + // cf image_for_address from http://svn.digium.com/view/asterisk/team/oej/minivoicemail/dlfcn.c?revision=53912 + for (; current_image_ >= 0; current_image_--) { + const mach_header* hdr = _dyld_get_image_header(current_image_); + if (!hdr) continue; + if (current_load_cmd_ < 0) // set up for this image + current_load_cmd_ = hdr->ncmds; // again, go from the top down + + // We start with the next load command (we've already looked at this one). 
+ for (current_load_cmd_--; current_load_cmd_ >= 0; current_load_cmd_--) { +#ifdef MH_MAGIC_64 + if (NextExtMachHelper<MH_MAGIC_64, LC_SEGMENT_64, + struct mach_header_64, struct segment_command_64>( + hdr, current_image_, current_load_cmd_, + start, end, flags, offset, inode, filename, + file_mapping, file_pages, anon_mapping, + anon_pages, dev)) { + return true; + } +#endif + if (NextExtMachHelper<MH_MAGIC, LC_SEGMENT, + struct mach_header, struct segment_command>( + hdr, current_image_, current_load_cmd_, + start, end, flags, offset, inode, filename, + file_mapping, file_pages, anon_mapping, + anon_pages, dev)) { + return true; + } + } + // If we get here, no more load_cmd's in this image talk about + // segments. Go on to the next image. + } +#elif defined(PLATFORM_WINDOWS) + static char kDefaultPerms[5] = "r-xp"; + BOOL ok; + if (module_.dwSize == 0) { // only possible before first call + module_.dwSize = sizeof(module_); + ok = Module32First(snapshot_, &module_); + } else { + ok = Module32Next(snapshot_, &module_); + } + if (ok) { + uint64 base_addr = reinterpret_cast<DWORD_PTR>(module_.modBaseAddr); + if (start) *start = base_addr; + if (end) *end = base_addr + module_.modBaseSize; + if (flags) *flags = kDefaultPerms; + if (offset) *offset = 0; + if (inode) *inode = 0; + if (filename) *filename = module_.szExePath; + if (file_mapping) *file_mapping = 0; + if (file_pages) *file_pages = 0; + if (anon_mapping) *anon_mapping = 0; + if (anon_pages) *anon_pages = 0; + if (dev) *dev = 0; + return true; + } +#endif + + // We didn't find anything + return false; +} + +int ProcMapsIterator::FormatLine(char* buffer, int bufsize, + uint64 start, uint64 end, const char *flags, + uint64 offset, int64 inode, + const char *filename, dev_t dev) { + // We assume 'flags' looks like 'rwxp' or 'rwx'. + char r = (flags && flags[0] == 'r') ? 'r' : '-'; + char w = (flags && flags[0] && flags[1] == 'w') ? 'w' : '-'; + char x = (flags && flags[0] && flags[1] && flags[2] == 'x') ? 
'x' : '-'; + // p always seems set on linux, so we set the default to 'p', not '-' + char p = (flags && flags[0] && flags[1] && flags[2] && flags[3] != 'p') + ? '-' : 'p'; + + const int rc = snprintf(buffer, bufsize, + "%08" PRIx64 "-%08" PRIx64 " %c%c%c%c %08" PRIx64 " %02x:%02x %-11" PRId64 " %s\n", + start, end, r,w,x,p, offset, + static_cast<int>(dev/256), static_cast<int>(dev%256), + inode, filename); + return (rc < 0 || rc >= bufsize) ? 0 : rc; +} + +namespace tcmalloc { + +// Helper to add the list of mapped shared libraries to a profile. +// Fill formatted "/proc/self/maps" contents into buffer 'buf' of size 'size' +// and return the actual size occupied in 'buf'. We fill wrote_all to true +// if we successfully wrote all proc lines to buf, false else. +// We do not provision for 0-terminating 'buf'. +int FillProcSelfMaps(char buf[], int size, bool* wrote_all) { + ProcMapsIterator::Buffer iterbuf; + ProcMapsIterator it(0, &iterbuf); // 0 means "current pid" + + uint64 start, end, offset; + int64 inode; + char *flags, *filename; + int bytes_written = 0; + *wrote_all = true; + while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) { + const int line_length = it.FormatLine(buf + bytes_written, + size - bytes_written, + start, end, flags, offset, + inode, filename, 0); + if (line_length == 0) + *wrote_all = false; // failed to write this line out + else + bytes_written += line_length; + + } + return bytes_written; +} + +// Dump the same data as FillProcSelfMaps reads to fd. +// It seems easier to repeat parts of FillProcSelfMaps here than to +// reuse it via a call. 
+void DumpProcSelfMaps(RawFD fd) { + ProcMapsIterator::Buffer iterbuf; + ProcMapsIterator it(0, &iterbuf); // 0 means "current pid" + + uint64 start, end, offset; + int64 inode; + char *flags, *filename; + ProcMapsIterator::Buffer linebuf; + while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) { + int written = it.FormatLine(linebuf.buf_, sizeof(linebuf.buf_), + start, end, flags, offset, inode, filename, + 0); + RawWrite(fd, linebuf.buf_, written); + } +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.7/src/base/sysinfo.h b/src/third_party/gperftools-2.7/src/base/sysinfo.h new file mode 100644 index 00000000000..e30b0d4d1a5 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/sysinfo.h @@ -0,0 +1,232 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// All functions here are thread-hostile due to file caching unless +// commented otherwise. + +#ifndef _SYSINFO_H_ +#define _SYSINFO_H_ + +#include <config.h> + +#include <time.h> +#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__)) +#include <windows.h> // for DWORD +#include <tlhelp32.h> // for CreateToolhelp32Snapshot +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for pid_t +#endif +#include <stddef.h> // for size_t +#include <limits.h> // for PATH_MAX +#include "base/basictypes.h" +#include "base/logging.h" // for RawFD + +// This getenv function is safe to call before the C runtime is initialized. +// On Windows, it utilizes GetEnvironmentVariable() and on unix it uses +// /proc/self/environ instead calling getenv(). It's intended to be used in +// routines that run before main(), when the state required for getenv() may +// not be set up yet. In particular, errno isn't set up until relatively late +// (after the pthreads library has a chance to make it threadsafe), and +// getenv() doesn't work until then. +// On some platforms, this call will utilize the same, static buffer for +// repeated GetenvBeforeMain() calls. Callers should not expect pointers from +// this routine to be long lived. +// Note that on unix, /proc only has the environment at the time the +// application was started, so this routine ignores setenv() calls/etc. 
Also +// note it only reads the first 16K of the environment. +extern const char* GetenvBeforeMain(const char* name); + +// This takes as an argument an environment-variable name (like +// CPUPROFILE) whose value is supposed to be a file-path, and sets +// path to that path, and returns true. Non-trivial for surprising +// reasons, as documented in sysinfo.cc. path must have space PATH_MAX. +extern bool GetUniquePathFromEnv(const char* env_name, char* path); + +extern int GetSystemCPUsCount(); + +void SleepForMilliseconds(int milliseconds); + +// Return true if we're running POSIX (e.g., NPTL on Linux) threads, +// as opposed to a non-POSIX thread library. The thing that we care +// about is whether a thread's pid is the same as the thread that +// spawned it. If so, this function returns true. +// Thread-safe. +// Note: We consider false negatives to be OK. +bool HasPosixThreads(); + +#ifndef SWIG // SWIG doesn't like struct Buffer and variable arguments. + +// A ProcMapsIterator abstracts access to /proc/maps for a given +// process. Needs to be stack-allocatable and avoid using stdio/malloc +// so it can be used in the google stack dumper, heap-profiler, etc. +// +// On Windows and Mac OS X, this iterator iterates *only* over DLLs +// mapped into this process space. For Linux, FreeBSD, and Solaris, +// it iterates over *all* mapped memory regions, including anonymous +// mmaps. For other O/Ss, it is unlikely to work at all, and Valid() +// will always return false. 
Also note: this routine only works on +// FreeBSD if procfs is mounted: make sure this is in your /etc/fstab: +// proc /proc procfs rw 0 0 +class ProcMapsIterator { + public: + struct Buffer { +#ifdef __FreeBSD__ + // FreeBSD requires us to read all of the maps file at once, so + // we have to make a buffer that's "always" big enough + static const size_t kBufSize = 102400; +#else // a one-line buffer is good enough + static const size_t kBufSize = PATH_MAX + 1024; +#endif + char buf_[kBufSize]; + }; + + + // Create a new iterator for the specified pid. pid can be 0 for "self". + explicit ProcMapsIterator(pid_t pid); + + // Create an iterator with specified storage (for use in signal + // handler). "buffer" should point to a ProcMapsIterator::Buffer + // buffer can be NULL in which case a bufer will be allocated. + ProcMapsIterator(pid_t pid, Buffer *buffer); + + // Iterate through maps_backing instead of maps if use_maps_backing + // is true. Otherwise the same as above. buffer can be NULL and + // it will allocate a buffer itself. + ProcMapsIterator(pid_t pid, Buffer *buffer, + bool use_maps_backing); + + // Returns true if the iterator successfully initialized; + bool Valid() const; + + // Returns a pointer to the most recently parsed line. Only valid + // after Next() returns true, and until the iterator is destroyed or + // Next() is called again. This may give strange results on non-Linux + // systems. Prefer FormatLine() if that may be a concern. + const char *CurrentLine() const { return stext_; } + + // Writes the "canonical" form of the /proc/xxx/maps info for a single + // line to the passed-in buffer. Returns the number of bytes written, + // or 0 if it was not able to write the complete line. (To guarantee + // success, buffer should have size at least Buffer::kBufSize.) + // Takes as arguments values set via a call to Next(). 
The + // "canonical" form of the line (taken from linux's /proc/xxx/maps): + // <start_addr(hex)>-<end_addr(hex)> <perms(rwxp)> <offset(hex)> + + // <major_dev(hex)>:<minor_dev(hex)> <inode> <filename> Note: the + // eg + // 08048000-0804c000 r-xp 00000000 03:01 3793678 /bin/cat + // If you don't have the dev_t (dev), feel free to pass in 0. + // (Next() doesn't return a dev_t, though NextExt does.) + // + // Note: if filename and flags were obtained via a call to Next(), + // then the output of this function is only valid if Next() returned + // true, and only until the iterator is destroyed or Next() is + // called again. (Since filename, at least, points into CurrentLine.) + static int FormatLine(char* buffer, int bufsize, + uint64 start, uint64 end, const char *flags, + uint64 offset, int64 inode, const char *filename, + dev_t dev); + + // Find the next entry in /proc/maps; return true if found or false + // if at the end of the file. + // + // Any of the result pointers can be NULL if you're not interested + // in those values. + // + // If "flags" and "filename" are passed, they end up pointing to + // storage within the ProcMapsIterator that is valid only until the + // iterator is destroyed or Next() is called again. The caller may + // modify the contents of these strings (up as far as the first NUL, + // and only until the subsequent call to Next()) if desired. + + // The offsets are all uint64 in order to handle the case of a + // 32-bit process running on a 64-bit kernel + // + // IMPORTANT NOTE: see top-of-class notes for details about what + // mapped regions Next() iterates over, depending on O/S. + // TODO(csilvers): make flags and filename const. 
+ bool Next(uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename); + + bool NextExt(uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename, + uint64 *file_mapping, uint64 *file_pages, + uint64 *anon_mapping, uint64 *anon_pages, + dev_t *dev); + + ~ProcMapsIterator(); + + private: + void Init(pid_t pid, Buffer *buffer, bool use_maps_backing); + + char *ibuf_; // input buffer + char *stext_; // start of text + char *etext_; // end of text + char *nextline_; // start of next line + char *ebuf_; // end of buffer (1 char for a nul) +#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__)) + HANDLE snapshot_; // filehandle on dll info + // In a change from the usual W-A pattern, there is no A variant of + // MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A. + // We want the original A variants, and this #undef is the only + // way I see to get them. Redefining it when we're done prevents us + // from affecting other .cc files. +# ifdef MODULEENTRY32 // Alias of W +# undef MODULEENTRY32 + MODULEENTRY32 module_; // info about current dll (and dll iterator) +# define MODULEENTRY32 MODULEENTRY32W +# else // It's the ascii, the one we want. + MODULEENTRY32 module_; // info about current dll (and dll iterator) +# endif +#elif defined(__MACH__) + int current_image_; // dll's are called "images" in macos parlance + int current_load_cmd_; // the segment of this dll we're examining +#elif defined(__sun__) // Solaris + int fd_; + char current_filename_[PATH_MAX]; +#else + int fd_; // filehandle on /proc/*/maps +#endif + pid_t pid_; + char flags_[10]; + Buffer* dynamic_buffer_; // dynamically-allocated Buffer + bool using_maps_backing_; // true if we are looking at maps_backing instead of maps. 
+}; + +#endif /* #ifndef SWIG */ + +// Helper routines + +namespace tcmalloc { +int FillProcSelfMaps(char buf[], int size, bool* wrote_all); +void DumpProcSelfMaps(RawFD fd); +} + +#endif /* #ifndef _SYSINFO_H_ */ diff --git a/src/third_party/gperftools-2.7/src/base/thread_annotations.h b/src/third_party/gperftools-2.7/src/base/thread_annotations.h new file mode 100644 index 00000000000..f57b2999ee7 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/thread_annotations.h @@ -0,0 +1,134 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Le-Chun Wu +// +// This header file contains the macro definitions for thread safety +// annotations that allow the developers to document the locking policies +// of their multi-threaded code. The annotations can also help program +// analysis tools to identify potential thread safety issues. +// +// The annotations are implemented using GCC's "attributes" extension. +// Using the macros defined here instead of the raw GCC attributes allows +// for portability and future compatibility. +// +// This functionality is not yet fully implemented in perftools, +// but may be one day. + +#ifndef BASE_THREAD_ANNOTATIONS_H_ +#define BASE_THREAD_ANNOTATIONS_H_ + + +#if defined(__GNUC__) \ + && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) \ + && defined(__SUPPORT_TS_ANNOTATION__) && (!defined(SWIG)) +#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) +#else +#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op +#endif + + +// Document if a shared variable/field needs to be protected by a lock. +// GUARDED_BY allows the user to specify a particular lock that should be +// held when accessing the annotated variable, while GUARDED_VAR only +// indicates a shared variable should be guarded (by any lock). GUARDED_VAR +// is primarily used when the client cannot express the name of the lock. 
+#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) +#define GUARDED_VAR THREAD_ANNOTATION_ATTRIBUTE__(guarded) + +// Document if the memory location pointed to by a pointer should be guarded +// by a lock when dereferencing the pointer. Similar to GUARDED_VAR, +// PT_GUARDED_VAR is primarily used when the client cannot express the name +// of the lock. Note that a pointer variable to a shared memory location +// could itself be a shared variable. For example, if a shared global pointer +// q, which is guarded by mu1, points to a shared memory location that is +// guarded by mu2, q should be annotated as follows: +// int *q GUARDED_BY(mu1) PT_GUARDED_BY(mu2); +#define PT_GUARDED_BY(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded_by(x)) +#define PT_GUARDED_VAR \ + THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded) + +// Document the acquisition order between locks that can be held +// simultaneously by a thread. For any two locks that need to be annotated +// to establish an acquisition order, only one of them needs the annotation. +// (i.e. You don't have to annotate both locks with both ACQUIRED_AFTER +// and ACQUIRED_BEFORE.) +#define ACQUIRED_AFTER(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(x)) +#define ACQUIRED_BEFORE(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(x)) + +// The following three annotations document the lock requirements for +// functions/methods. + +// Document if a function expects certain locks to be held before it is called +#define EXCLUSIVE_LOCKS_REQUIRED(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(x)) + +#define SHARED_LOCKS_REQUIRED(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(x)) + +// Document the locks acquired in the body of the function. These locks +// cannot be held when calling this function (as google3's Mutex locks are +// non-reentrant). 
+#define LOCKS_EXCLUDED(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(x)) + +// Document the lock the annotated function returns without acquiring it. +#define LOCK_RETURNED(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x)) + +// Document if a class/type is a lockable type (such as the Mutex class). +#define LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(lockable) + +// Document if a class is a scoped lockable type (such as the MutexLock class). +#define SCOPED_LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) + +// The following annotations specify lock and unlock primitives. +#define EXCLUSIVE_LOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock(x)) + +#define SHARED_LOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(shared_lock(x)) + +#define EXCLUSIVE_TRYLOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock(x)) + +#define SHARED_TRYLOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock(x)) + +#define UNLOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(unlock(x)) + +// An escape hatch for thread safety analysis to ignore the annotated function. +#define NO_THREAD_SAFETY_ANALYSIS \ + THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis) + +#endif // BASE_THREAD_ANNOTATIONS_H_ diff --git a/src/third_party/gperftools-2.7/src/base/thread_lister.c b/src/third_party/gperftools-2.7/src/base/thread_lister.c new file mode 100644 index 00000000000..9dc8d721892 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/thread_lister.c @@ -0,0 +1,83 @@ +/* Copyright (c) 2005-2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +#include "config.h" + +#include "base/thread_lister.h" + +#include <stdio.h> /* needed for NULL on some powerpc platforms (?!) */ +#include <sys/types.h> +#include <unistd.h> /* for getpid */ + +#ifdef HAVE_SYS_PRCTL +# include <sys/prctl.h> +#endif + +#include "base/linuxthreads.h" +/* Include other thread listers here that define THREADS macro + * only when they can provide a good implementation. + */ + +#ifndef THREADS + +/* Default trivial thread lister for single-threaded applications, + * or if the multi-threading code has not been ported, yet. + */ + +int TCMalloc_ListAllProcessThreads(void *parameter, + ListAllProcessThreadsCallBack callback, ...) 
{ + int rc; + va_list ap; + pid_t pid; + +#ifdef HAVE_SYS_PRCTL + int dumpable = prctl(PR_GET_DUMPABLE, 0); + if (!dumpable) + prctl(PR_SET_DUMPABLE, 1); +#endif + va_start(ap, callback); + pid = getpid(); + rc = callback(parameter, 1, &pid, ap); + va_end(ap); +#ifdef HAVE_SYS_PRCTL + if (!dumpable) + prctl(PR_SET_DUMPABLE, 0); +#endif + return rc; +} + +int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) { + return 1; +} + +#endif /* ifndef THREADS */ diff --git a/src/third_party/gperftools-2.7/src/base/thread_lister.h b/src/third_party/gperftools-2.7/src/base/thread_lister.h new file mode 100644 index 00000000000..6e70b89fef5 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/thread_lister.h @@ -0,0 +1,83 @@ +/* -*- Mode: c; c-basic-offset: 2; indent-tabs-mode: nil -*- */ +/* Copyright (c) 2005-2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +#ifndef _THREAD_LISTER_H +#define _THREAD_LISTER_H + +#include <stdarg.h> +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int (*ListAllProcessThreadsCallBack)(void *parameter, + int num_threads, + pid_t *thread_pids, + va_list ap); + +/* This function gets the list of all linux threads of the current process + * passes them to the 'callback' along with the 'parameter' pointer; at the + * call back call time all the threads are paused via + * PTRACE_ATTACH. + * The callback is executed from a separate thread which shares only the + * address space, the filesystem, and the filehandles with the caller. Most + * notably, it does not share the same pid and ppid; and if it terminates, + * the rest of the application is still there. 'callback' is supposed to do + * or arrange for TCMalloc_ResumeAllProcessThreads. This happens automatically, if + * the thread raises a synchronous signal (e.g. SIGSEGV); asynchronous + * signals are blocked. If the 'callback' decides to unblock them, it must + * ensure that they cannot terminate the application, or that + * TCMalloc_ResumeAllProcessThreads will get called. + * It is an error for the 'callback' to make any library calls that could + * acquire locks. Most notably, this means that most system calls have to + * avoid going through libc. Also, this means that it is not legal to call + * exit() or abort(). 
+ * We return -1 on error and the return value of 'callback' on success. + */ +int TCMalloc_ListAllProcessThreads(void *parameter, + ListAllProcessThreadsCallBack callback, ...); + +/* This function resumes the list of all linux threads that + * TCMalloc_ListAllProcessThreads pauses before giving to its + * callback. The function returns non-zero if at least one thread was + * suspended and has now been resumed. + */ +int TCMalloc_ResumeAllProcessThreads(int num_threads, pid_t *thread_pids); + +#ifdef __cplusplus +} +#endif + +#endif /* _THREAD_LISTER_H */ diff --git a/src/third_party/gperftools-2.7/src/base/vdso_support.cc b/src/third_party/gperftools-2.7/src/base/vdso_support.cc new file mode 100644 index 00000000000..f88aa303127 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/vdso_support.cc @@ -0,0 +1,142 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup in the kernel VDSO page. +// +// VDSOSupport -- a class representing kernel VDSO (if present). +// + +#include "base/vdso_support.h" + +#ifdef HAVE_VDSO_SUPPORT // defined in vdso_support.h + +#include <fcntl.h> +#include <stddef.h> // for ptrdiff_t + +#include "base/atomicops.h" // for MemoryBarrier +#include "base/logging.h" +#include "base/dynamic_annotations.h" +#include "base/basictypes.h" // for COMPILE_ASSERT + +using base::subtle::MemoryBarrier; + +#ifndef AT_SYSINFO_EHDR +#define AT_SYSINFO_EHDR 33 +#endif + +namespace base { + +const void *VDSOSupport::vdso_base_ = ElfMemImage::kInvalidBase; +VDSOSupport::VDSOSupport() + // If vdso_base_ is still set to kInvalidBase, we got here + // before VDSOSupport::Init has been called. Call it now. + : image_(vdso_base_ == ElfMemImage::kInvalidBase ? Init() : vdso_base_) { +} + +// NOTE: we can't use GoogleOnceInit() below, because we can be +// called by tcmalloc, and none of the *once* stuff may be functional yet. +// +// In addition, we hope that the VDSOSupportHelper constructor +// causes this code to run before there are any threads, and before +// InitGoogle() has executed any chroot or setuid calls. +// +// Finally, even if there is a race here, it is harmless, because +// the operation should be idempotent. 
// Lazily determine the address of the kernel-mapped VDSO and cache it in
// vdso_base_.  vdso_base_ is a tri-state (see vdso_support.h): it starts as
// ElfMemImage::kInvalidBase, becomes NULL when no VDSO exists, and otherwise
// holds the address of the VDSO ELF header.  Returns the cached value (NULL
// if there is no VDSO).  Reads /proc/self/auxv directly, so it must run
// before any chroot()/setuid() that could make that file unreachable.
const void *VDSOSupport::Init() {
  if (vdso_base_ == ElfMemImage::kInvalidBase) {
    // Valgrind zaps AT_SYSINFO_EHDR and friends from the auxv[]
    // on stack, and so glibc works as if VDSO was not present.
    // But going directly to kernel via /proc/self/auxv below bypasses
    // Valgrind zapping. So we check for Valgrind separately.
    if (RunningOnValgrind()) {
      vdso_base_ = NULL;  // record "no VDSO" so we don't re-probe
      return NULL;
    }
    int fd = open("/proc/self/auxv", O_RDONLY);
    if (fd == -1) {
      // Kernel too old to have a VDSO.
      vdso_base_ = NULL;
      return NULL;
    }
    // Scan the auxiliary vector for AT_SYSINFO_EHDR, the entry through which
    // the kernel publishes the VDSO load address.
    ElfW(auxv_t) aux;
    while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) {
      if (aux.a_type == AT_SYSINFO_EHDR) {
        COMPILE_ASSERT(sizeof(vdso_base_) == sizeof(aux.a_un.a_val),
                       unexpected_sizeof_pointer_NE_sizeof_a_val);
        vdso_base_ = reinterpret_cast<void *>(aux.a_un.a_val);
        break;
      }
    }
    close(fd);
    if (vdso_base_ == ElfMemImage::kInvalidBase) {
      // Didn't find AT_SYSINFO_EHDR in auxv[].
      vdso_base_ = NULL;
    }
  }
  return vdso_base_;
}

// Testing hook: replace the cached VDSO base with |base| (possibly a mock)
// and re-point image_ at it.  Returns the previous base so the caller can
// restore it afterwards (see the declaration in vdso_support.h).
const void *VDSOSupport::SetBase(const void *base) {
  CHECK(base != ElfMemImage::kInvalidBase);
  const void *old_base = vdso_base_;
  vdso_base_ = base;
  image_.Init(base);
  return old_base;
}

// Look up the versioned dynamic symbol |name|@|version| of the given ELF
// symbol type in the VDSO; forwards to the underlying ElfMemImage.
bool VDSOSupport::LookupSymbol(const char *name,
                               const char *version,
                               int type,
                               SymbolInfo *info) const {
  return image_.LookupSymbol(name, version, type, info);
}

// Find the VDSO symbol (if any) whose extent covers |address|; forwards to
// the underlying ElfMemImage.
bool VDSOSupport::LookupSymbolByAddress(const void *address,
                                        SymbolInfo *info_out) const {
  return image_.LookupSymbolByAddress(address, info_out);
}

// We need to make sure VDSOSupport::Init() is called before
// the main() runs, since it might do something like setuid or
// chroot. If VDSOSupport
// is used in any global constructor, this will happen, since
// VDSOSupport's constructor calls Init. But if not, we need to
// ensure it here, with a global constructor of our own. This
// is an allowed exception to the normal rule against non-trivial
// global constructors.
+static class VDSOInitHelper { + public: + VDSOInitHelper() { VDSOSupport::Init(); } +} vdso_init_helper; +} + +#endif // HAVE_VDSO_SUPPORT diff --git a/src/third_party/gperftools-2.7/src/base/vdso_support.h b/src/third_party/gperftools-2.7/src/base/vdso_support.h new file mode 100644 index 00000000000..c17d22494c7 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/base/vdso_support.h @@ -0,0 +1,136 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup in the kernel VDSO page. +// +// VDSO stands for "Virtual Dynamic Shared Object" -- a page of +// executable code, which looks like a shared library, but doesn't +// necessarily exist anywhere on disk, and which gets mmap()ed into +// every process by kernels which support VDSO, such as 2.6.x for 32-bit +// executables, and 2.6.24 and above for 64-bit executables. +// +// More details could be found here: +// http://www.trilithium.com/johan/2005/08/linux-gate/ +// +// VDSOSupport -- a class representing kernel VDSO (if present). +// +// Example usage: +// VDSOSupport vdso; +// VDSOSupport::SymbolInfo info; +// typedef (*FN)(unsigned *, void *, void *); +// FN fn = NULL; +// if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) { +// fn = reinterpret_cast<FN>(info.address); +// } + +#ifndef BASE_VDSO_SUPPORT_H_ +#define BASE_VDSO_SUPPORT_H_ + +#include <config.h> +#include "base/basictypes.h" +#include "base/elf_mem_image.h" + +#ifdef HAVE_ELF_MEM_IMAGE + +// Enable VDSO support only for the architectures/operating systems that +// support it. +#if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) +#define HAVE_VDSO_SUPPORT 1 +#endif + +#include <stdlib.h> // for NULL + +namespace base { + +// NOTE: this class may be used from within tcmalloc, and can not +// use any memory allocation routines. +class VDSOSupport { + public: + VDSOSupport(); + + typedef ElfMemImage::SymbolInfo SymbolInfo; + typedef ElfMemImage::SymbolIterator SymbolIterator; + + // Answers whether we have a vdso at all. + bool IsPresent() const { return image_.IsPresent(); } + + // Allow to iterate over all VDSO symbols. + SymbolIterator begin() const { return image_.begin(); } + SymbolIterator end() const { return image_.end(); } + + // Look up versioned dynamic symbol in the kernel VDSO. + // Returns false if VDSO is not present, or doesn't contain given + // symbol/version/type combination. 
+ // If info_out != NULL, additional details are filled in. + bool LookupSymbol(const char *name, const char *version, + int symbol_type, SymbolInfo *info_out) const; + + // Find info about symbol (if any) which overlaps given address. + // Returns true if symbol was found; false if VDSO isn't present + // or doesn't have a symbol overlapping given address. + // If info_out != NULL, additional details are filled in. + bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; + + // Used only for testing. Replace real VDSO base with a mock. + // Returns previous value of vdso_base_. After you are done testing, + // you are expected to call SetBase() with previous value, in order to + // reset state to the way it was. + const void *SetBase(const void *s); + + // Computes vdso_base_ and returns it. Should be called as early as + // possible; before any thread creation, chroot or setuid. + static const void *Init(); + + private: + // image_ represents VDSO ELF image in memory. + // image_.ehdr_ == NULL implies there is no VDSO. + ElfMemImage image_; + + // Cached value of auxv AT_SYSINFO_EHDR, computed once. + // This is a tri-state: + // kInvalidBase => value hasn't been determined yet. + // 0 => there is no VDSO. + // else => vma of VDSO Elf{32,64}_Ehdr. + // + // When testing with mock VDSO, low bit is set. + // The low bit is always available because vdso_base_ is + // page-aligned. + static const void *vdso_base_; + + DISALLOW_COPY_AND_ASSIGN(VDSOSupport); +}; + +} // namespace base + +#endif // HAVE_ELF_MEM_IMAGE + +#endif // BASE_VDSO_SUPPORT_H_ diff --git a/src/third_party/gperftools-2.7/src/central_freelist.cc b/src/third_party/gperftools-2.7/src/central_freelist.cc new file mode 100644 index 00000000000..01a73104184 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/central_freelist.cc @@ -0,0 +1,387 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include "config.h" +#include <algorithm> +#include "central_freelist.h" +#include "internal_logging.h" // for ASSERT, MESSAGE +#include "linked_list.h" // for SLL_Next, SLL_Push, etc +#include "page_heap.h" // for PageHeap +#include "static_vars.h" // for Static + +using std::min; +using std::max; + +namespace tcmalloc { + +void CentralFreeList::Init(size_t cl) { + size_class_ = cl; + tcmalloc::DLL_Init(&empty_); + tcmalloc::DLL_Init(&nonempty_); + num_spans_ = 0; + counter_ = 0; + + max_cache_size_ = kMaxNumTransferEntries; +#ifdef TCMALLOC_SMALL_BUT_SLOW + // Disable the transfer cache for the small footprint case. + cache_size_ = 0; +#else + cache_size_ = 16; +#endif + if (cl > 0) { + // Limit the maximum size of the cache based on the size class. If this + // is not done, large size class objects will consume a lot of memory if + // they just sit in the transfer cache. + int32_t bytes = Static::sizemap()->ByteSizeForClass(cl); + int32_t objs_to_move = Static::sizemap()->num_objects_to_move(cl); + + ASSERT(objs_to_move > 0 && bytes > 0); + // Limit each size class cache to at most 1MB of objects or one entry, + // whichever is greater. Total transfer cache memory used across all + // size classes then can't be greater than approximately + // 1MB * kMaxNumTransferEntries. + // min and max are in parens to avoid macro-expansion on windows. + max_cache_size_ = (min)(max_cache_size_, + (max)(1, (1024 * 1024) / (bytes * objs_to_move))); + cache_size_ = (min)(cache_size_, max_cache_size_); + } + used_slots_ = 0; + ASSERT(cache_size_ <= max_cache_size_); +} + +void CentralFreeList::ReleaseListToSpans(void* start) { + while (start) { + void *next = SLL_Next(start); + ReleaseToSpans(start); + start = next; + } +} + +// MapObjectToSpan should logically be part of ReleaseToSpans. But +// this triggers an optimization bug in gcc 4.5.0. 
Moving to a +// separate function, and making sure that function isn't inlined, +// seems to fix the problem. It also should be fixed for gcc 4.5.1. +static +#if __GNUC__ == 4 && __GNUC_MINOR__ == 5 && __GNUC_PATCHLEVEL__ == 0 +__attribute__ ((noinline)) +#endif +Span* MapObjectToSpan(void* object) { + const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift; + Span* span = Static::pageheap()->GetDescriptor(p); + return span; +} + +void CentralFreeList::ReleaseToSpans(void* object) { + Span* span = MapObjectToSpan(object); + ASSERT(span != NULL); + ASSERT(span->refcount > 0); + + // If span is empty, move it to non-empty list + if (span->objects == NULL) { + tcmalloc::DLL_Remove(span); + tcmalloc::DLL_Prepend(&nonempty_, span); + Event(span, 'N', 0); + } + + // The following check is expensive, so it is disabled by default + if (false) { + // Check that object does not occur in list + int got = 0; + for (void* p = span->objects; p != NULL; p = *((void**) p)) { + ASSERT(p != object); + got++; + } + ASSERT(got + span->refcount == + (span->length<<kPageShift) / + Static::sizemap()->ByteSizeForClass(span->sizeclass)); + } + + counter_++; + span->refcount--; + if (span->refcount == 0) { + Event(span, '#', 0); + counter_ -= ((span->length<<kPageShift) / + Static::sizemap()->ByteSizeForClass(span->sizeclass)); + tcmalloc::DLL_Remove(span); + --num_spans_; + + // Release central list lock while operating on pageheap + lock_.Unlock(); + { + SpinLockHolder h(Static::pageheap_lock()); + Static::pageheap()->Delete(span); + } + lock_.Lock(); + } else { + *(reinterpret_cast<void**>(object)) = span->objects; + span->objects = object; + } +} + +bool CentralFreeList::EvictRandomSizeClass( + int locked_size_class, bool force) { + static int race_counter = 0; + int t = race_counter++; // Updated without a lock, but who cares. 
+ if (t >= Static::num_size_classes()) { + while (t >= Static::num_size_classes()) { + t -= Static::num_size_classes(); + } + race_counter = t; + } + ASSERT(t >= 0); + ASSERT(t < Static::num_size_classes()); + if (t == locked_size_class) return false; + return Static::central_cache()[t].ShrinkCache(locked_size_class, force); +} + +bool CentralFreeList::MakeCacheSpace() { + // Is there room in the cache? + if (used_slots_ < cache_size_) return true; + // Check if we can expand this cache? + if (cache_size_ == max_cache_size_) return false; + // Ok, we'll try to grab an entry from some other size class. + if (EvictRandomSizeClass(size_class_, false) || + EvictRandomSizeClass(size_class_, true)) { + // Succeeded in evicting, we're going to make our cache larger. + // However, we may have dropped and re-acquired the lock in + // EvictRandomSizeClass (via ShrinkCache and the LockInverter), so the + // cache_size may have changed. Therefore, check and verify that it is + // still OK to increase the cache_size. + if (cache_size_ < max_cache_size_) { + cache_size_++; + return true; + } + } + return false; +} + + +namespace { +class LockInverter { + private: + SpinLock *held_, *temp_; + public: + inline explicit LockInverter(SpinLock* held, SpinLock *temp) + : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); } + inline ~LockInverter() { temp_->Unlock(); held_->Lock(); } +}; +} + +// This function is marked as NO_THREAD_SAFETY_ANALYSIS because it uses +// LockInverter to release one lock and acquire another in scoped-lock +// style, which our current annotation/analysis does not support. +bool CentralFreeList::ShrinkCache(int locked_size_class, bool force) + NO_THREAD_SAFETY_ANALYSIS { + // Start with a quick check without taking a lock. + if (cache_size_ == 0) return false; + // We don't evict from a full cache unless we are 'forcing'. 
+ if (force == false && used_slots_ == cache_size_) return false; + + // Grab lock, but first release the other lock held by this thread. We use + // the lock inverter to ensure that we never hold two size class locks + // concurrently. That can create a deadlock because there is no well + // defined nesting order. + LockInverter li(&Static::central_cache()[locked_size_class].lock_, &lock_); + ASSERT(used_slots_ <= cache_size_); + ASSERT(0 <= cache_size_); + if (cache_size_ == 0) return false; + if (used_slots_ == cache_size_) { + if (force == false) return false; + // ReleaseListToSpans releases the lock, so we have to make all the + // updates to the central list before calling it. + cache_size_--; + used_slots_--; + ReleaseListToSpans(tc_slots_[used_slots_].head); + return true; + } + cache_size_--; + return true; +} + +void CentralFreeList::InsertRange(void *start, void *end, int N) { + SpinLockHolder h(&lock_); + if (N == Static::sizemap()->num_objects_to_move(size_class_) && + MakeCacheSpace()) { + int slot = used_slots_++; + ASSERT(slot >=0); + ASSERT(slot < max_cache_size_); + TCEntry *entry = &tc_slots_[slot]; + entry->head = start; + entry->tail = end; + return; + } + ReleaseListToSpans(start); +} + +int CentralFreeList::RemoveRange(void **start, void **end, int N) { + ASSERT(N > 0); + lock_.Lock(); + if (N == Static::sizemap()->num_objects_to_move(size_class_) && + used_slots_ > 0) { + int slot = --used_slots_; + ASSERT(slot >= 0); + TCEntry *entry = &tc_slots_[slot]; + *start = entry->head; + *end = entry->tail; + lock_.Unlock(); + return N; + } + + int result = 0; + *start = NULL; + *end = NULL; + // TODO: Prefetch multiple TCEntries? 
+ result = FetchFromOneSpansSafe(N, start, end); + if (result != 0) { + while (result < N) { + int n; + void* head = NULL; + void* tail = NULL; + n = FetchFromOneSpans(N - result, &head, &tail); + if (!n) break; + result += n; + SLL_PushRange(start, head, tail); + } + } + lock_.Unlock(); + return result; +} + + +int CentralFreeList::FetchFromOneSpansSafe(int N, void **start, void **end) { + int result = FetchFromOneSpans(N, start, end); + if (!result) { + Populate(); + result = FetchFromOneSpans(N, start, end); + } + return result; +} + +int CentralFreeList::FetchFromOneSpans(int N, void **start, void **end) { + if (tcmalloc::DLL_IsEmpty(&nonempty_)) return 0; + Span* span = nonempty_.next; + + ASSERT(span->objects != NULL); + + int result = 0; + void *prev, *curr; + curr = span->objects; + do { + prev = curr; + curr = *(reinterpret_cast<void**>(curr)); + } while (++result < N && curr != NULL); + + if (curr == NULL) { + // Move to empty list + tcmalloc::DLL_Remove(span); + tcmalloc::DLL_Prepend(&empty_, span); + Event(span, 'E', 0); + } + + *start = span->objects; + *end = prev; + span->objects = curr; + SLL_SetNext(*end, NULL); + span->refcount += result; + counter_ -= result; + return result; +} + +// Fetch memory from the system and add to the central cache freelist. +void CentralFreeList::Populate() { + // Release central list lock while operating on pageheap + lock_.Unlock(); + const size_t npages = Static::sizemap()->class_to_pages(size_class_); + + Span* span; + { + SpinLockHolder h(Static::pageheap_lock()); + span = Static::pageheap()->New(npages); + if (span) Static::pageheap()->RegisterSizeClass(span, size_class_); + } + if (span == NULL) { + Log(kLog, __FILE__, __LINE__, + "tcmalloc: allocation failed", npages << kPageShift); + lock_.Lock(); + return; + } + ASSERT(span->length == npages); + // Cache sizeclass info eagerly. Locking is not necessary. 
+ // (Instead of being eager, we could just replace any stale info + // about this span, but that seems to be no better in practice.) + for (int i = 0; i < npages; i++) { + Static::pageheap()->SetCachedSizeClass(span->start + i, size_class_); + } + + // Split the block into pieces and add to the free-list + // TODO: coloring of objects to avoid cache conflicts? + void** tail = &span->objects; + char* ptr = reinterpret_cast<char*>(span->start << kPageShift); + char* limit = ptr + (npages << kPageShift); + const size_t size = Static::sizemap()->ByteSizeForClass(size_class_); + int num = 0; + while (ptr + size <= limit) { + *tail = ptr; + tail = reinterpret_cast<void**>(ptr); + ptr += size; + num++; + } + ASSERT(ptr <= limit); + *tail = NULL; + span->refcount = 0; // No sub-object in use yet + + // Add span to list of non-empty spans + lock_.Lock(); + tcmalloc::DLL_Prepend(&nonempty_, span); + ++num_spans_; + counter_ += num; +} + +int CentralFreeList::tc_length() { + SpinLockHolder h(&lock_); + return used_slots_ * Static::sizemap()->num_objects_to_move(size_class_); +} + +size_t CentralFreeList::OverheadBytes() { + SpinLockHolder h(&lock_); + if (size_class_ == 0) { // 0 holds the 0-sized allocations + return 0; + } + const size_t pages_per_span = Static::sizemap()->class_to_pages(size_class_); + const size_t object_size = Static::sizemap()->class_to_size(size_class_); + ASSERT(object_size > 0); + const size_t overhead_per_span = (pages_per_span * kPageSize) % object_size; + return num_spans_ * overhead_per_span; +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.7/src/central_freelist.h b/src/third_party/gperftools-2.7/src/central_freelist.h new file mode 100644 index 00000000000..4148680d20a --- /dev/null +++ b/src/third_party/gperftools-2.7/src/central_freelist.h @@ -0,0 +1,211 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#ifndef TCMALLOC_CENTRAL_FREELIST_H_ +#define TCMALLOC_CENTRAL_FREELIST_H_ + +#include "config.h" +#include <stddef.h> // for size_t +#ifdef HAVE_STDINT_H +#include <stdint.h> // for int32_t +#endif +#include "base/spinlock.h" +#include "base/thread_annotations.h" +#include "common.h" +#include "span.h" + +namespace tcmalloc { + +// Data kept per size-class in central cache. 
+class CentralFreeList { + public: + // A CentralFreeList may be used before its constructor runs. + // So we prevent lock_'s constructor from doing anything to the + // lock_ state. + CentralFreeList() : lock_(base::LINKER_INITIALIZED) { } + + void Init(size_t cl); + + // These methods all do internal locking. + + // Insert the specified range into the central freelist. N is the number of + // elements in the range. RemoveRange() is the opposite operation. + void InsertRange(void *start, void *end, int N); + + // Returns the actual number of fetched elements and sets *start and *end. + int RemoveRange(void **start, void **end, int N); + + // Returns the number of free objects in cache. + int length() { + SpinLockHolder h(&lock_); + return counter_; + } + + // Returns the number of free objects in the transfer cache. + int tc_length(); + + // Returns the memory overhead (internal fragmentation) attributable + // to the freelist. This is memory lost when the size of elements + // in a freelist doesn't exactly divide the page-size (an 8192-byte + // page full of 5-byte objects would have 2 bytes memory overhead). + size_t OverheadBytes(); + + // Lock/Unlock the internal SpinLock. Used on the pthread_atfork call + // to set the lock in a consistent state before the fork. + void Lock() { + lock_.Lock(); + } + + void Unlock() { + lock_.Unlock(); + } + + private: + // TransferCache is used to cache transfers of + // sizemap.num_objects_to_move(size_class) back and forth between + // thread caches and the central cache for a given size class. + struct TCEntry { + void *head; // Head of chain of objects. + void *tail; // Tail of chain of objects. + }; + + // A central cache freelist can have anywhere from 0 to kMaxNumTransferEntries + // slots to put link list chains into. +#ifdef TCMALLOC_SMALL_BUT_SLOW + // For the small memory model, the transfer cache is not used. 
+ static const int kMaxNumTransferEntries = 0; +#else + // Starting point for the the maximum number of entries in the transfer cache. + // This actual maximum for a given size class may be lower than this + // maximum value. + static const int kMaxNumTransferEntries = 64; +#endif + + // REQUIRES: lock_ is held + // Remove object from cache and return. + // Return NULL if no free entries in cache. + int FetchFromOneSpans(int N, void **start, void **end) EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // REQUIRES: lock_ is held + // Remove object from cache and return. Fetches + // from pageheap if cache is empty. Only returns + // NULL on allocation failure. + int FetchFromOneSpansSafe(int N, void **start, void **end) EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // REQUIRES: lock_ is held + // Release a linked list of objects to spans. + // May temporarily release lock_. + void ReleaseListToSpans(void *start) EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // REQUIRES: lock_ is held + // Release an object to spans. + // May temporarily release lock_. + void ReleaseToSpans(void* object) EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // REQUIRES: lock_ is held + // Populate cache by fetching from the page heap. + // May temporarily release lock_. + void Populate() EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // REQUIRES: lock is held. + // Tries to make room for a TCEntry. If the cache is full it will try to + // expand it at the cost of some other cache size. Return false if there is + // no space. + bool MakeCacheSpace() EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // REQUIRES: lock_ for locked_size_class is held. + // Picks a "random" size class to steal TCEntry slot from. In reality it + // just iterates over the sizeclasses but does so without taking a lock. + // Returns true on success. + // May temporarily lock a "random" size class. + static bool EvictRandomSizeClass(int locked_size_class, bool force); + + // REQUIRES: lock_ is *not* held. + // Tries to shrink the Cache. 
If force is true it will relase objects to + // spans if it allows it to shrink the cache. Return false if it failed to + // shrink the cache. Decrements cache_size_ on succeess. + // May temporarily take lock_. If it takes lock_, the locked_size_class + // lock is released to keep the thread from holding two size class locks + // concurrently which could lead to a deadlock. + bool ShrinkCache(int locked_size_class, bool force) LOCKS_EXCLUDED(lock_); + + // This lock protects all the data members. cached_entries and cache_size_ + // may be looked at without holding the lock. + SpinLock lock_; + + // We keep linked lists of empty and non-empty spans. + size_t size_class_; // My size class + Span empty_; // Dummy header for list of empty spans + Span nonempty_; // Dummy header for list of non-empty spans + size_t num_spans_; // Number of spans in empty_ plus nonempty_ + size_t counter_; // Number of free objects in cache entry + + // Here we reserve space for TCEntry cache slots. Space is preallocated + // for the largest possible number of entries than any one size class may + // accumulate. Not all size classes are allowed to accumulate + // kMaxNumTransferEntries, so there is some wasted space for those size + // classes. + TCEntry tc_slots_[kMaxNumTransferEntries]; + + // Number of currently used cached entries in tc_slots_. This variable is + // updated under a lock but can be read without one. + int32_t used_slots_; + // The current number of slots for this size class. This is an + // adaptive value that is increased if there is lots of traffic + // on a given size class. + int32_t cache_size_; + // Maximum size of the cache for a given size class. + int32_t max_cache_size_; +}; + +// Pads each CentralCache object to multiple of 64 bytes. Since some +// compilers (such as MSVC) don't like it when the padding is 0, I use +// template specialization to remove the padding entirely when +// sizeof(CentralFreeList) is a multiple of 64. 
+template<int kFreeListSizeMod64> +class CentralFreeListPaddedTo : public CentralFreeList { + private: + char pad_[64 - kFreeListSizeMod64]; +}; + +template<> +class CentralFreeListPaddedTo<0> : public CentralFreeList { +}; + +class CentralFreeListPadded : public CentralFreeListPaddedTo< + sizeof(CentralFreeList) % 64> { +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_CENTRAL_FREELIST_H_ diff --git a/src/third_party/gperftools-2.7/src/common.cc b/src/third_party/gperftools-2.7/src/common.cc new file mode 100644 index 00000000000..203afdf9f5c --- /dev/null +++ b/src/third_party/gperftools-2.7/src/common.cc @@ -0,0 +1,291 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <stdlib.h> // for getenv and strtol +#include "config.h" +#include "common.h" +#include "system-alloc.h" +#include "base/spinlock.h" +#include "getenv_safe.h" // TCMallocGetenvSafe + +namespace tcmalloc { + +// Define the maximum number of object per classe type to transfer between +// thread and central caches. +static int32 FLAGS_tcmalloc_transfer_num_objects; + +static const int32 kDefaultTransferNumObjecs = 32; + +// The init function is provided to explicit initialize the variable value +// from the env. var to avoid C++ global construction that might defer its +// initialization after a malloc/new call. +static inline void InitTCMallocTransferNumObjects() +{ + if (FLAGS_tcmalloc_transfer_num_objects == 0) { + const char *envval = TCMallocGetenvSafe("TCMALLOC_TRANSFER_NUM_OBJ"); + FLAGS_tcmalloc_transfer_num_objects = !envval ? kDefaultTransferNumObjecs : + strtol(envval, NULL, 10); + } +} + +// Note: the following only works for "n"s that fit in 32-bits, but +// that is fine since we only use it for small sizes. 
+static inline int LgFloor(size_t n) { + int log = 0; + for (int i = 4; i >= 0; --i) { + int shift = (1 << i); + size_t x = n >> shift; + if (x != 0) { + n = x; + log += shift; + } + } + ASSERT(n == 1); + return log; +} + +int AlignmentForSize(size_t size) { + int alignment = kAlignment; + if (size > kMaxSize) { + // Cap alignment at kPageSize for large sizes. + alignment = kPageSize; + } else if (size >= 128) { + // Space wasted due to alignment is at most 1/8, i.e., 12.5%. + alignment = (1 << LgFloor(size)) / 8; + } else if (size >= kMinAlign) { + // We need an alignment of at least 16 bytes to satisfy + // requirements for some SSE types. + alignment = kMinAlign; + } + // Maximum alignment allowed is page size alignment. + if (alignment > kPageSize) { + alignment = kPageSize; + } + CHECK_CONDITION(size < kMinAlign || alignment >= kMinAlign); + CHECK_CONDITION((alignment & (alignment - 1)) == 0); + return alignment; +} + +int SizeMap::NumMoveSize(size_t size) { + if (size == 0) return 0; + // Use approx 64k transfers between thread and central caches. + int num = static_cast<int>(64.0 * 1024.0 / size); + if (num < 2) num = 2; + + // Avoid bringing too many objects into small object free lists. + // If this value is too large: + // - We waste memory with extra objects sitting in the thread caches. + // - The central freelist holds its lock for too long while + // building a linked list of objects, slowing down the allocations + // of other threads. + // If this value is too small: + // - We go to the central freelist too often and we have to acquire + // its lock each time. + // This value strikes a balance between the constraints above. 
+ if (num > FLAGS_tcmalloc_transfer_num_objects) + num = FLAGS_tcmalloc_transfer_num_objects; + + return num; +} + +// Initialize the mapping arrays +void SizeMap::Init() { + InitTCMallocTransferNumObjects(); + + // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] + if (ClassIndex(0) != 0) { + Log(kCrash, __FILE__, __LINE__, + "Invalid class index for size 0", ClassIndex(0)); + } + if (ClassIndex(kMaxSize) >= sizeof(class_array_)) { + Log(kCrash, __FILE__, __LINE__, + "Invalid class index for kMaxSize", ClassIndex(kMaxSize)); + } + + // Compute the size classes we want to use + int sc = 1; // Next size class to assign + int alignment = kAlignment; + CHECK_CONDITION(kAlignment <= kMinAlign); + for (size_t size = kAlignment; size <= kMaxSize; size += alignment) { + alignment = AlignmentForSize(size); + CHECK_CONDITION((size % alignment) == 0); + + int blocks_to_move = NumMoveSize(size) / 4; + size_t psize = 0; + do { + psize += kPageSize; + // Allocate enough pages so leftover is less than 1/8 of total. + // This bounds wasted space to at most 12.5%. + while ((psize % size) > (psize >> 3)) { + psize += kPageSize; + } + // Continue to add pages until there are at least as many objects in + // the span as are needed when moving objects from the central + // freelists and spans to the thread caches. + } while ((psize / size) < (blocks_to_move)); + const size_t my_pages = psize >> kPageShift; + + if (sc > 1 && my_pages == class_to_pages_[sc-1]) { + // See if we can merge this into the previous class without + // increasing the fragmentation of the previous class. 
+ const size_t my_objects = (my_pages << kPageShift) / size; + const size_t prev_objects = (class_to_pages_[sc-1] << kPageShift) + / class_to_size_[sc-1]; + if (my_objects == prev_objects) { + // Adjust last class to include this size + class_to_size_[sc-1] = size; + continue; + } + } + + // Add new class + class_to_pages_[sc] = my_pages; + class_to_size_[sc] = size; + sc++; + } + num_size_classes = sc; + if (sc > kClassSizesMax) { + Log(kCrash, __FILE__, __LINE__, + "too many size classes: (found vs. max)", sc, kClassSizesMax); + } + + // Initialize the mapping arrays + int next_size = 0; + for (int c = 1; c < num_size_classes; c++) { + const int max_size_in_class = class_to_size_[c]; + for (int s = next_size; s <= max_size_in_class; s += kAlignment) { + class_array_[ClassIndex(s)] = c; + } + next_size = max_size_in_class + kAlignment; + } + + // Double-check sizes just to be safe + for (size_t size = 0; size <= kMaxSize;) { + const int sc = SizeClass(size); + if (sc <= 0 || sc >= num_size_classes) { + Log(kCrash, __FILE__, __LINE__, + "Bad size class (class, size)", sc, size); + } + if (sc > 1 && size <= class_to_size_[sc-1]) { + Log(kCrash, __FILE__, __LINE__, + "Allocating unnecessarily large class (class, size)", sc, size); + } + const size_t s = class_to_size_[sc]; + if (size > s || s == 0) { + Log(kCrash, __FILE__, __LINE__, + "Bad (class, size, requested)", sc, s, size); + } + if (size <= kMaxSmallSize) { + size += 8; + } else { + size += 128; + } + } + + // Our fast-path aligned allocation functions rely on 'naturally + // aligned' sizes to produce aligned addresses. Lets check if that + // holds for size classes that we produced. + // + // I.e. we're checking that + // + // align = (1 << shift), malloc(i * align) % align == 0, + // + // for all align values up to kPageSize. 
+ for (size_t align = kMinAlign; align <= kPageSize; align <<= 1) { + for (size_t size = align; size < kPageSize; size += align) { + CHECK_CONDITION(class_to_size_[SizeClass(size)] % align == 0); + } + } + + // Initialize the num_objects_to_move array. + for (size_t cl = 1; cl < num_size_classes; ++cl) { + num_objects_to_move_[cl] = NumMoveSize(ByteSizeForClass(cl)); + } +} + +// Metadata allocator -- keeps stats about how many bytes allocated. +static uint64_t metadata_system_bytes_ = 0; +static const size_t kMetadataAllocChunkSize = 8*1024*1024; +// As ThreadCache objects are allocated with MetaDataAlloc, and also +// CACHELINE_ALIGNED, we must use the same alignment as TCMalloc_SystemAlloc. +static const size_t kMetadataAllignment = sizeof(MemoryAligner); + +static char *metadata_chunk_alloc_; +static size_t metadata_chunk_avail_; + +static SpinLock metadata_alloc_lock(SpinLock::LINKER_INITIALIZED); + +void* MetaDataAlloc(size_t bytes) { + if (bytes >= kMetadataAllocChunkSize) { + void *rv = TCMalloc_SystemAlloc(bytes, + NULL, kMetadataAllignment); + if (rv != NULL) { + metadata_system_bytes_ += bytes; + } + return rv; + } + + SpinLockHolder h(&metadata_alloc_lock); + + // the following works by essentially turning address to integer of + // log_2 kMetadataAllignment size and negating it. I.e. negated + // value + original value gets 0 and that's what we want modulo + // kMetadataAllignment. Note, we negate before masking higher bits + // off, otherwise we'd have to mask them off after negation anyways. 
+ intptr_t alignment = -reinterpret_cast<intptr_t>(metadata_chunk_alloc_) & (kMetadataAllignment-1); + + if (metadata_chunk_avail_ < bytes + alignment) { + size_t real_size; + void *ptr = TCMalloc_SystemAlloc(kMetadataAllocChunkSize, + &real_size, kMetadataAllignment); + if (ptr == NULL) { + return NULL; + } + + metadata_chunk_alloc_ = static_cast<char *>(ptr); + metadata_chunk_avail_ = real_size; + + alignment = 0; + } + + void *rv = static_cast<void *>(metadata_chunk_alloc_ + alignment); + bytes += alignment; + metadata_chunk_alloc_ += bytes; + metadata_chunk_avail_ -= bytes; + metadata_system_bytes_ += bytes; + return rv; +} + +uint64_t metadata_system_bytes() { return metadata_system_bytes_; } + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.7/src/common.h b/src/third_party/gperftools-2.7/src/common.h new file mode 100644 index 00000000000..cb45315ef7d --- /dev/null +++ b/src/third_party/gperftools-2.7/src/common.h @@ -0,0 +1,311 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Common definitions for tcmalloc code. + +#ifndef TCMALLOC_COMMON_H_ +#define TCMALLOC_COMMON_H_ + +#include "config.h" +#include <stddef.h> // for size_t +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t, uint64_t +#endif +#include "internal_logging.h" // for ASSERT, etc +#include "base/basictypes.h" // for LIKELY, etc + +// Type that can hold a page number +typedef uintptr_t PageID; + +// Type that can hold the length of a run of pages +typedef uintptr_t Length; + +//------------------------------------------------------------------- +// Configuration +//------------------------------------------------------------------- + +#if defined(TCMALLOC_ALIGN_8BYTES) +// Unless we force to use 8 bytes alignment we use an alignment of +// at least 16 bytes to statisfy requirements for some SSE types. +// Keep in mind when using the 16 bytes alignment you can have a space +// waste due alignment of 25%. (eg malloc of 24 bytes will get 32 bytes) +static const size_t kMinAlign = 8; +#else +static const size_t kMinAlign = 16; +#endif + +// Using large pages speeds up the execution at a cost of larger memory use. 
+// Deallocation may speed up by a factor as the page map gets 8x smaller, so +// lookups in the page map result in fewer L2 cache misses, which translates to +// speedup for application/platform combinations with high L2 cache pressure. +// As the number of size classes increases with large pages, we increase +// the thread cache allowance to avoid passing more free ranges to and from +// central lists. Also, larger pages are less likely to get freed. +// These two factors cause a bounded increase in memory use. +#if defined(TCMALLOC_32K_PAGES) +static const size_t kPageShift = 15; +#elif defined(TCMALLOC_64K_PAGES) +static const size_t kPageShift = 16; +#else +static const size_t kPageShift = 13; +#endif + +static const size_t kClassSizesMax = 96; + +static const size_t kMaxThreadCacheSize = 4 << 20; + +static const size_t kPageSize = 1 << kPageShift; +static const size_t kMaxSize = 256 * 1024; +static const size_t kAlignment = 8; +// For all span-lengths <= kMaxPages we keep an exact-size list in PageHeap. +static const size_t kMaxPages = 1 << (20 - kPageShift); + +// Default bound on the total amount of thread caches. +#ifdef TCMALLOC_SMALL_BUT_SLOW +// Make the overall thread cache no bigger than that of a single thread +// for the small memory footprint case. +static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize; +#else +static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; +#endif + +// Lower bound on the per-thread cache sizes +static const size_t kMinThreadCacheSize = kMaxSize * 2; + +// The number of bytes one ThreadCache will steal from another when +// the first ThreadCache is forced to Scavenge(), delaying the +// next call to Scavenge for this thread. +static const size_t kStealAmount = 1 << 16; + +// The number of times that a deallocation can cause a freelist to +// go over its max_length() before shrinking max_length(). 
+static const int kMaxOverages = 3; + +// Maximum length we allow a per-thread free-list to have before we +// move objects from it into the corresponding central free-list. We +// want this big to avoid locking the central free-list too often. It +// should not hurt to make this list somewhat big because the +// scavenging code will shrink it down when its contents are not in use. +static const int kMaxDynamicFreeListLength = 8192; + +static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift; + +#if __aarch64__ || __x86_64__ || _M_AMD64 || _M_ARM64 +// All current x86_64 processors only look at the lower 48 bits in +// virtual to physical address translation. The top 16 are all same as +// bit 47. And bit 47 value 1 reserved for kernel-space addresses in +// practice. So it is actually 47 usable bits from malloc +// perspective. This lets us use faster two level page maps on this +// architecture. +// +// There is very similar story on 64-bit arms except it has full 48 +// bits for user-space. Because of that, and because in principle OSes +// can start giving some of highest-bit-set addresses to user-space, +// we don't bother to limit x86 to 47 bits. +// +// As of now there are published plans to add more bits to x86-64 +// virtual address space, but since 48 bits has been norm for long +// time and lots of software is relying on it, it will be opt-in from +// OS perspective. So we can keep doing "48 bits" at least for now. +static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48); +#else +// mipsen and ppcs have more general hardware so we have to support +// full 64-bits of addresses. +static const int kAddressBits = 8 * sizeof(void*); +#endif + +namespace tcmalloc { + +// Convert byte size into pages. This won't overflow, but may return +// an unreasonably large value if bytes is huge enough. +inline Length pages(size_t bytes) { + return (bytes >> kPageShift) + + ((bytes & (kPageSize - 1)) > 0 ? 
1 : 0); +} + +// For larger allocation sizes, we use larger memory alignments to +// reduce the number of size classes. +int AlignmentForSize(size_t size); + +// Size-class information + mapping +class SizeMap { + private: + //------------------------------------------------------------------- + // Mapping from size to size_class and vice versa + //------------------------------------------------------------------- + + // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an + // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. + // So for these larger sizes we have an array indexed by ceil(size/128). + // + // We flatten both logical arrays into one physical array and use + // arithmetic to compute an appropriate index. The constants used by + // ClassIndex() were selected to make the flattening work. + // + // Examples: + // Size Expression Index + // ------------------------------------------------------- + // 0 (0 + 7) / 8 0 + // 1 (1 + 7) / 8 1 + // ... + // 1024 (1024 + 7) / 8 128 + // 1025 (1025 + 127 + (120<<7)) / 128 129 + // ... + // 32768 (32768 + 127 + (120<<7)) / 128 376 + static const int kMaxSmallSize = 1024; + static const size_t kClassArraySize = + ((kMaxSize + 127 + (120 << 7)) >> 7) + 1; + unsigned char class_array_[kClassArraySize]; + + static inline size_t SmallSizeClass(size_t s) { + return (static_cast<uint32_t>(s) + 7) >> 3; + } + + static inline size_t LargeSizeClass(size_t s) { + return (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7; + } + + // If size is no more than kMaxSize, compute index of the + // class_array[] entry for it, putting the class index in output + // parameter idx and returning true. Otherwise return false. 
+ static inline bool ATTRIBUTE_ALWAYS_INLINE ClassIndexMaybe(size_t s, + uint32* idx) { + if (PREDICT_TRUE(s <= kMaxSmallSize)) { + *idx = (static_cast<uint32>(s) + 7) >> 3; + return true; + } else if (s <= kMaxSize) { + *idx = (static_cast<uint32>(s) + 127 + (120 << 7)) >> 7; + return true; + } + return false; + } + + // Compute index of the class_array[] entry for a given size + static inline size_t ClassIndex(size_t s) { + // Use unsigned arithmetic to avoid unnecessary sign extensions. + ASSERT(0 <= s); + ASSERT(s <= kMaxSize); + if (PREDICT_TRUE(s <= kMaxSmallSize)) { + return SmallSizeClass(s); + } else { + return LargeSizeClass(s); + } + } + + // Number of objects to move between a per-thread list and a central + // list in one shot. We want this to be not too small so we can + // amortize the lock overhead for accessing the central list. Making + // it too big may temporarily cause unnecessary memory wastage in the + // per-thread free list until the scavenger cleans up the list. + int num_objects_to_move_[kClassSizesMax]; + + int NumMoveSize(size_t size); + + // Mapping from size class to max size storable in that class + int32 class_to_size_[kClassSizesMax]; + + // Mapping from size class to number of pages to allocate at a time + size_t class_to_pages_[kClassSizesMax]; + + public: + size_t num_size_classes; + + // Constructor should do nothing since we rely on explicit Init() + // call, which may or may not be called before the constructor runs. + SizeMap() { } + + // Initialize the mapping arrays + void Init(); + + inline int SizeClass(size_t size) { + return class_array_[ClassIndex(size)]; + } + + // Check if size is small enough to be representable by a size + // class, and if it is, put matching size class into *cl. Returns + // true iff matching size class was found. 
+ inline bool ATTRIBUTE_ALWAYS_INLINE GetSizeClass(size_t size, uint32* cl) { + uint32 idx; + if (!ClassIndexMaybe(size, &idx)) { + return false; + } + *cl = class_array_[idx]; + return true; + } + + // Get the byte-size for a specified class + inline int32 ATTRIBUTE_ALWAYS_INLINE ByteSizeForClass(uint32 cl) { + return class_to_size_[cl]; + } + + // Mapping from size class to max size storable in that class + inline int32 class_to_size(uint32 cl) { + return class_to_size_[cl]; + } + + // Mapping from size class to number of pages to allocate at a time + inline size_t class_to_pages(uint32 cl) { + return class_to_pages_[cl]; + } + + // Number of objects to move between a per-thread list and a central + // list in one shot. We want this to be not too small so we can + // amortize the lock overhead for accessing the central list. Making + // it too big may temporarily cause unnecessary memory wastage in the + // per-thread free list until the scavenger cleans up the list. + inline int num_objects_to_move(uint32 cl) { + return num_objects_to_move_[cl]; + } +}; + +// Allocates "bytes" worth of memory and returns it. Increments +// metadata_system_bytes appropriately. May return NULL if allocation +// fails. Requires pageheap_lock is held. +void* MetaDataAlloc(size_t bytes); + +// Returns the total number of bytes allocated from the system. +// Requires pageheap_lock is held. +uint64_t metadata_system_bytes(); + +// size/depth are made the same size as a pointer so that some generic +// code below can conveniently cast them back and forth to void*. 
+static const int kMaxStackDepth = 31; +struct StackTrace { + uintptr_t size; // Size of object + uintptr_t depth; // Number of PC values stored in array below + void* stack[kMaxStackDepth]; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_COMMON_H_ diff --git a/src/third_party/gperftools-2.7/src/config.h.in b/src/third_party/gperftools-2.7/src/config.h.in new file mode 100644 index 00000000000..5f2a58c3ad7 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/config.h.in @@ -0,0 +1,310 @@ +/* src/config.h.in. Generated from configure.ac by autoheader. */ + + +#ifndef GPERFTOOLS_CONFIG_H_ +#define GPERFTOOLS_CONFIG_H_ + + +/* Build new/delete operators for overaligned types */ +#undef ENABLE_ALIGNED_NEW_DELETE + +/* Build runtime detection for sized delete */ +#undef ENABLE_DYNAMIC_SIZED_DELETE + +/* Build sized deletion operators */ +#undef ENABLE_SIZED_DELETE + +/* Define to 1 if compiler supports __builtin_expect */ +#undef HAVE_BUILTIN_EXPECT + +/* Define to 1 if compiler supports __builtin_stack_pointer */ +#undef HAVE_BUILTIN_STACK_POINTER + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +#undef HAVE_CONFLICT_SIGNAL_H + +/* Define to 1 if you have the <cygwin/signal.h> header file. */ +#undef HAVE_CYGWIN_SIGNAL_H + +/* Define to 1 if you have the declaration of `backtrace', and to 0 if you + don't. */ +#undef HAVE_DECL_BACKTRACE + +/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. + */ +#undef HAVE_DECL_CFREE + +/* Define to 1 if you have the declaration of `memalign', and to 0 if you + don't. */ +#undef HAVE_DECL_MEMALIGN + +/* Define to 1 if you have the declaration of `nanosleep', and to 0 if you + don't. */ +#undef HAVE_DECL_NANOSLEEP + +/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if + you don't. */ +#undef HAVE_DECL_POSIX_MEMALIGN + +/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you + don't. 
*/ +#undef HAVE_DECL_PVALLOC + +/* Define to 1 if you have the declaration of `sleep', and to 0 if you don't. + */ +#undef HAVE_DECL_SLEEP + +/* Define to 1 if you have the declaration of `uname', and to 0 if you don't. + */ +#undef HAVE_DECL_UNAME + +/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't. + */ +#undef HAVE_DECL_VALLOC + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if the system has the type `Elf32_Versym'. */ +#undef HAVE_ELF32_VERSYM + +/* Define to 1 if you have the <execinfo.h> header file. */ +#undef HAVE_EXECINFO_H + +/* Define to 1 if you have the <fcntl.h> header file. */ +#undef HAVE_FCNTL_H + +/* Define to 1 if you have the <features.h> header file. */ +#undef HAVE_FEATURES_H + +/* Define to 1 if you have the `fork' function. */ +#undef HAVE_FORK + +/* Define to 1 if you have the `geteuid' function. */ +#undef HAVE_GETEUID + +/* Define to 1 if you have the `getpagesize' function. */ +#undef HAVE_GETPAGESIZE + +/* Define to 1 if you have the <glob.h> header file. */ +#undef HAVE_GLOB_H + +/* Define to 1 if you have the <grp.h> header file. */ +#undef HAVE_GRP_H + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <libunwind.h> header file. */ +#undef HAVE_LIBUNWIND_H + +/* Define to 1 if you have the <linux/ptrace.h> header file. */ +#undef HAVE_LINUX_PTRACE_H + +/* Define if this is Linux that has SIGEV_THREAD_ID */ +#undef HAVE_LINUX_SIGEV_THREAD_ID + +/* Define to 1 if you have the <malloc.h> header file. */ +#undef HAVE_MALLOC_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have a working `mmap' system call. */ +#undef HAVE_MMAP + +/* define if the compiler implements namespaces */ +#undef HAVE_NAMESPACES + +/* Define to 1 if you have the <poll.h> header file. 
*/ +#undef HAVE_POLL_H + +/* define if libc has program_invocation_name */ +#undef HAVE_PROGRAM_INVOCATION_NAME + +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* defined to 1 if pthread symbols are exposed even without include pthread.h + */ +#undef HAVE_PTHREAD_DESPITE_ASKING_FOR + +/* Define to 1 if you have the <pwd.h> header file. */ +#undef HAVE_PWD_H + +/* Define to 1 if you have the `sbrk' function. */ +#undef HAVE_SBRK + +/* Define to 1 if you have the <sched.h> header file. */ +#undef HAVE_SCHED_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if the system has the type `struct mallinfo'. */ +#undef HAVE_STRUCT_MALLINFO + +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +#undef HAVE_SYS_CDEFS_H + +/* Define to 1 if you have the <sys/param.h> header file. */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the <sys/prctl.h> header file. */ +#undef HAVE_SYS_PRCTL_H + +/* Define to 1 if you have the <sys/resource.h> header file. */ +#undef HAVE_SYS_RESOURCE_H + +/* Define to 1 if you have the <sys/socket.h> header file. */ +#undef HAVE_SYS_SOCKET_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/syscall.h> header file. */ +#undef HAVE_SYS_SYSCALL_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <sys/ucontext.h> header file. */ +#undef HAVE_SYS_UCONTEXT_H + +/* Define to 1 if you have the <sys/wait.h> header file. 
*/ +#undef HAVE_SYS_WAIT_H + +/* Define to 1 if compiler supports __thread */ +#undef HAVE_TLS + +/* Define to 1 if you have the <ucontext.h> header file. */ +#undef HAVE_UCONTEXT_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Whether <unwind.h> contains _Unwind_Backtrace */ +#undef HAVE_UNWIND_BACKTRACE + +/* Define to 1 if you have the <unwind.h> header file. */ +#undef HAVE_UNWIND_H + +/* Define to 1 if you have the <valgrind.h> header file. */ +#undef HAVE_VALGRIND_H + +/* define if your compiler has __attribute__ */ +#undef HAVE___ATTRIBUTE__ + +/* define if your compiler supports alignment of functions */ +#undef HAVE___ATTRIBUTE__ALIGNED_FN + +/* Define to 1 if compiler supports __environ */ +#undef HAVE___ENVIRON + +/* Define to 1 if the system has the type `__int64'. */ +#undef HAVE___INT64 + +/* prefix where we look for installed files */ +#undef INSTALL_PREFIX + +/* Define to 1 if int32_t is equivalent to intptr_t */ +#undef INT32_EQUALS_INTPTR + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#undef LT_OBJDIR + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* How to access the PC from a struct ucontext */ +#undef PC_FROM_UCONTEXT + +/* Always the empty-string on non-windows systems. On windows, should be + "__declspec(dllexport)". This way, when we compile the dll, we export our + functions/classes. 
It's safe to define this here because config.h is only + used internally, to compile the DLL, and every DLL source file #includes + "config.h" before anything else. */ +#undef PERFTOOLS_DLL_DECL + +/* printf format code for printing a size_t and ssize_t */ +#undef PRIdS + +/* printf format code for printing a size_t and ssize_t */ +#undef PRIuS + +/* printf format code for printing a size_t and ssize_t */ +#undef PRIxS + +/* Mark the systems where we know it's bad if pthreads runs too + early before main (before threads are initialized, presumably). */ +#ifdef __FreeBSD__ +#define PTHREADS_CRASHES_IF_RUN_TOO_EARLY 1 +#endif + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* the namespace where STL code like vector<> is defined */ +#undef STL_NAMESPACE + +/* Define 32K of internal pages size for tcmalloc */ +#undef TCMALLOC_32K_PAGES + +/* Define 64K of internal pages size for tcmalloc */ +#undef TCMALLOC_64K_PAGES + +/* Define 8 bytes of allocation alignment for tcmalloc */ +#undef TCMALLOC_ALIGN_8BYTES + +/* Version number of package */ +#undef VERSION + +/* C99 says: define this to get the PRI... macros from stdint.h */ +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS 1 +#endif + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. 
*/ +#ifndef __cplusplus +#undef inline +#endif + + +#ifdef __MINGW32__ +#include "windows/mingw.h" +#endif + +#endif /* #ifndef GPERFTOOLS_CONFIG_H_ */ + diff --git a/src/third_party/gperftools-2.7/src/config_for_unittests.h b/src/third_party/gperftools-2.7/src/config_for_unittests.h new file mode 100644 index 00000000000..66592a70071 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/config_for_unittests.h @@ -0,0 +1,65 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Craig Silverstein +// +// This file is needed for windows -- unittests are not part of the +// perftools dll, but still want to include config.h just like the +// dll does, so they can use internal tools and APIs for testing. +// +// The problem is that config.h declares PERFTOOLS_DLL_DECL to be +// for exporting symbols, but the unittest needs to *import* symbols +// (since it's not the dll). +// +// The solution is to have this file, which is just like config.h but +// sets PERFTOOLS_DLL_DECL to do a dllimport instead of a dllexport. +// +// The reason we need this extra PERFTOOLS_DLL_DECL_FOR_UNITTESTS +// variable is in case people want to set PERFTOOLS_DLL_DECL explicitly +// to something other than __declspec(dllexport). In that case, they +// may want to use something other than __declspec(dllimport) for the +// unittest case. For that, we allow folks to define both +// PERFTOOLS_DLL_DECL and PERFTOOLS_DLL_DECL_FOR_UNITTESTS explicitly. +// +// NOTE: This file is equivalent to config.h on non-windows systems, +// which never defined PERFTOOLS_DLL_DECL_FOR_UNITTESTS and always +// define PERFTOOLS_DLL_DECL to the empty string. 
+ +#include "config.h" + +#undef PERFTOOLS_DLL_DECL +#ifdef PERFTOOLS_DLL_DECL_FOR_UNITTESTS +# define PERFTOOLS_DLL_DECL PERFTOOLS_DLL_DECL_FOR_UNITTESTS +#else +# define PERFTOOLS_DLL_DECL // if DLL_DECL_FOR_UNITTESTS isn't defined, use "" +#endif diff --git a/src/third_party/gperftools-2.7/src/debugallocation.cc b/src/third_party/gperftools-2.7/src/debugallocation.cc new file mode 100644 index 00000000000..7c438f25bc5 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/debugallocation.cc @@ -0,0 +1,1583 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2000, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Urs Holzle <opensource@google.com> + +#include "config.h" +#include <errno.h> +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif +// We only need malloc.h for struct mallinfo. +#ifdef HAVE_STRUCT_MALLINFO +// Malloc can be in several places on older versions of OS X. +# if defined(HAVE_MALLOC_H) +# include <malloc.h> +# elif defined(HAVE_MALLOC_MALLOC_H) +# include <malloc/malloc.h> +# elif defined(HAVE_SYS_MALLOC_H) +# include <sys/malloc.h> +# endif +#endif +#ifdef HAVE_PTHREAD +#include <pthread.h> +#endif +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include <sys/stat.h> +#include <sys/types.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#include <gperftools/malloc_extension.h> +#include <gperftools/malloc_hook.h> +#include <gperftools/stacktrace.h> +#include "addressmap-inl.h" +#include "base/commandlineflags.h" +#include "base/googleinit.h" +#include "base/logging.h" +#include "base/spinlock.h" +#include "malloc_hook-inl.h" +#include "symbolize.h" + +// NOTE: due to #define below, tcmalloc.cc will omit tc_XXX +// definitions. So that debug implementations can be defined +// instead. 
We're going to use do_malloc, do_free and other do_XXX +// functions that are defined in tcmalloc.cc for actual memory +// management +#define TCMALLOC_USING_DEBUGALLOCATION +#include "tcmalloc.cc" + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old +// form of the name instead. +#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +// ========================================================================= // + +DEFINE_bool(malloctrace, + EnvToBool("TCMALLOC_TRACE", false), + "Enables memory (de)allocation tracing to /tmp/google.alloc."); +#ifdef HAVE_MMAP +DEFINE_bool(malloc_page_fence, + EnvToBool("TCMALLOC_PAGE_FENCE", false), + "Enables putting of memory allocations at page boundaries " + "with a guard page following the allocation (to catch buffer " + "overruns right when they happen)."); +DEFINE_bool(malloc_page_fence_never_reclaim, + EnvToBool("TCMALLOC_PAGE_FENCE_NEVER_RECLAIM", false), + "Enables making the virtual address space inaccessible " + "upon a deallocation instead of returning it and reusing later."); +#else +DEFINE_bool(malloc_page_fence, false, "Not usable (requires mmap)"); +DEFINE_bool(malloc_page_fence_never_reclaim, false, "Not usable (required mmap)"); +#endif +DEFINE_bool(malloc_reclaim_memory, + EnvToBool("TCMALLOC_RECLAIM_MEMORY", true), + "If set to false, we never return memory to malloc " + "when an object is deallocated. 
This ensures that all " + "heap object addresses are unique."); +DEFINE_int32(max_free_queue_size, + EnvToInt("TCMALLOC_MAX_FREE_QUEUE_SIZE", 10*1024*1024), + "If greater than 0, keep freed blocks in a queue instead of " + "releasing them to the allocator immediately. Release them when " + "the total size of all blocks in the queue would otherwise exceed " + "this limit."); + +DEFINE_bool(symbolize_stacktrace, + EnvToBool("TCMALLOC_SYMBOLIZE_STACKTRACE", true), + "Symbolize the stack trace when provided (on some error exits)"); + +// If we are LD_PRELOAD-ed against a non-pthreads app, then +// pthread_once won't be defined. We declare it here, for that +// case (with weak linkage) which will cause the non-definition to +// resolve to NULL. We can then check for NULL or not in Instance. +extern "C" int pthread_once(pthread_once_t *, void (*)(void)) + ATTRIBUTE_WEAK; + +// ========================================================================= // + +// A safe version of printf() that does not do any allocation and +// uses very little stack space. +static void TracePrintf(int fd, const char *fmt, ...) + __attribute__ ((__format__ (__printf__, 2, 3))); + +// Round "value" up to next "alignment" boundary. +// Requires that "alignment" be a power of two. +static intptr_t RoundUp(intptr_t value, intptr_t alignment) { + return (value + alignment - 1) & ~(alignment - 1); +} + +// ========================================================================= // + +class MallocBlock; + +// A circular buffer to hold freed blocks of memory. MallocBlock::Deallocate +// (below) pushes blocks into this queue instead of returning them to the +// underlying allocator immediately. See MallocBlock::Deallocate for more +// information. +// +// We can't use an STL class for this because we need to be careful not to +// perform any heap de-allocations in any of the code in this class, since the +// code in MallocBlock::Deallocate is not re-entrant. 
+template <typename QueueEntry> +class FreeQueue { + public: + FreeQueue() : q_front_(0), q_back_(0) {} + + bool Full() { + return (q_front_ + 1) % kFreeQueueSize == q_back_; + } + + void Push(const QueueEntry& block) { + q_[q_front_] = block; + q_front_ = (q_front_ + 1) % kFreeQueueSize; + } + + QueueEntry Pop() { + RAW_CHECK(q_back_ != q_front_, "Queue is empty"); + const QueueEntry& ret = q_[q_back_]; + q_back_ = (q_back_ + 1) % kFreeQueueSize; + return ret; + } + + size_t size() const { + return (q_front_ - q_back_ + kFreeQueueSize) % kFreeQueueSize; + } + + private: + // Maximum number of blocks kept in the free queue before being freed. + static const int kFreeQueueSize = 1024; + + QueueEntry q_[kFreeQueueSize]; + int q_front_; + int q_back_; +}; + +struct MallocBlockQueueEntry { + MallocBlockQueueEntry() : block(NULL), size(0), + num_deleter_pcs(0), deleter_threadid(0) {} + MallocBlockQueueEntry(MallocBlock* b, size_t s) : block(b), size(s) { + if (FLAGS_max_free_queue_size != 0 && b != NULL) { + // Adjust the number of frames to skip (4) if you change the + // location of this call. + num_deleter_pcs = + MallocHook::GetCallerStackTrace( + deleter_pcs, + sizeof(deleter_pcs) / sizeof(deleter_pcs[0]), + 4); + deleter_threadid = pthread_self(); + } else { + num_deleter_pcs = 0; + // Zero is an illegal pthread id by my reading of the pthread + // implementation: + deleter_threadid = 0; + } + } + + MallocBlock* block; + size_t size; + + // When deleted and put in the free queue, we (flag-controlled) + // record the stack so that if corruption is later found, we can + // print the deleter's stack. (These three vars add 144 bytes of + // overhead under the LP64 data model.) + void* deleter_pcs[16]; + int num_deleter_pcs; + pthread_t deleter_threadid; +}; + +class MallocBlock { + public: // allocation type constants + + // Different allocation types we distinguish. 
+ // Note: The lower 4 bits are not random: we index kAllocName array + // by these values masked with kAllocTypeMask; + // the rest are "random" magic bits to help catch memory corruption. + static const int kMallocType = 0xEFCDAB90; + static const int kNewType = 0xFEBADC81; + static const int kArrayNewType = 0xBCEADF72; + + private: // constants + + // A mask used on alloc types above to get to 0, 1, 2 + static const int kAllocTypeMask = 0x3; + // An additional bit to set in AllocType constants + // to mark now deallocated regions. + static const int kDeallocatedTypeBit = 0x4; + + // For better memory debugging, we initialize all storage to known + // values, and overwrite the storage when it's deallocated: + // Byte that fills uninitialized storage. + static const int kMagicUninitializedByte = 0xAB; + // Byte that fills deallocated storage. + // NOTE: tcmalloc.cc depends on the value of kMagicDeletedByte + // to work around a bug in the pthread library. + static const int kMagicDeletedByte = 0xCD; + // A size_t (type of alloc_type_ below) in a deallocated storage + // filled with kMagicDeletedByte. + static const size_t kMagicDeletedSizeT = + 0xCDCDCDCD | (((size_t)0xCDCDCDCD << 16) << 16); + // Initializer works for 32 and 64 bit size_ts; + // "<< 16 << 16" is to fool gcc from issuing a warning + // when size_ts are 32 bits. + + // NOTE: on Linux, you can enable malloc debugging support in libc by + // setting the environment variable MALLOC_CHECK_ to 1 before you + // start the program (see man malloc). + + // We use either do_malloc or mmap to make the actual allocation. In + // order to remember which one of the two was used for any block, we store an + // appropriate magic word next to the block. + static const size_t kMagicMalloc = 0xDEADBEEF; + static const size_t kMagicMMap = 0xABCDEFAB; + + // This array will be filled with 0xCD, for use with memcmp. 
+ static unsigned char kMagicDeletedBuffer[1024]; + static pthread_once_t deleted_buffer_initialized_; + static bool deleted_buffer_initialized_no_pthreads_; + + private: // data layout + + // The four fields size1_,offset_,magic1_,alloc_type_ + // should together occupy a multiple of 16 bytes. (At the + // moment, sizeof(size_t) == 4 or 8 depending on piii vs + // k8, and 4 of those sum to 16 or 32 bytes). + // This, combined with do_malloc's alignment guarantees, + // ensures that SSE types can be stored into the returned + // block, at &size2_. + size_t size1_; + size_t offset_; // normally 0 unless memaligned memory + // see comments in memalign() and FromRawPointer(). + size_t magic1_; + size_t alloc_type_; + // here comes the actual data (variable length) + // ... + // then come the size2_ and magic2_, or a full page of mprotect-ed memory + // if the malloc_page_fence feature is enabled. + size_t size2_; + size_t magic2_; + + private: // static data and helpers + + // Allocation map: stores the allocation type for each allocated object, + // or the type or'ed with kDeallocatedTypeBit + // for each formerly allocated object. + typedef AddressMap<int> AllocMap; + static AllocMap* alloc_map_; + // This protects alloc_map_ and consistent state of metadata + // for each still-allocated object in it. + // We use spin locks instead of pthread_mutex_t locks + // to prevent crashes via calls to pthread_mutex_(un)lock + // for the (de)allocations coming from pthreads initialization itself. + static SpinLock alloc_map_lock_; + + // A queue of freed blocks. Instead of releasing blocks to the allocator + // immediately, we put them in a queue, freeing them only when necessary + // to keep the total size of all the freed blocks below the limit set by + // FLAGS_max_free_queue_size. 
+ static FreeQueue<MallocBlockQueueEntry>* free_queue_; + + static size_t free_queue_size_; // total size of blocks in free_queue_ + // protects free_queue_ and free_queue_size_ + static SpinLock free_queue_lock_; + + // Names of allocation types (kMallocType, kNewType, kArrayNewType) + static const char* const kAllocName[]; + // Names of corresponding deallocation types + static const char* const kDeallocName[]; + + static const char* AllocName(int type) { + return kAllocName[type & kAllocTypeMask]; + } + + static const char* DeallocName(int type) { + return kDeallocName[type & kAllocTypeMask]; + } + + private: // helper accessors + + bool IsMMapped() const { return kMagicMMap == magic1_; } + + bool IsValidMagicValue(size_t value) const { + return kMagicMMap == value || kMagicMalloc == value; + } + + static size_t real_malloced_size(size_t size) { + return size + sizeof(MallocBlock); + } + + /* + * Here we assume size of page is kMinAlign aligned, + * so if size is MALLOC_ALIGNMENT aligned too, then we could + * guarantee return address is also kMinAlign aligned, because + * mmap return address at nearby page boundary on Linux. + */ + static size_t real_mmapped_size(size_t size) { + size_t tmp = size + MallocBlock::data_offset(); + tmp = RoundUp(tmp, kMinAlign); + return tmp; + } + + size_t real_size() { + return IsMMapped() ? real_mmapped_size(size1_) : real_malloced_size(size1_); + } + + // NOTE: if the block is mmapped (that is, we're using the + // malloc_page_fence option) then there's no size2 or magic2 + // (instead, the guard page begins where size2 would be). 
+ + size_t* size2_addr() { return (size_t*)((char*)&size2_ + size1_); } + const size_t* size2_addr() const { + return (const size_t*)((char*)&size2_ + size1_); + } + + size_t* magic2_addr() { return (size_t*)(size2_addr() + 1); } + const size_t* magic2_addr() const { return (const size_t*)(size2_addr() + 1); } + + private: // other helpers + + void Initialize(size_t size, int type) { + RAW_CHECK(IsValidMagicValue(magic1_), ""); + // record us as allocated in the map + alloc_map_lock_.Lock(); + if (!alloc_map_) { + void* p = do_malloc(sizeof(AllocMap)); + alloc_map_ = new(p) AllocMap(do_malloc, do_free); + } + alloc_map_->Insert(data_addr(), type); + // initialize us + size1_ = size; + offset_ = 0; + alloc_type_ = type; + if (!IsMMapped()) { + bit_store(magic2_addr(), &magic1_); + bit_store(size2_addr(), &size); + } + alloc_map_lock_.Unlock(); + memset(data_addr(), kMagicUninitializedByte, size); + if (!IsMMapped()) { + RAW_CHECK(memcmp(&size1_, size2_addr(), sizeof(size1_)) == 0, "should hold"); + RAW_CHECK(memcmp(&magic1_, magic2_addr(), sizeof(magic1_)) == 0, "should hold"); + } + } + + size_t CheckAndClear(int type, size_t given_size) { + alloc_map_lock_.Lock(); + CheckLocked(type); + if (!IsMMapped()) { + RAW_CHECK(memcmp(&size1_, size2_addr(), sizeof(size1_)) == 0, "should hold"); + } + // record us as deallocated in the map + alloc_map_->Insert(data_addr(), type | kDeallocatedTypeBit); + alloc_map_lock_.Unlock(); + // clear us + const size_t size = real_size(); + RAW_CHECK(!given_size || given_size == size1_, + "right size must be passed to sized delete"); + memset(this, kMagicDeletedByte, size); + return size; + } + + void CheckLocked(int type) const { + int map_type = 0; + const int* found_type = + alloc_map_ != NULL ? 
alloc_map_->Find(data_addr()) : NULL; + if (found_type == NULL) { + RAW_LOG(FATAL, "memory allocation bug: object at %p " + "has never been allocated", data_addr()); + } else { + map_type = *found_type; + } + if ((map_type & kDeallocatedTypeBit) != 0) { + RAW_LOG(FATAL, "memory allocation bug: object at %p " + "has been already deallocated (it was allocated with %s)", + data_addr(), AllocName(map_type & ~kDeallocatedTypeBit)); + } + if (alloc_type_ == kMagicDeletedSizeT) { + RAW_LOG(FATAL, "memory stomping bug: a word before object at %p " + "has been corrupted; or else the object has been already " + "deallocated and our memory map has been corrupted", + data_addr()); + } + if (!IsValidMagicValue(magic1_)) { + RAW_LOG(FATAL, "memory stomping bug: a word before object at %p " + "has been corrupted; " + "or else our memory map has been corrupted and this is a " + "deallocation for not (currently) heap-allocated object", + data_addr()); + } + if (!IsMMapped()) { + if (memcmp(&size1_, size2_addr(), sizeof(size1_))) { + RAW_LOG(FATAL, "memory stomping bug: a word after object at %p " + "has been corrupted", data_addr()); + } + size_t addr; + bit_store(&addr, magic2_addr()); + if (!IsValidMagicValue(addr)) { + RAW_LOG(FATAL, "memory stomping bug: a word after object at %p " + "has been corrupted", data_addr()); + } + } + if (alloc_type_ != type) { + if ((alloc_type_ != MallocBlock::kMallocType) && + (alloc_type_ != MallocBlock::kNewType) && + (alloc_type_ != MallocBlock::kArrayNewType)) { + RAW_LOG(FATAL, "memory stomping bug: a word before object at %p " + "has been corrupted", data_addr()); + } + RAW_LOG(FATAL, "memory allocation/deallocation mismatch at %p: " + "allocated with %s being deallocated with %s", + data_addr(), AllocName(alloc_type_), DeallocName(type)); + } + if (alloc_type_ != map_type) { + RAW_LOG(FATAL, "memory stomping bug: our memory map has been corrupted : " + "allocation at %p made with %s " + "is recorded in the map to be made with %s", + 
data_addr(), AllocName(alloc_type_), AllocName(map_type)); + } + } + + public: // public accessors + + void* data_addr() { return (void*)&size2_; } + const void* data_addr() const { return (const void*)&size2_; } + + static size_t data_offset() { return OFFSETOF_MEMBER(MallocBlock, size2_); } + + size_t data_size() const { return size1_; } + + void set_offset(int offset) { this->offset_ = offset; } + + public: // our main interface + + static MallocBlock* Allocate(size_t size, int type) { + // Prevent an integer overflow / crash with large allocation sizes. + // TODO - Note that for a e.g. 64-bit size_t, max_size_t may not actually + // be the maximum value, depending on how the compiler treats ~0. The worst + // practical effect is that allocations are limited to 4Gb or so, even if + // the address space could take more. + static size_t max_size_t = ~0; + if (size > max_size_t - sizeof(MallocBlock)) { + RAW_LOG(ERROR, "Massive size passed to malloc: %" PRIuS "", size); + return NULL; + } + MallocBlock* b = NULL; + const bool use_malloc_page_fence = FLAGS_malloc_page_fence; +#ifdef HAVE_MMAP + if (use_malloc_page_fence) { + // Put the block towards the end of the page and make the next page + // inaccessible. This will catch buffer overrun right when it happens. + size_t sz = real_mmapped_size(size); + int pagesize = getpagesize(); + int num_pages = (sz + pagesize - 1) / pagesize + 1; + char* p = (char*) mmap(NULL, num_pages * pagesize, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (p == MAP_FAILED) { + // If the allocation fails, abort rather than returning NULL to + // malloc. This is because in most cases, the program will run out + // of memory in this mode due to tremendous amount of wastage. There + // is no point in propagating the error elsewhere. 
+ RAW_LOG(FATAL, "Out of memory: possibly due to page fence overhead: %s", + strerror(errno)); + } + // Mark the page after the block inaccessible + if (mprotect(p + (num_pages - 1) * pagesize, pagesize, PROT_NONE)) { + RAW_LOG(FATAL, "Guard page setup failed: %s", strerror(errno)); + } + b = (MallocBlock*) (p + (num_pages - 1) * pagesize - sz); + } else { + b = (MallocBlock*) do_malloc(real_malloced_size(size)); + } +#else + b = (MallocBlock*) do_malloc(real_malloced_size(size)); +#endif + + // It would be nice to output a diagnostic on allocation failure + // here, but logging (other than FATAL) requires allocating + // memory, which could trigger a nasty recursion. Instead, preserve + // malloc semantics and return NULL on failure. + if (b != NULL) { + b->magic1_ = use_malloc_page_fence ? kMagicMMap : kMagicMalloc; + b->Initialize(size, type); + } + return b; + } + + void Deallocate(int type, size_t given_size) { + if (IsMMapped()) { // have to do this before CheckAndClear +#ifdef HAVE_MMAP + int size = CheckAndClear(type, given_size); + int pagesize = getpagesize(); + int num_pages = (size + pagesize - 1) / pagesize + 1; + char* p = (char*) this; + if (FLAGS_malloc_page_fence_never_reclaim || + !FLAGS_malloc_reclaim_memory) { + mprotect(p - (num_pages - 1) * pagesize + size, + num_pages * pagesize, PROT_NONE); + } else { + munmap(p - (num_pages - 1) * pagesize + size, num_pages * pagesize); + } +#endif + } else { + const size_t size = CheckAndClear(type, given_size); + if (FLAGS_malloc_reclaim_memory) { + // Instead of freeing the block immediately, push it onto a queue of + // recently freed blocks. Free only enough blocks to keep from + // exceeding the capacity of the queue or causing the total amount of + // un-released memory in the queue from exceeding + // FLAGS_max_free_queue_size. 
+ ProcessFreeQueue(this, size, FLAGS_max_free_queue_size); + } + } + } + + static size_t FreeQueueSize() { + SpinLockHolder l(&free_queue_lock_); + return free_queue_size_; + } + + static void ProcessFreeQueue(MallocBlock* b, size_t size, + int max_free_queue_size) { + // MallocBlockQueueEntry are about 144 in size, so we can only + // use a small array of them on the stack. + MallocBlockQueueEntry entries[4]; + int num_entries = 0; + MallocBlockQueueEntry new_entry(b, size); + free_queue_lock_.Lock(); + if (free_queue_ == NULL) + free_queue_ = new FreeQueue<MallocBlockQueueEntry>; + RAW_CHECK(!free_queue_->Full(), "Free queue mustn't be full!"); + + if (b != NULL) { + free_queue_size_ += size + sizeof(MallocBlockQueueEntry); + free_queue_->Push(new_entry); + } + + // Free blocks until the total size of unfreed blocks no longer exceeds + // max_free_queue_size, and the free queue has at least one free + // space in it. + while (free_queue_size_ > max_free_queue_size || free_queue_->Full()) { + RAW_CHECK(num_entries < arraysize(entries), "entries array overflow"); + entries[num_entries] = free_queue_->Pop(); + free_queue_size_ -= + entries[num_entries].size + sizeof(MallocBlockQueueEntry); + num_entries++; + if (num_entries == arraysize(entries)) { + // The queue will not be full at this point, so it is ok to + // release the lock. The queue may still contain more than + // max_free_queue_size, but this is not a strict invariant. 
+ free_queue_lock_.Unlock(); + for (int i = 0; i < num_entries; i++) { + CheckForDanglingWrites(entries[i]); + do_free(entries[i].block); + } + num_entries = 0; + free_queue_lock_.Lock(); + } + } + free_queue_lock_.Unlock(); + for (int i = 0; i < num_entries; i++) { + CheckForDanglingWrites(entries[i]); + do_free(entries[i].block); + } + } + + static void InitDeletedBuffer() { + memset(kMagicDeletedBuffer, kMagicDeletedByte, sizeof(kMagicDeletedBuffer)); + deleted_buffer_initialized_no_pthreads_ = true; + } + + static void CheckForDanglingWrites(const MallocBlockQueueEntry& queue_entry) { + // Initialize the buffer if necessary. + if (pthread_once) + pthread_once(&deleted_buffer_initialized_, &InitDeletedBuffer); + if (!deleted_buffer_initialized_no_pthreads_) { + // This will be the case on systems that don't link in pthreads, + // including on FreeBSD where pthread_once has a non-zero address + // (but doesn't do anything) even when pthreads isn't linked in. + InitDeletedBuffer(); + } + + const unsigned char* p = + reinterpret_cast<unsigned char*>(queue_entry.block); + + static const size_t size_of_buffer = sizeof(kMagicDeletedBuffer); + const size_t size = queue_entry.size; + const size_t buffers = size / size_of_buffer; + const size_t remainder = size % size_of_buffer; + size_t buffer_idx; + for (buffer_idx = 0; buffer_idx < buffers; ++buffer_idx) { + CheckForCorruptedBuffer(queue_entry, buffer_idx, p, size_of_buffer); + p += size_of_buffer; + } + CheckForCorruptedBuffer(queue_entry, buffer_idx, p, remainder); + } + + static void CheckForCorruptedBuffer(const MallocBlockQueueEntry& queue_entry, + size_t buffer_idx, + const unsigned char* buffer, + size_t size_of_buffer) { + if (memcmp(buffer, kMagicDeletedBuffer, size_of_buffer) == 0) { + return; + } + + RAW_LOG(ERROR, + "Found a corrupted memory buffer in MallocBlock (may be offset " + "from user ptr): buffer index: %zd, buffer ptr: %p, size of " + "buffer: %zd", buffer_idx, buffer, size_of_buffer); + + // The 
magic deleted buffer should only be 1024 bytes, but in case + // this changes, let's put an upper limit on the number of debug + // lines we'll output: + if (size_of_buffer <= 1024) { + for (int i = 0; i < size_of_buffer; ++i) { + if (buffer[i] != kMagicDeletedByte) { + RAW_LOG(ERROR, "Buffer byte %d is 0x%02x (should be 0x%02x).", + i, buffer[i], kMagicDeletedByte); + } + } + } else { + RAW_LOG(ERROR, "Buffer too large to print corruption."); + } + + const MallocBlock* b = queue_entry.block; + const size_t size = queue_entry.size; + if (queue_entry.num_deleter_pcs > 0) { + TracePrintf(STDERR_FILENO, "Deleted by thread %p\n", + reinterpret_cast<void*>( + PRINTABLE_PTHREAD(queue_entry.deleter_threadid))); + + // We don't want to allocate or deallocate memory here, so we use + // placement-new. It's ok that we don't destroy this, since we're + // just going to error-exit below anyway. Union is for alignment. + union { void* alignment; char buf[sizeof(SymbolTable)]; } tablebuf; + SymbolTable* symbolization_table = new (tablebuf.buf) SymbolTable; + for (int i = 0; i < queue_entry.num_deleter_pcs; i++) { + // Symbolizes the previous address of pc because pc may be in the + // next function. This may happen when the function ends with + // a call to a function annotated noreturn (e.g. CHECK). + char *pc = reinterpret_cast<char*>(queue_entry.deleter_pcs[i]); + symbolization_table->Add(pc - 1); + } + if (FLAGS_symbolize_stacktrace) + symbolization_table->Symbolize(); + for (int i = 0; i < queue_entry.num_deleter_pcs; i++) { + char *pc = reinterpret_cast<char*>(queue_entry.deleter_pcs[i]); + TracePrintf(STDERR_FILENO, " @ %p %s\n", + pc, symbolization_table->GetSymbol(pc - 1)); + } + } else { + RAW_LOG(ERROR, + "Skipping the printing of the deleter's stack! 
Its stack was " + "not found; either the corruption occurred too early in " + "execution to obtain a stack trace or --max_free_queue_size was " + "set to 0."); + } + + RAW_LOG(FATAL, + "Memory was written to after being freed. MallocBlock: %p, user " + "ptr: %p, size: %zd. If you can't find the source of the error, " + "try using ASan (http://code.google.com/p/address-sanitizer/), " + "Valgrind, or Purify, or study the " + "output of the deleter's stack printed above.", + b, b->data_addr(), size); + } + + static MallocBlock* FromRawPointer(void* p) { + const size_t data_offset = MallocBlock::data_offset(); + // Find the header just before client's memory. + MallocBlock *mb = reinterpret_cast<MallocBlock *>( + reinterpret_cast<char *>(p) - data_offset); + // If mb->alloc_type_ is kMagicDeletedSizeT, we're not an ok pointer. + if (mb->alloc_type_ == kMagicDeletedSizeT) { + RAW_LOG(FATAL, "memory allocation bug: object at %p has been already" + " deallocated; or else a word before the object has been" + " corrupted (memory stomping bug)", p); + } + // If mb->offset_ is zero (common case), mb is the real header. + // If mb->offset_ is non-zero, this block was allocated by debug + // memallign implementation, and mb->offset_ is the distance + // backwards to the real header from mb, which is a fake header. + if (mb->offset_ == 0) { + return mb; + } + + MallocBlock *main_block = reinterpret_cast<MallocBlock *>( + reinterpret_cast<char *>(mb) - mb->offset_); + + if (main_block->offset_ != 0) { + RAW_LOG(FATAL, "memory corruption bug: offset_ field is corrupted." + " Need 0 but got %x", + (unsigned)(main_block->offset_)); + } + if (main_block >= p) { + RAW_LOG(FATAL, "memory corruption bug: offset_ field is corrupted." + " Detected main_block address overflow: %x", + (unsigned)(mb->offset_)); + } + if (main_block->size2_addr() < p) { + RAW_LOG(FATAL, "memory corruption bug: offset_ field is corrupted." 
+ " It points below it's own main_block: %x", + (unsigned)(mb->offset_)); + } + + return main_block; + } + + static const MallocBlock* FromRawPointer(const void* p) { + // const-safe version: we just cast about + return FromRawPointer(const_cast<void*>(p)); + } + + void Check(int type) const { + alloc_map_lock_.Lock(); + CheckLocked(type); + alloc_map_lock_.Unlock(); + } + + static bool CheckEverything() { + alloc_map_lock_.Lock(); + if (alloc_map_ != NULL) alloc_map_->Iterate(CheckCallback, 0); + alloc_map_lock_.Unlock(); + return true; // if we get here, we're okay + } + + static bool MemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]) { + memset(histogram, 0, kMallocHistogramSize * sizeof(int)); + alloc_map_lock_.Lock(); + stats_blocks_ = 0; + stats_total_ = 0; + stats_histogram_ = histogram; + if (alloc_map_ != NULL) alloc_map_->Iterate(StatsCallback, 0); + *blocks = stats_blocks_; + *total = stats_total_; + alloc_map_lock_.Unlock(); + return true; + } + + private: // helpers for CheckEverything and MemoryStats + + static void CheckCallback(const void* ptr, int* type, int dummy) { + if ((*type & kDeallocatedTypeBit) == 0) { + FromRawPointer(ptr)->CheckLocked(*type); + } + } + + // Accumulation variables for StatsCallback protected by alloc_map_lock_ + static int stats_blocks_; + static size_t stats_total_; + static int* stats_histogram_; + + static void StatsCallback(const void* ptr, int* type, int dummy) { + if ((*type & kDeallocatedTypeBit) == 0) { + const MallocBlock* b = FromRawPointer(ptr); + b->CheckLocked(*type); + ++stats_blocks_; + size_t mysize = b->size1_; + int entry = 0; + stats_total_ += mysize; + while (mysize) { + ++entry; + mysize >>= 1; + } + RAW_CHECK(entry < kMallocHistogramSize, + "kMallocHistogramSize should be at least as large as log2 " + "of the maximum process memory size"); + stats_histogram_[entry] += 1; + } + } +}; + +void DanglingWriteChecker() { + // Clear out the remaining free queue to check for 
dangling writes. + MallocBlock::ProcessFreeQueue(NULL, 0, 0); +} + +// ========================================================================= // + +const size_t MallocBlock::kMagicMalloc; +const size_t MallocBlock::kMagicMMap; + +MallocBlock::AllocMap* MallocBlock::alloc_map_ = NULL; +SpinLock MallocBlock::alloc_map_lock_(SpinLock::LINKER_INITIALIZED); + +FreeQueue<MallocBlockQueueEntry>* MallocBlock::free_queue_ = NULL; +size_t MallocBlock::free_queue_size_ = 0; +SpinLock MallocBlock::free_queue_lock_(SpinLock::LINKER_INITIALIZED); + +unsigned char MallocBlock::kMagicDeletedBuffer[1024]; +pthread_once_t MallocBlock::deleted_buffer_initialized_ = PTHREAD_ONCE_INIT; +bool MallocBlock::deleted_buffer_initialized_no_pthreads_ = false; + +const char* const MallocBlock::kAllocName[] = { + "malloc", + "new", + "new []", + NULL, +}; + +const char* const MallocBlock::kDeallocName[] = { + "free", + "delete", + "delete []", + NULL, +}; + +int MallocBlock::stats_blocks_; +size_t MallocBlock::stats_total_; +int* MallocBlock::stats_histogram_; + +// ========================================================================= // + +// The following cut-down version of printf() avoids +// using stdio or ostreams. +// This is to guarantee no recursive calls into +// the allocator and to bound the stack space consumed. (The pthread +// manager thread in linuxthreads has a very small stack, +// so fprintf can't be called.) +static void TracePrintf(int fd, const char *fmt, ...) 
{ + char buf[64]; + int i = 0; + va_list ap; + va_start(ap, fmt); + const char *p = fmt; + char numbuf[25]; + if (fd < 0) { + va_end(ap); + return; + } + numbuf[sizeof(numbuf)-1] = 0; + while (*p != '\0') { // until end of format string + char *s = &numbuf[sizeof(numbuf)-1]; + if (p[0] == '%' && p[1] != 0) { // handle % formats + int64 l = 0; + unsigned long base = 0; + if (*++p == 's') { // %s + s = va_arg(ap, char *); + } else if (*p == 'l' && p[1] == 'd') { // %ld + l = va_arg(ap, long); + base = 10; + p++; + } else if (*p == 'l' && p[1] == 'u') { // %lu + l = va_arg(ap, unsigned long); + base = 10; + p++; + } else if (*p == 'z' && p[1] == 'u') { // %zu + l = va_arg(ap, size_t); + base = 10; + p++; + } else if (*p == 'u') { // %u + l = va_arg(ap, unsigned int); + base = 10; + } else if (*p == 'd') { // %d + l = va_arg(ap, int); + base = 10; + } else if (*p == 'p') { // %p + l = va_arg(ap, intptr_t); + base = 16; + } else { + write(STDERR_FILENO, "Unimplemented TracePrintf format\n", 33); + write(STDERR_FILENO, p, 2); + write(STDERR_FILENO, "\n", 1); + abort(); + } + p++; + if (base != 0) { + bool minus = (l < 0 && base == 10); + uint64 ul = minus? 
-l : l; + do { + *--s = "0123456789abcdef"[ul % base]; + ul /= base; + } while (ul != 0); + if (base == 16) { + *--s = 'x'; + *--s = '0'; + } else if (minus) { + *--s = '-'; + } + } + } else { // handle normal characters + *--s = *p++; + } + while (*s != 0) { + if (i == sizeof(buf)) { + write(fd, buf, i); + i = 0; + } + buf[i++] = *s++; + } + } + if (i != 0) { + write(fd, buf, i); + } + va_end(ap); +} + +// Return the file descriptor we're writing a log to +static int TraceFd() { + static int trace_fd = -1; + if (trace_fd == -1) { // Open the trace file on the first call + const char *val = getenv("TCMALLOC_TRACE_FILE"); + bool fallback_to_stderr = false; + if (!val) { + val = "/tmp/google.alloc"; + fallback_to_stderr = true; + } + trace_fd = open(val, O_CREAT|O_TRUNC|O_WRONLY, 0666); + if (trace_fd == -1) { + if (fallback_to_stderr) { + trace_fd = 2; + TracePrintf(trace_fd, "Can't open %s. Logging to stderr.\n", val); + } else { + TracePrintf(2, "Can't open %s. Logging disabled.\n", val); + } + } + // Add a header to the log. + TracePrintf(trace_fd, "Trace started: %lu\n", + static_cast<unsigned long>(time(NULL))); + TracePrintf(trace_fd, + "func\tsize\tptr\tthread_id\tstack pcs for tools/symbolize\n"); + } + return trace_fd; +} + +// Print the hex stack dump on a single line. PCs are separated by tabs. +static void TraceStack(void) { + void *pcs[16]; + int n = GetStackTrace(pcs, sizeof(pcs)/sizeof(pcs[0]), 0); + for (int i = 0; i != n; i++) { + TracePrintf(TraceFd(), "\t%p", pcs[i]); + } +} + +// This protects MALLOC_TRACE, to make sure its info is atomically written. 
+static SpinLock malloc_trace_lock(SpinLock::LINKER_INITIALIZED); + +#define MALLOC_TRACE(name, size, addr) \ + do { \ + if (FLAGS_malloctrace) { \ + SpinLockHolder l(&malloc_trace_lock); \ + TracePrintf(TraceFd(), "%s\t%" PRIuS "\t%p\t%" GPRIuPTHREAD, \ + name, size, addr, PRINTABLE_PTHREAD(pthread_self())); \ + TraceStack(); \ + TracePrintf(TraceFd(), "\n"); \ + } \ + } while (0) + +// ========================================================================= // + +// Write the characters buf[0, ..., size-1] to +// the malloc trace buffer. +// This function is intended for debugging, +// and is not declared in any header file. +// You must insert a declaration of it by hand when you need +// to use it. +void __malloctrace_write(const char *buf, size_t size) { + if (FLAGS_malloctrace) { + write(TraceFd(), buf, size); + } +} + +// ========================================================================= // + +// General debug allocation/deallocation + +static inline void* DebugAllocate(size_t size, int type) { + MallocBlock* ptr = MallocBlock::Allocate(size, type); + if (ptr == NULL) return NULL; + MALLOC_TRACE("malloc", size, ptr->data_addr()); + return ptr->data_addr(); +} + +static inline void DebugDeallocate(void* ptr, int type, size_t given_size) { + MALLOC_TRACE("free", + (ptr != 0 ? MallocBlock::FromRawPointer(ptr)->data_size() : 0), + ptr); + if (ptr) MallocBlock::FromRawPointer(ptr)->Deallocate(type, given_size); +} + +// ========================================================================= // + +// The following functions may be called via MallocExtension::instance() +// for memory verification and statistics. 
+class DebugMallocImplementation : public TCMallocImplementation { + public: + virtual bool GetNumericProperty(const char* name, size_t* value) { + bool result = TCMallocImplementation::GetNumericProperty(name, value); + if (result && (strcmp(name, "generic.current_allocated_bytes") == 0)) { + // Subtract bytes kept in the free queue + size_t qsize = MallocBlock::FreeQueueSize(); + if (*value >= qsize) { + *value -= qsize; + } + } + return result; + } + + virtual bool VerifyNewMemory(const void* p) { + if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kNewType); + return true; + } + + virtual bool VerifyArrayNewMemory(const void* p) { + if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kArrayNewType); + return true; + } + + virtual bool VerifyMallocMemory(const void* p) { + if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kMallocType); + return true; + } + + virtual bool VerifyAllMemory() { + return MallocBlock::CheckEverything(); + } + + virtual bool MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]) { + return MallocBlock::MemoryStats(blocks, total, histogram); + } + + virtual size_t GetEstimatedAllocatedSize(size_t size) { + return size; + } + + virtual size_t GetAllocatedSize(const void* p) { + if (p) { + RAW_CHECK(GetOwnership(p) != MallocExtension::kNotOwned, + "ptr not allocated by tcmalloc"); + return MallocBlock::FromRawPointer(p)->data_size(); + } + return 0; + } + + virtual MallocExtension::Ownership GetOwnership(const void* p) { + if (!p) { + // nobody owns NULL + return MallocExtension::kNotOwned; + } + + // FIXME: note that correct GetOwnership should not touch memory + // that is not owned by tcmalloc. Main implementation is using + // pagemap to discover if page in question is owned by us or + // not. But pagemap only has marks for first and last page of + // spans. Note that if p was returned out of our memalign with + // big alignment, then it will point outside of marked pages. 
Also + // note that FromRawPointer call below requires touching memory + // before pointer in order to handle memalign-ed chunks + // (offset_). This leaves us with two options: + // + // * do FromRawPointer first and have possibility of crashing if + // we're given not owned pointer + // + // * return incorrect ownership for those large memalign chunks + // + // I've decided to choose later, which appears to happen rarer and + // therefore is arguably a lesser evil + + MallocExtension::Ownership rv = TCMallocImplementation::GetOwnership(p); + if (rv != MallocExtension::kOwned) { + return rv; + } + + const MallocBlock* mb = MallocBlock::FromRawPointer(p); + return TCMallocImplementation::GetOwnership(mb); + } + + virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) { + static const char* kDebugFreeQueue = "debug.free_queue"; + + TCMallocImplementation::GetFreeListSizes(v); + + MallocExtension::FreeListInfo i; + i.type = kDebugFreeQueue; + i.min_object_size = 0; + i.max_object_size = numeric_limits<size_t>::max(); + i.total_bytes_free = MallocBlock::FreeQueueSize(); + v->push_back(i); + } + + }; + +static union { + char chars[sizeof(DebugMallocImplementation)]; + void *ptr; +} debug_malloc_implementation_space; + +REGISTER_MODULE_INITIALIZER(debugallocation, { +#if (__cplusplus >= 201103L) + COMPILE_ASSERT(alignof(debug_malloc_implementation_space) >= alignof(DebugMallocImplementation), + debug_malloc_implementation_space_is_not_properly_aligned); +#endif + // Either we or valgrind will control memory management. We + // register our extension if we're the winner. Otherwise let + // Valgrind use its own malloc (so don't register our extension). 
+ if (!RunningOnValgrind()) { + DebugMallocImplementation *impl = new (debug_malloc_implementation_space.chars) DebugMallocImplementation(); + MallocExtension::Register(impl); + } +}); + +REGISTER_MODULE_DESTRUCTOR(debugallocation, { + if (!RunningOnValgrind()) { + // When the program exits, check all blocks still in the free + // queue for corruption. + DanglingWriteChecker(); + } +}); + +// ========================================================================= // + +struct debug_alloc_retry_data { + size_t size; + int new_type; +}; + +static void *retry_debug_allocate(void *arg) { + debug_alloc_retry_data *data = static_cast<debug_alloc_retry_data *>(arg); + return DebugAllocate(data->size, data->new_type); +} + +// This is mostly the same a cpp_alloc in tcmalloc.cc. +// TODO(csilvers): change Allocate() above to call cpp_alloc, so we +// don't have to reproduce the logic here. To make tc_new_mode work +// properly, I think we'll need to separate out the logic of throwing +// from the logic of calling the new-handler. +inline void* debug_cpp_alloc(size_t size, int new_type, bool nothrow) { + void* p = DebugAllocate(size, new_type); + if (p != NULL) { + return p; + } + struct debug_alloc_retry_data data; + data.size = size; + data.new_type = new_type; + return handle_oom(retry_debug_allocate, &data, + true, nothrow); +} + +inline void* do_debug_malloc_or_debug_cpp_alloc(size_t size) { + void* p = DebugAllocate(size, MallocBlock::kMallocType); + if (p != NULL) { + return p; + } + struct debug_alloc_retry_data data; + data.size = size; + data.new_type = MallocBlock::kMallocType; + return handle_oom(retry_debug_allocate, &data, + false, true); +} + +// Exported routines + +// frame forcer and force_frame exist only to prevent tail calls to +// DebugDeallocate to be actually implemented as tail calls. 
This is +// important because stack trace capturing in MallocBlockQueueEntry +// relies on google_malloc section being on stack and tc_XXX functions +// are in that section. So they must not jump to DebugDeallocate but +// have to do call. frame_forcer call at the end of such functions +// prevents tail calls to DebugDeallocate. +static int frame_forcer; +static void force_frame() { + int dummy = *(int volatile *)&frame_forcer; + (void)dummy; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_NOTHROW { + if (ThreadCache::IsUseEmergencyMalloc()) { + return tcmalloc::EmergencyMalloc(size); + } + void* ptr = do_debug_malloc_or_debug_cpp_alloc(size); + MallocHook::InvokeNewHook(ptr, size); + return ptr; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_NOTHROW { + if (tcmalloc::IsEmergencyPtr(ptr)) { + return tcmalloc::EmergencyFree(ptr); + } + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kMallocType, 0); + force_frame(); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_NOTHROW { + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kMallocType, size); + force_frame(); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t count, size_t size) PERFTOOLS_NOTHROW { + if (ThreadCache::IsUseEmergencyMalloc()) { + return tcmalloc::EmergencyCalloc(count, size); + } + // Overflow check + const size_t total_size = count * size; + if (size != 0 && total_size / size != count) return NULL; + + void* block = do_debug_malloc_or_debug_cpp_alloc(total_size); + MallocHook::InvokeNewHook(block, total_size); + if (block) memset(block, 0, total_size); + return block; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_NOTHROW { + if (tcmalloc::IsEmergencyPtr(ptr)) { + return tcmalloc::EmergencyFree(ptr); + } + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kMallocType, 0); + force_frame(); +} + +extern "C" 
PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_NOTHROW { + if (tcmalloc::IsEmergencyPtr(ptr)) { + return tcmalloc::EmergencyRealloc(ptr, size); + } + if (ptr == NULL) { + ptr = do_debug_malloc_or_debug_cpp_alloc(size); + MallocHook::InvokeNewHook(ptr, size); + return ptr; + } + if (size == 0) { + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kMallocType, 0); + return NULL; + } + MallocBlock* old = MallocBlock::FromRawPointer(ptr); + old->Check(MallocBlock::kMallocType); + MallocBlock* p = MallocBlock::Allocate(size, MallocBlock::kMallocType); + + // If realloc fails we are to leave the old block untouched and + // return null + if (p == NULL) return NULL; + + // if ptr was allocated via memalign, then old->data_size() is not + // start of user data. So we must be careful to copy only user-data + char *old_begin = (char *)old->data_addr(); + char *old_end = old_begin + old->data_size(); + + ssize_t old_ssize = old_end - (char *)ptr; + CHECK_CONDITION(old_ssize >= 0); + + size_t old_size = (size_t)old_ssize; + CHECK_CONDITION(old_size <= old->data_size()); + + memcpy(p->data_addr(), ptr, (old_size < size) ? 
old_size : size); + MallocHook::InvokeDeleteHook(ptr); + MallocHook::InvokeNewHook(p->data_addr(), size); + DebugDeallocate(ptr, MallocBlock::kMallocType, 0); + MALLOC_TRACE("realloc", p->data_size(), p->data_addr()); + return p->data_addr(); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) { + void* ptr = debug_cpp_alloc(size, MallocBlock::kNewType, false); + MallocHook::InvokeNewHook(ptr, size); + if (ptr == NULL) { + RAW_LOG(FATAL, "Unable to allocate %" PRIuS " bytes: new failed.", size); + } + return ptr; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_NOTHROW { + void* ptr = debug_cpp_alloc(size, MallocBlock::kNewType, true); + MallocHook::InvokeNewHook(ptr, size); + return ptr; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_NOTHROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kNewType, 0); + force_frame(); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) PERFTOOLS_NOTHROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kNewType, size); + force_frame(); +} + +// Some STL implementations explicitly invoke this. +// It is completely equivalent to a normal delete (delete never throws). 
+extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_NOTHROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kNewType, 0); + force_frame(); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) { + void* ptr = debug_cpp_alloc(size, MallocBlock::kArrayNewType, false); + MallocHook::InvokeNewHook(ptr, size); + if (ptr == NULL) { + RAW_LOG(FATAL, "Unable to allocate %" PRIuS " bytes: new[] failed.", size); + } + return ptr; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) + PERFTOOLS_NOTHROW { + void* ptr = debug_cpp_alloc(size, MallocBlock::kArrayNewType, true); + MallocHook::InvokeNewHook(ptr, size); + return ptr; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_NOTHROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kArrayNewType, 0); + force_frame(); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) PERFTOOLS_NOTHROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kArrayNewType, size); + force_frame(); +} + +// Some STL implementations explicitly invoke this. +// It is completely equivalent to a normal delete (delete never throws). +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_NOTHROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kArrayNewType, 0); + force_frame(); +} + +// This is mostly the same as do_memalign in tcmalloc.cc. +static void *do_debug_memalign(size_t alignment, size_t size, int type) { + // Allocate >= size bytes aligned on "alignment" boundary + // "alignment" is a power of two. 
+ void *p = 0; + RAW_CHECK((alignment & (alignment-1)) == 0, "must be power of two"); + const size_t data_offset = MallocBlock::data_offset(); + // Allocate "alignment-1" extra bytes to ensure alignment is possible, and + // a further data_offset bytes for an additional fake header. + size_t extra_bytes = data_offset + alignment - 1; + if (size + extra_bytes < size) return NULL; // Overflow + p = DebugAllocate(size + extra_bytes, type); + if (p != 0) { + intptr_t orig_p = reinterpret_cast<intptr_t>(p); + // Leave data_offset bytes for fake header, and round up to meet + // alignment. + p = reinterpret_cast<void *>(RoundUp(orig_p + data_offset, alignment)); + // Create a fake header block with an offset_ that points back to the + // real header. FromRawPointer uses this value. + MallocBlock *fake_hdr = reinterpret_cast<MallocBlock *>( + reinterpret_cast<char *>(p) - data_offset); + // offset_ is distance between real and fake headers. + // p is now end of fake header (beginning of client area), + // and orig_p is the end of the real header, so offset_ + // is their difference. 
+ // + // Note that other fields of fake_hdr are initialized with + // kMagicUninitializedByte + fake_hdr->set_offset(reinterpret_cast<intptr_t>(p) - orig_p); + } + return p; +} + +struct memalign_retry_data { + size_t align; + size_t size; + int type; +}; + +static void *retry_debug_memalign(void *arg) { + memalign_retry_data *data = static_cast<memalign_retry_data *>(arg); + return do_debug_memalign(data->align, data->size, data->type); +} + +ATTRIBUTE_ALWAYS_INLINE +inline void* do_debug_memalign_or_debug_cpp_memalign(size_t align, + size_t size, + int type, + bool from_operator, + bool nothrow) { + void* p = do_debug_memalign(align, size, type); + if (p != NULL) { + return p; + } + + struct memalign_retry_data data; + data.align = align; + data.size = size; + data.type = type; + return handle_oom(retry_debug_memalign, &data, + from_operator, nothrow); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align, size_t size) PERFTOOLS_NOTHROW { + void *p = do_debug_memalign_or_debug_cpp_memalign(align, size, MallocBlock::kMallocType, false, true); + MallocHook::InvokeNewHook(p, size); + return p; +} + +// Implementation taken from tcmalloc/tcmalloc.cc +extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign(void** result_ptr, size_t align, size_t size) + PERFTOOLS_NOTHROW { + if (((align % sizeof(void*)) != 0) || + ((align & (align - 1)) != 0) || + (align == 0)) { + return EINVAL; + } + + void* result = do_debug_memalign_or_debug_cpp_memalign(align, size, MallocBlock::kMallocType, false, true); + MallocHook::InvokeNewHook(result, size); + if (result == NULL) { + return ENOMEM; + } else { + *result_ptr = result; + return 0; + } +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) PERFTOOLS_NOTHROW { + // Allocate >= size bytes starting on a page boundary + void *p = do_debug_memalign_or_debug_cpp_memalign(getpagesize(), size, MallocBlock::kMallocType, false, true); + MallocHook::InvokeNewHook(p, size); + return p; +} + +extern "C" 
PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) PERFTOOLS_NOTHROW { + // Round size up to a multiple of pages + // then allocate memory on a page boundary + int pagesize = getpagesize(); + size = RoundUp(size, pagesize); + if (size == 0) { // pvalloc(0) should allocate one page, according to + size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html + } + void *p = do_debug_memalign_or_debug_cpp_memalign(pagesize, size, MallocBlock::kMallocType, false, true); + MallocHook::InvokeNewHook(p, size); + return p; +} + +#if defined(ENABLE_ALIGNED_NEW_DELETE) + +extern "C" PERFTOOLS_DLL_DECL void* tc_new_aligned(size_t size, std::align_val_t align) { + void* result = do_debug_memalign_or_debug_cpp_memalign(static_cast<size_t>(align), size, MallocBlock::kNewType, true, false); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_new_aligned_nothrow(size_t size, std::align_val_t align, const std::nothrow_t&) PERFTOOLS_NOTHROW { + void* result = do_debug_memalign_or_debug_cpp_memalign(static_cast<size_t>(align), size, MallocBlock::kNewType, true, true); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_delete_aligned(void* p, std::align_val_t) PERFTOOLS_NOTHROW { + tc_delete(p); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized_aligned(void* p, size_t size, std::align_val_t align) PERFTOOLS_NOTHROW { + // Reproduce actual size calculation done by do_debug_memalign + const size_t alignment = static_cast<size_t>(align); + const size_t data_offset = MallocBlock::data_offset(); + const size_t extra_bytes = data_offset + alignment - 1; + + tc_delete_sized(p, size + extra_bytes); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_delete_aligned_nothrow(void* p, std::align_val_t, const std::nothrow_t&) PERFTOOLS_NOTHROW { + tc_delete(p); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_aligned(size_t size, std::align_val_t align) { + void* result = 
do_debug_memalign_or_debug_cpp_memalign(static_cast<size_t>(align), size, MallocBlock::kArrayNewType, true, false); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_aligned_nothrow(size_t size, std::align_val_t align, const std::nothrow_t& nt) PERFTOOLS_NOTHROW { + void* result = do_debug_memalign_or_debug_cpp_memalign(static_cast<size_t>(align), size, MallocBlock::kArrayNewType, true, true); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_aligned(void* p, std::align_val_t) PERFTOOLS_NOTHROW { + tc_deletearray(p); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized_aligned(void* p, size_t size, std::align_val_t align) PERFTOOLS_NOTHROW { + // Reproduce actual size calculation done by do_debug_memalign + const size_t alignment = static_cast<size_t>(align); + const size_t data_offset = MallocBlock::data_offset(); + const size_t extra_bytes = data_offset + alignment - 1; + + tc_deletearray_sized(p, size + extra_bytes); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_aligned_nothrow(void* p, std::align_val_t, const std::nothrow_t&) PERFTOOLS_NOTHROW { + tc_deletearray(p); +} + +#endif // defined(ENABLE_ALIGNED_NEW_DELETE) + +// malloc_stats just falls through to the base implementation. 
+extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_NOTHROW { + do_malloc_stats(); +} + +extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_NOTHROW { + return do_mallopt(cmd, value); +} + +#ifdef HAVE_STRUCT_MALLINFO +extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_NOTHROW { + return do_mallinfo(); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_NOTHROW { + return MallocExtension::instance()->GetAllocatedSize(ptr); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_NOTHROW { + void* result = DebugAllocate(size, MallocBlock::kMallocType); + MallocHook::InvokeNewHook(result, size); + return result; +} diff --git a/src/third_party/gperftools-2.7/src/emergency_malloc.cc b/src/third_party/gperftools-2.7/src/emergency_malloc.cc new file mode 100644 index 00000000000..81c55541ad2 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/emergency_malloc.cc @@ -0,0 +1,169 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2014, gperftools Contributors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include "config.h" + +#include "emergency_malloc.h" + +#include <errno.h> // for ENOMEM, errno +#include <string.h> // for memset + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/low_level_alloc.h" +#include "base/spinlock.h" +#include "internal_logging.h" + + +namespace tcmalloc { + __attribute__ ((visibility("internal"))) char *emergency_arena_start; + __attribute__ ((visibility("internal"))) uintptr_t emergency_arena_start_shifted; + + static CACHELINE_ALIGNED SpinLock emergency_malloc_lock(base::LINKER_INITIALIZED); + static char *emergency_arena_end; + static LowLevelAlloc::Arena *emergency_arena; + + class EmergencyArenaPagesAllocator : public LowLevelAlloc::PagesAllocator { + ~EmergencyArenaPagesAllocator() {} + void *MapPages(int32 flags, size_t size) { + char *new_end = emergency_arena_end + size; + if (new_end > emergency_arena_start + kEmergencyArenaSize) { + RAW_LOG(FATAL, "Unable to allocate %" PRIuS " bytes in emergency zone.", size); + } + char *rv = emergency_arena_end; + emergency_arena_end = new_end; + return static_cast<void *>(rv); + } + void UnMapPages(int32 flags, void *addr, size_t size) { + RAW_LOG(FATAL, "UnMapPages is 
not implemented for emergency arena"); + } + }; + + static union { + char bytes[sizeof(EmergencyArenaPagesAllocator)]; + void *ptr; + } pages_allocator_place; + + static void InitEmergencyMalloc(void) { + const int32 flags = LowLevelAlloc::kAsyncSignalSafe; + + void *arena = LowLevelAlloc::GetDefaultPagesAllocator()->MapPages(flags, kEmergencyArenaSize * 2); + + uintptr_t arena_ptr = reinterpret_cast<uintptr_t>(arena); + uintptr_t ptr = (arena_ptr + kEmergencyArenaSize - 1) & ~(kEmergencyArenaSize-1); + + emergency_arena_end = emergency_arena_start = reinterpret_cast<char *>(ptr); + EmergencyArenaPagesAllocator *allocator = new (pages_allocator_place.bytes) EmergencyArenaPagesAllocator(); + emergency_arena = LowLevelAlloc::NewArenaWithCustomAlloc(0, LowLevelAlloc::DefaultArena(), allocator); + + emergency_arena_start_shifted = reinterpret_cast<uintptr_t>(emergency_arena_start) >> kEmergencyArenaShift; + + uintptr_t head_unmap_size = ptr - arena_ptr; + CHECK_CONDITION(head_unmap_size < kEmergencyArenaSize); + if (head_unmap_size != 0) { + LowLevelAlloc::GetDefaultPagesAllocator()->UnMapPages(flags, arena, ptr - arena_ptr); + } + + uintptr_t tail_unmap_size = kEmergencyArenaSize - head_unmap_size; + void *tail_start = reinterpret_cast<void *>(arena_ptr + head_unmap_size + kEmergencyArenaSize); + LowLevelAlloc::GetDefaultPagesAllocator()->UnMapPages(flags, tail_start, tail_unmap_size); + } + + PERFTOOLS_DLL_DECL void *EmergencyMalloc(size_t size) { + SpinLockHolder l(&emergency_malloc_lock); + + if (emergency_arena_start == NULL) { + InitEmergencyMalloc(); + CHECK_CONDITION(emergency_arena_start != NULL); + } + + void *rv = LowLevelAlloc::AllocWithArena(size, emergency_arena); + if (rv == NULL) { + errno = ENOMEM; + } + return rv; + } + + PERFTOOLS_DLL_DECL void EmergencyFree(void *p) { + SpinLockHolder l(&emergency_malloc_lock); + if (emergency_arena_start == NULL) { + InitEmergencyMalloc(); + CHECK_CONDITION(emergency_arena_start != NULL); + free(p); + return; + } + 
CHECK_CONDITION(emergency_arena_start); + LowLevelAlloc::Free(p); + } + + PERFTOOLS_DLL_DECL void *EmergencyRealloc(void *_old_ptr, size_t new_size) { + if (_old_ptr == NULL) { + return EmergencyMalloc(new_size); + } + if (new_size == 0) { + EmergencyFree(_old_ptr); + return NULL; + } + SpinLockHolder l(&emergency_malloc_lock); + CHECK_CONDITION(emergency_arena_start); + + char *old_ptr = static_cast<char *>(_old_ptr); + CHECK_CONDITION(old_ptr <= emergency_arena_end); + CHECK_CONDITION(emergency_arena_start <= old_ptr); + + // NOTE: we don't know previous size of old_ptr chunk. So instead + // of trying to figure out right size of copied memory, we just + // copy largest possible size. We don't care about being slow. + size_t old_ptr_size = emergency_arena_end - old_ptr; + size_t copy_size = (new_size < old_ptr_size) ? new_size : old_ptr_size; + + void *new_ptr = LowLevelAlloc::AllocWithArena(new_size, emergency_arena); + if (new_ptr == NULL) { + errno = ENOMEM; + return NULL; + } + memcpy(new_ptr, old_ptr, copy_size); + + LowLevelAlloc::Free(old_ptr); + return new_ptr; + } + + PERFTOOLS_DLL_DECL void *EmergencyCalloc(size_t n, size_t elem_size) { + // Overflow check + const size_t size = n * elem_size; + if (elem_size != 0 && size / elem_size != n) return NULL; + void *rv = EmergencyMalloc(size); + if (rv != NULL) { + memset(rv, 0, size); + } + return rv; + } +}; diff --git a/src/third_party/gperftools-2.7/src/emergency_malloc.h b/src/third_party/gperftools-2.7/src/emergency_malloc.h new file mode 100644 index 00000000000..8a82cfc8d7d --- /dev/null +++ b/src/third_party/gperftools-2.7/src/emergency_malloc.h @@ -0,0 +1,60 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2014, gperftools Contributors +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#ifndef EMERGENCY_MALLOC_H +#define EMERGENCY_MALLOC_H +#include "config.h" + +#include <stddef.h> + +#include "base/basictypes.h" +#include "common.h" + +namespace tcmalloc { + static const uintptr_t kEmergencyArenaShift = 20+4; // 16 megs + static const uintptr_t kEmergencyArenaSize = 1 << kEmergencyArenaShift; + + extern __attribute__ ((visibility("internal"))) char *emergency_arena_start; + extern __attribute__ ((visibility("internal"))) uintptr_t emergency_arena_start_shifted;; + + PERFTOOLS_DLL_DECL void *EmergencyMalloc(size_t size); + PERFTOOLS_DLL_DECL void EmergencyFree(void *p); + PERFTOOLS_DLL_DECL void *EmergencyCalloc(size_t n, size_t elem_size); + PERFTOOLS_DLL_DECL void *EmergencyRealloc(void *old_ptr, size_t new_size); + + static inline bool IsEmergencyPtr(const void *_ptr) { + uintptr_t ptr = reinterpret_cast<uintptr_t>(_ptr); + return PREDICT_FALSE((ptr >> kEmergencyArenaShift) == emergency_arena_start_shifted) + && emergency_arena_start_shifted; + } + +} // namespace tcmalloc + +#endif diff --git a/src/third_party/gperftools-2.7/src/emergency_malloc_for_stacktrace.cc b/src/third_party/gperftools-2.7/src/emergency_malloc_for_stacktrace.cc new file mode 100644 index 00000000000..f1dc35e76a4 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/emergency_malloc_for_stacktrace.cc @@ -0,0 +1,48 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2014, gperftools Contributors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#include "emergency_malloc.h" +#include "thread_cache.h" + +namespace tcmalloc { + bool EnterStacktraceScope(void); + void LeaveStacktraceScope(void); +} + +bool tcmalloc::EnterStacktraceScope(void) { + if (ThreadCache::IsUseEmergencyMalloc()) { + return false; + } + ThreadCache::SetUseEmergencyMalloc(); + return true; +} + +void tcmalloc::LeaveStacktraceScope(void) { + ThreadCache::ResetUseEmergencyMalloc(); +} diff --git a/src/third_party/gperftools-2.7/src/fake_stacktrace_scope.cc b/src/third_party/gperftools-2.7/src/fake_stacktrace_scope.cc new file mode 100644 index 00000000000..ee35a041252 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/fake_stacktrace_scope.cc @@ -0,0 +1,39 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2014, gperftools Contributors +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "base/basictypes.h" + +namespace tcmalloc { + ATTRIBUTE_WEAK bool EnterStacktraceScope(void) { + return true; + } + ATTRIBUTE_WEAK void LeaveStacktraceScope(void) { + } +} diff --git a/src/third_party/gperftools-2.7/src/getenv_safe.h b/src/third_party/gperftools-2.7/src/getenv_safe.h new file mode 100644 index 00000000000..3b9f4dbbcb2 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/getenv_safe.h @@ -0,0 +1,63 @@ +/* -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- + * Copyright (c) 2014, gperftools Contributors + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef GETENV_SAFE_H +#define GETENV_SAFE_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This getenv function is safe to call before the C runtime is initialized. + * On Windows, it utilizes GetEnvironmentVariable() and on unix it uses + * /proc/self/environ instead calling getenv(). It's intended to be used in + * routines that run before main(), when the state required for getenv() may + * not be set up yet. In particular, errno isn't set up until relatively late + * (after the pthreads library has a chance to make it threadsafe), and + * getenv() doesn't work until then. + * On some platforms, this call will utilize the same, static buffer for + * repeated GetenvBeforeMain() calls. Callers should not expect pointers from + * this routine to be long lived. + * Note that on unix, /proc only has the environment at the time the + * application was started, so this routine ignores setenv() calls/etc. Also + * note it only reads the first 16K of the environment. + * + * NOTE: this is version of GetenvBeforeMain that's usable from + * C. 
Implementation is in sysinfo.cc + */ +const char* TCMallocGetenvSafe(const char* name); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/third_party/gperftools-2.7/src/getpc.h b/src/third_party/gperftools-2.7/src/getpc.h new file mode 100644 index 00000000000..163873eabc6 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/getpc.h @@ -0,0 +1,192 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// This is an internal header file used by profiler.cc. It defines +// the single (inline) function GetPC. GetPC is used in a signal +// handler to figure out the instruction that was being executed when +// the signal-handler was triggered. +// +// To get this, we use the ucontext_t argument to the signal-handler +// callback, which holds the full context of what was going on when +// the signal triggered. How to get from a ucontext_t to a Program +// Counter is OS-dependent. + +#ifndef BASE_GETPC_H_ +#define BASE_GETPC_H_ + +#include "config.h" + +// On many linux systems, we may need _GNU_SOURCE to get access to +// the defined constants that define the register we want to see (eg +// REG_EIP). Note this #define must come first! +#define _GNU_SOURCE 1 +// If #define _GNU_SOURCE causes problems, this might work instead. +// It will cause problems for FreeBSD though!, because it turns off +// the needed __BSD_VISIBLE. +//#define _XOPEN_SOURCE 500 + +#include <string.h> // for memcmp +#if defined(HAVE_SYS_UCONTEXT_H) +#include <sys/ucontext.h> +#elif defined(HAVE_UCONTEXT_H) +#include <ucontext.h> // for ucontext_t (and also mcontext_t) +#elif defined(HAVE_CYGWIN_SIGNAL_H) +#include <cygwin/signal.h> +typedef ucontext ucontext_t; +#endif + + +// Take the example where function Foo() calls function Bar(). 
For +// many architectures, Bar() is responsible for setting up and tearing +// down its own stack frame. In that case, it's possible for the +// interrupt to happen when execution is in Bar(), but the stack frame +// is not properly set up (either before it's done being set up, or +// after it's been torn down but before Bar() returns). In those +// cases, the stack trace cannot see the caller function anymore. +// +// GetPC can try to identify this situation, on architectures where it +// might occur, and unwind the current function call in that case to +// avoid false edges in the profile graph (that is, edges that appear +// to show a call skipping over a function). To do this, we hard-code +// in the asm instructions we might see when setting up or tearing +// down a stack frame. +// +// This is difficult to get right: the instructions depend on the +// processor, the compiler ABI, and even the optimization level. This +// is a best effort patch -- if we fail to detect such a situation, or +// mess up the PC, nothing happens; the returned PC is not used for +// any further processing. +struct CallUnrollInfo { + // Offset from (e)ip register where this instruction sequence + // should be matched. Interpreted as bytes. Offset 0 is the next + // instruction to execute. Be extra careful with negative offsets in + // architectures of variable instruction length (like x86) - it is + // not that easy as taking an offset to step one instruction back! + int pc_offset; + // The actual instruction bytes. Feel free to make it larger if you + // need a longer sequence. + unsigned char ins[16]; + // How many bytes to match from ins array? + int ins_size; + // The offset from the stack pointer (e)sp where to look for the + // call return address. Interpreted as bytes. + int return_sp_offset; +}; + + +// The dereferences needed to get the PC from a struct ucontext were +// determined at configure time, and stored in the macro +// PC_FROM_UCONTEXT in config.h. 
The only thing we need to do here, +// then, is to do the magic call-unrolling for systems that support it. + +// -- Special case 1: linux x86, for which we have CallUnrollInfo +#if defined(__linux) && defined(__i386) && defined(__GNUC__) +static const CallUnrollInfo callunrollinfo[] = { + // Entry to a function: push %ebp; mov %esp,%ebp + // Top-of-stack contains the caller IP. + { 0, + {0x55, 0x89, 0xe5}, 3, + 0 + }, + // Entry to a function, second instruction: push %ebp; mov %esp,%ebp + // Top-of-stack contains the old frame, caller IP is +4. + { -1, + {0x55, 0x89, 0xe5}, 3, + 4 + }, + // Return from a function: RET. + // Top-of-stack contains the caller IP. + { 0, + {0xc3}, 1, + 0 + } +}; + +inline void* GetPC(const ucontext_t& signal_ucontext) { + // See comment above struct CallUnrollInfo. Only try instruction + // flow matching if both eip and esp looks reasonable. + const int eip = signal_ucontext.uc_mcontext.gregs[REG_EIP]; + const int esp = signal_ucontext.uc_mcontext.gregs[REG_ESP]; + if ((eip & 0xffff0000) != 0 && (~eip & 0xffff0000) != 0 && + (esp & 0xffff0000) != 0) { + char* eip_char = reinterpret_cast<char*>(eip); + for (int i = 0; i < sizeof(callunrollinfo)/sizeof(*callunrollinfo); ++i) { + if (!memcmp(eip_char + callunrollinfo[i].pc_offset, + callunrollinfo[i].ins, callunrollinfo[i].ins_size)) { + // We have a match. + void **retaddr = (void**)(esp + callunrollinfo[i].return_sp_offset); + return *retaddr; + } + } + } + return (void*)eip; +} + +// Special case #2: Windows, which has to do something totally different. +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__) +// If this is ever implemented, probably the way to do it is to have +// profiler.cc use a high-precision timer via timeSetEvent: +// http://msdn2.microsoft.com/en-us/library/ms712713.aspx +// We'd use it in mode TIME_CALLBACK_FUNCTION/TIME_PERIODIC. 
+// The callback function would be something like prof_handler, but +// alas the arguments are different: no ucontext_t! I don't know +// how we'd get the PC (using StackWalk64?) +// http://msdn2.microsoft.com/en-us/library/ms680650.aspx + +#include "base/logging.h" // for RAW_LOG +#ifndef HAVE_CYGWIN_SIGNAL_H +typedef int ucontext_t; +#endif + +inline void* GetPC(const struct ucontext_t& signal_ucontext) { + RAW_LOG(ERROR, "GetPC is not yet implemented on Windows\n"); + return NULL; +} + +// Normal cases. If this doesn't compile, it's probably because +// PC_FROM_UCONTEXT is the empty string. You need to figure out +// the right value for your system, and add it to the list in +// configure.ac (or set it manually in your config.h). +#else +inline void* GetPC(const ucontext_t& signal_ucontext) { +#if defined(__s390__) && !defined(__s390x__) + // Mask out the AMODE31 bit from the PC recorded in the context. + return (void*)((unsigned long)signal_ucontext.PC_FROM_UCONTEXT & 0x7fffffffUL); +#else + return (void*)signal_ucontext.PC_FROM_UCONTEXT; // defined in config.h +#endif +} + +#endif + +#endif // BASE_GETPC_H_ diff --git a/src/third_party/gperftools-2.7/src/google/heap-checker.h b/src/third_party/gperftools-2.7/src/google/heap-checker.h new file mode 100644 index 00000000000..6b9ffe5a2ce --- /dev/null +++ b/src/third_party/gperftools-2.7/src/google/heap-checker.h @@ -0,0 +1,36 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#if defined(__GNUC__) && !defined(GPERFTOOLS_SUPPRESS_LEGACY_WARNING) +#warning "google/heap-checker.h is deprecated. Use gperftools/heap-checker.h instead" +#endif +#include <gperftools/heap-checker.h> diff --git a/src/third_party/gperftools-2.7/src/google/heap-profiler.h b/src/third_party/gperftools-2.7/src/google/heap-profiler.h new file mode 100644 index 00000000000..6155484986a --- /dev/null +++ b/src/third_party/gperftools-2.7/src/google/heap-profiler.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2005, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#if defined(__GNUC__) && !defined(GPERFTOOLS_SUPPRESS_LEGACY_WARNING) +#warning "google/heap-profiler.h is deprecated. Use gperftools/heap-profiler.h instead" +#endif +#include <gperftools/heap-profiler.h> diff --git a/src/third_party/gperftools-2.7/src/google/malloc_extension.h b/src/third_party/gperftools-2.7/src/google/malloc_extension.h new file mode 100644 index 00000000000..fdad25a350e --- /dev/null +++ b/src/third_party/gperftools-2.7/src/google/malloc_extension.h @@ -0,0 +1,36 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#if defined(__GNUC__) && !defined(GPERFTOOLS_SUPPRESS_LEGACY_WARNING) +#warning "google/malloc_extension.h is deprecated. 
Use gperftools/malloc_extension.h instead" +#endif +#include <gperftools/malloc_extension.h> diff --git a/src/third_party/gperftools-2.7/src/google/malloc_extension_c.h b/src/third_party/gperftools-2.7/src/google/malloc_extension_c.h new file mode 100644 index 00000000000..3c5cd38e6a8 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/google/malloc_extension_c.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#if defined(__GNUC__) && !defined(GPERFTOOLS_SUPPRESS_LEGACY_WARNING) +#warning "google/malloc_extension_c.h is deprecated. Use gperftools/malloc_extension_c.h instead" +#endif +#include <gperftools/malloc_extension_c.h> diff --git a/src/third_party/gperftools-2.7/src/google/malloc_hook.h b/src/third_party/gperftools-2.7/src/google/malloc_hook.h new file mode 100644 index 00000000000..7ec000261d4 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/google/malloc_hook.h @@ -0,0 +1,36 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#if defined(__GNUC__) && !defined(GPERFTOOLS_SUPPRESS_LEGACY_WARNING) +#warning "google/malloc_hook.h is deprecated. Use gperftools/malloc_hook.h instead" +#endif +#include <gperftools/malloc_hook.h> diff --git a/src/third_party/gperftools-2.7/src/google/malloc_hook_c.h b/src/third_party/gperftools-2.7/src/google/malloc_hook_c.h new file mode 100644 index 00000000000..eb21aaf2e23 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/google/malloc_hook_c.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#if defined(__GNUC__) && !defined(GPERFTOOLS_SUPPRESS_LEGACY_WARNING) +#warning "google/malloc_hook_c.h is deprecated. Use gperftools/malloc_hook_c.h instead" +#endif +#include <gperftools/malloc_hook_c.h> diff --git a/src/third_party/gperftools-2.7/src/google/profiler.h b/src/third_party/gperftools-2.7/src/google/profiler.h new file mode 100644 index 00000000000..293d60557cc --- /dev/null +++ b/src/third_party/gperftools-2.7/src/google/profiler.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2005, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#if defined(__GNUC__) && !defined(GPERFTOOLS_SUPPRESS_LEGACY_WARNING) +#warning "google/profiler.h is deprecated. Use gperftools/profiler.h instead" +#endif +#include <gperftools/profiler.h> diff --git a/src/third_party/gperftools-2.7/src/google/stacktrace.h b/src/third_party/gperftools-2.7/src/google/stacktrace.h new file mode 100644 index 00000000000..55f12d2ee1d --- /dev/null +++ b/src/third_party/gperftools-2.7/src/google/stacktrace.h @@ -0,0 +1,36 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#if defined(__GNUC__) && !defined(GPERFTOOLS_SUPPRESS_LEGACY_WARNING) +#warning "google/stacktrace.h is deprecated. Use gperftools/stacktrace.h instead" +#endif +#include <gperftools/stacktrace.h> diff --git a/src/third_party/gperftools-2.7/src/google/tcmalloc.h b/src/third_party/gperftools-2.7/src/google/tcmalloc.h new file mode 100644 index 00000000000..1addeb6132c --- /dev/null +++ b/src/third_party/gperftools-2.7/src/google/tcmalloc.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2003, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* The code has moved to gperftools/. Use that include-directory for + * new code. + */ +#if defined(__GNUC__) && !defined(GPERFTOOLS_SUPPRESS_LEGACY_WARNING) +#warning "google/tcmalloc.h is deprecated. 
Use gperftools/tcmalloc.h instead" +#endif +#include <gperftools/tcmalloc.h> diff --git a/src/third_party/gperftools-2.7/src/gperftools/heap-checker.h b/src/third_party/gperftools-2.7/src/gperftools/heap-checker.h new file mode 100644 index 00000000000..edd6cc7fb50 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/gperftools/heap-checker.h @@ -0,0 +1,422 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Maxim Lifantsev (with design ideas by Sanjay Ghemawat) +// +// +// Module for detecing heap (memory) leaks. +// +// For full(er) information, see docs/heap_checker.html +// +// This module can be linked into programs with +// no slowdown caused by this unless you activate the leak-checker: +// +// 1. Set the environment variable HEAPCHEK to _type_ before +// running the program. +// +// _type_ is usually "normal" but can also be "minimal", "strict", or +// "draconian". (See the html file for other options, like 'local'.) +// +// After that, just run your binary. If the heap-checker detects +// a memory leak at program-exit, it will print instructions on how +// to track down the leak. + +#ifndef BASE_HEAP_CHECKER_H_ +#define BASE_HEAP_CHECKER_H_ + +#include <sys/types.h> // for size_t +// I can't #include config.h in this public API file, but I should +// really use configure (and make malloc_extension.h a .in file) to +// figure out if the system has stdint.h or not. But I'm lazy, so +// for now I'm assuming it's a problem only with MSVC. 
+#ifndef _MSC_VER +#include <stdint.h> // for uintptr_t +#endif +#include <stdarg.h> // for va_list +#include <vector> + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + + +// The class is thread-safe with respect to all the provided static methods, +// as well as HeapLeakChecker objects: they can be accessed by multiple threads. +class PERFTOOLS_DLL_DECL HeapLeakChecker { + public: + + // ----------------------------------------------------------------------- // + // Static functions for working with (whole-program) leak checking. + + // If heap leak checking is currently active in some mode + // e.g. if leak checking was started (and is still active now) + // due to HEAPCHECK=... defined in the environment. + // The return value reflects iff HeapLeakChecker objects manually + // constructed right now will be doing leak checking or nothing. + // Note that we can go from active to inactive state during InitGoogle() + // if FLAGS_heap_check gets set to "" by some code before/during InitGoogle(). + static bool IsActive(); + + // Return pointer to the whole-program checker if it has been created + // and NULL otherwise. + // Once GlobalChecker() returns non-NULL that object will not disappear and + // will be returned by all later GlobalChecker calls. + // This is mainly to access BytesLeaked() and ObjectsLeaked() (see below) + // for the whole-program checker after one calls NoGlobalLeaks() + // or similar and gets false. + static HeapLeakChecker* GlobalChecker(); + + // Do whole-program leak check now (if it was activated for this binary); + // return false only if it was activated and has failed. + // The mode of the check is controlled by the command-line flags. + // This method can be called repeatedly. 
+ // Things like GlobalChecker()->SameHeap() can also be called explicitly + // to do the desired flavor of the check. + static bool NoGlobalLeaks(); + + // If whole-program checker if active, + // cancel its automatic execution after main() exits. + // This requires that some leak check (e.g. NoGlobalLeaks()) + // has been called at least once on the whole-program checker. + static void CancelGlobalCheck(); + + // ----------------------------------------------------------------------- // + // Non-static functions for starting and doing leak checking. + + // Start checking and name the leak check performed. + // The name is used in naming dumped profiles + // and needs to be unique only within your binary. + // It must also be a string that can be a part of a file name, + // in particular not contain path expressions. + explicit HeapLeakChecker(const char *name); + + // Destructor (verifies that some *NoLeaks or *SameHeap method + // has been called at least once). + ~HeapLeakChecker(); + + // These used to be different but are all the same now: they return + // true iff all memory allocated since this HeapLeakChecker object + // was constructor is still reachable from global state. + // + // Because we fork to convert addresses to symbol-names, and forking + // is not thread-safe, and we may be called in a threaded context, + // we do not try to symbolize addresses when called manually. + bool NoLeaks() { return DoNoLeaks(DO_NOT_SYMBOLIZE); } + + // These forms are obsolete; use NoLeaks() instead. + // TODO(csilvers): mark as DEPRECATED. + bool QuickNoLeaks() { return NoLeaks(); } + bool BriefNoLeaks() { return NoLeaks(); } + bool SameHeap() { return NoLeaks(); } + bool QuickSameHeap() { return NoLeaks(); } + bool BriefSameHeap() { return NoLeaks(); } + + // Detailed information about the number of leaked bytes and objects + // (both of these can be negative as well). + // These are available only after a *SameHeap or *NoLeaks + // method has been called. 
+ // Note that it's possible for both of these to be zero + // while SameHeap() or NoLeaks() returned false in case + // of a heap state change that is significant + // but preserves the byte and object counts. + ssize_t BytesLeaked() const; + ssize_t ObjectsLeaked() const; + + // ----------------------------------------------------------------------- // + // Static helpers to make us ignore certain leaks. + + // Scoped helper class. Should be allocated on the stack inside a + // block of code. Any heap allocations done in the code block + // covered by the scoped object (including in nested function calls + // done by the code block) will not be reported as leaks. This is + // the recommended replacement for the GetDisableChecksStart() and + // DisableChecksToHereFrom() routines below. + // + // Example: + // void Foo() { + // HeapLeakChecker::Disabler disabler; + // ... code that allocates objects whose leaks should be ignored ... + // } + // + // REQUIRES: Destructor runs in same thread as constructor + class Disabler { + public: + Disabler(); + ~Disabler(); + private: + Disabler(const Disabler&); // disallow copy + void operator=(const Disabler&); // and assign + }; + + // Ignore an object located at 'ptr' (can go at the start or into the object) + // as well as all heap objects (transitively) referenced from it for the + // purposes of heap leak checking. Returns 'ptr' so that one can write + // static T* obj = IgnoreObject(new T(...)); + // + // If 'ptr' does not point to an active allocated object at the time of this + // call, it is ignored; but if it does, the object must not get deleted from + // the heap later on. + // + // See also HiddenPointer, below, if you need to prevent a pointer from + // being traversed by the heap checker but do not wish to transitively + // whitelist objects referenced through it. 
+ template <typename T> + static T* IgnoreObject(T* ptr) { + DoIgnoreObject(static_cast<const void*>(const_cast<const T*>(ptr))); + return ptr; + } + + // Undo what an earlier IgnoreObject() call promised and asked to do. + // At the time of this call 'ptr' must point at or inside of an active + // allocated object which was previously registered with IgnoreObject(). + static void UnIgnoreObject(const void* ptr); + + // ----------------------------------------------------------------------- // + // Internal types defined in .cc + + class Allocator; + struct RangeValue; + + private: + + // ----------------------------------------------------------------------- // + // Various helpers + + // Create the name of the heap profile file. + // Should be deleted via Allocator::Free(). + char* MakeProfileNameLocked(); + + // Helper for constructors + void Create(const char *name, bool make_start_snapshot); + + enum ShouldSymbolize { SYMBOLIZE, DO_NOT_SYMBOLIZE }; + + // Helper for *NoLeaks and *SameHeap + bool DoNoLeaks(ShouldSymbolize should_symbolize); + + // Helper for NoGlobalLeaks, also called by the global destructor. + static bool NoGlobalLeaksMaybeSymbolize(ShouldSymbolize should_symbolize); + + // These used to be public, but they are now deprecated. + // Will remove entirely when all internal uses are fixed. + // In the meantime, use friendship so the unittest can still test them. + static void* GetDisableChecksStart(); + static void DisableChecksToHereFrom(const void* start_address); + static void DisableChecksIn(const char* pattern); + friend void RangeDisabledLeaks(); + friend void NamedTwoDisabledLeaks(); + friend void* RunNamedDisabledLeaks(void*); + friend void TestHeapLeakCheckerNamedDisabling(); + + // Actually implements IgnoreObject(). + static void DoIgnoreObject(const void* ptr); + + // Disable checks based on stack trace entry at a depth <= + // max_depth. Used to hide allocations done inside some special + // libraries. 
+ static void DisableChecksFromToLocked(const void* start_address, + const void* end_address, + int max_depth); + + // Helper for DoNoLeaks to ignore all objects reachable from all live data + static void IgnoreAllLiveObjectsLocked(const void* self_stack_top); + + // Callback we pass to TCMalloc_ListAllProcessThreads (see thread_lister.h) + // that is invoked when all threads of our process are found and stopped. + // The call back does the things needed to ignore live data reachable from + // thread stacks and registers for all our threads + // as well as do other global-live-data ignoring + // (via IgnoreNonThreadLiveObjectsLocked) + // during the quiet state of all threads being stopped. + // For the argument meaning see the comment by TCMalloc_ListAllProcessThreads. + // Here we only use num_threads and thread_pids, that TCMalloc_ListAllProcessThreads + // fills for us with the number and pids of all the threads of our process + // it found and attached to. + static int IgnoreLiveThreadsLocked(void* parameter, + int num_threads, + pid_t* thread_pids, + va_list ap); + + // Helper for IgnoreAllLiveObjectsLocked and IgnoreLiveThreadsLocked + // that we prefer to execute from IgnoreLiveThreadsLocked + // while all threads are stopped. + // This helper does live object discovery and ignoring + // for all objects that are reachable from everything + // not related to thread stacks and registers. + static void IgnoreNonThreadLiveObjectsLocked(); + + // Helper for IgnoreNonThreadLiveObjectsLocked and IgnoreLiveThreadsLocked + // to discover and ignore all heap objects + // reachable from currently considered live objects + // (live_objects static global variable in out .cc file). + // "name", "name2" are two strings that we print one after another + // in a debug message to describe what kind of live object sources + // are being used. 
+ static void IgnoreLiveObjectsLocked(const char* name, const char* name2); + + // Do the overall whole-program heap leak check if needed; + // returns true when did the leak check. + static bool DoMainHeapCheck(); + + // Type of task for UseProcMapsLocked + enum ProcMapsTask { + RECORD_GLOBAL_DATA, + DISABLE_LIBRARY_ALLOCS + }; + + // Success/Error Return codes for UseProcMapsLocked. + enum ProcMapsResult { + PROC_MAPS_USED, + CANT_OPEN_PROC_MAPS, + NO_SHARED_LIBS_IN_PROC_MAPS + }; + + // Read /proc/self/maps, parse it, and do the 'proc_maps_task' for each line. + static ProcMapsResult UseProcMapsLocked(ProcMapsTask proc_maps_task); + + // A ProcMapsTask to disable allocations from 'library' + // that is mapped to [start_address..end_address) + // (only if library is a certain system library). + static void DisableLibraryAllocsLocked(const char* library, + uintptr_t start_address, + uintptr_t end_address); + + // Return true iff "*ptr" points to a heap object + // ("*ptr" can point at the start or inside of a heap object + // so that this works e.g. for pointers to C++ arrays, C++ strings, + // multiple-inherited objects, or pointers to members). + // We also fill *object_size for this object then + // and we move "*ptr" to point to the very start of the heap object. + static inline bool HaveOnHeapLocked(const void** ptr, size_t* object_size); + + // Helper to shutdown heap leak checker when it's not needed + // or can't function properly. + static void TurnItselfOffLocked(); + + // Internally-used c-tor to start whole-executable checking. + HeapLeakChecker(); + + // ----------------------------------------------------------------------- // + // Friends and externally accessed helpers. + + // Helper for VerifyHeapProfileTableStackGet in the unittest + // to get the recorded allocation caller for ptr, + // which must be a heap object. 
+ static const void* GetAllocCaller(void* ptr); + friend void VerifyHeapProfileTableStackGet(); + + // This gets to execute before constructors for all global objects + static void BeforeConstructorsLocked(); + friend void HeapLeakChecker_BeforeConstructors(); + + // This gets to execute after destructors for all global objects + friend void HeapLeakChecker_AfterDestructors(); + + // Full starting of recommended whole-program checking. + friend void HeapLeakChecker_InternalInitStart(); + + // Runs REGISTER_HEAPCHECK_CLEANUP cleanups and potentially + // calls DoMainHeapCheck + friend void HeapLeakChecker_RunHeapCleanups(); + + // ----------------------------------------------------------------------- // + // Member data. + + class SpinLock* lock_; // to make HeapLeakChecker objects thread-safe + const char* name_; // our remembered name (we own it) + // NULL means this leak checker is a noop + + // Snapshot taken when the checker was created. May be NULL + // for the global heap checker object. We use void* instead of + // HeapProfileTable::Snapshot* to avoid including heap-profile-table.h. + void* start_snapshot_; + + bool has_checked_; // if we have done the leak check, so these are ready: + ssize_t inuse_bytes_increase_; // bytes-in-use increase for this checker + ssize_t inuse_allocs_increase_; // allocations-in-use increase + // for this checker + bool keep_profiles_; // iff we should keep the heap profiles we've made + + // ----------------------------------------------------------------------- // + + // Disallow "evil" constructors. + HeapLeakChecker(const HeapLeakChecker&); + void operator=(const HeapLeakChecker&); +}; + + +// Holds a pointer that will not be traversed by the heap checker. +// Contrast with HeapLeakChecker::IgnoreObject(o), in which o and +// all objects reachable from o are ignored by the heap checker. 
+template <class T> +class HiddenPointer { + public: + explicit HiddenPointer(T* t) + : masked_t_(reinterpret_cast<uintptr_t>(t) ^ kHideMask) { + } + // Returns unhidden pointer. Be careful where you save the result. + T* get() const { return reinterpret_cast<T*>(masked_t_ ^ kHideMask); } + + private: + // Arbitrary value, but not such that xor'ing with it is likely + // to map one valid pointer to another valid pointer: + static const uintptr_t kHideMask = + static_cast<uintptr_t>(0xF03A5F7BF03A5F7Bll); + uintptr_t masked_t_; +}; + +// A class that exists solely to run its destructor. This class should not be +// used directly, but instead by the REGISTER_HEAPCHECK_CLEANUP macro below. +class PERFTOOLS_DLL_DECL HeapCleaner { + public: + typedef void (*void_function)(void); + HeapCleaner(void_function f); + static void RunHeapCleanups(); + private: + static std::vector<void_function>* heap_cleanups_; +}; + +// A macro to declare module heap check cleanup tasks +// (they run only if we are doing heap leak checking.) +// 'body' should be the cleanup code to run. 'name' doesn't matter, +// but must be unique amongst all REGISTER_HEAPCHECK_CLEANUP calls. +#define REGISTER_HEAPCHECK_CLEANUP(name, body) \ + namespace { \ + void heapcheck_cleanup_##name() { body; } \ + static HeapCleaner heapcheck_cleaner_##name(&heapcheck_cleanup_##name); \ + } + +#endif // BASE_HEAP_CHECKER_H_ diff --git a/src/third_party/gperftools-2.7/src/gperftools/heap-profiler.h b/src/third_party/gperftools-2.7/src/gperftools/heap-profiler.h new file mode 100644 index 00000000000..38c6afef8ca --- /dev/null +++ b/src/third_party/gperftools-2.7/src/gperftools/heap-profiler.h @@ -0,0 +1,105 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2005, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + * + * Module for heap-profiling. + * + * For full(er) information, see docs/heapprofile.html + * + * This module can be linked into your program with + * no slowdown caused by this unless you activate the profiler + * using one of the following methods: + * + * 1. Before starting the program, set the environment variable + * "HEAPPROFILE" to be the name of the file to which the profile + * data should be written. + * + * 2. 
Programmatically, start and stop the profiler using the + * routines "HeapProfilerStart(filename)" and "HeapProfilerStop()". + * + */ + +#ifndef BASE_HEAP_PROFILER_H_ +#define BASE_HEAP_PROFILER_H_ + +#include <stddef.h> + +/* Annoying stuff for windows; makes sure clients can import these functions */ +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +/* All this code should be usable from within C apps. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* Start profiling and arrange to write profile data to file names + * of the form: "prefix.0000", "prefix.0001", ... + */ +PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix); + +/* Returns non-zero if we are currently profiling the heap. (Returns + * an int rather than a bool so it's usable from C.) This is true + * between calls to HeapProfilerStart() and HeapProfilerStop(), and + * also if the program has been run with HEAPPROFILER, or some other + * way to turn on whole-program profiling. + */ +int IsHeapProfilerRunning(); + +/* Stop heap profiling. Can be restarted again with HeapProfilerStart(), + * but the currently accumulated profiling information will be cleared. + */ +PERFTOOLS_DLL_DECL void HeapProfilerStop(); + +/* Dump a profile now - can be used for dumping at a hopefully + * quiescent state in your program, in order to more easily track down + * memory leaks. Will include the reason in the logged message + */ +PERFTOOLS_DLL_DECL void HeapProfilerDump(const char *reason); + +/* Generate current heap profiling information. + * Returns an empty string when heap profiling is not active. + * The returned pointer is a '\0'-terminated string allocated using malloc() + * and should be free()-ed as soon as the caller does not need it anymore. 
+ */ +PERFTOOLS_DLL_DECL char* GetHeapProfile(); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* BASE_HEAP_PROFILER_H_ */ diff --git a/src/third_party/gperftools-2.7/src/gperftools/malloc_extension.h b/src/third_party/gperftools-2.7/src/gperftools/malloc_extension.h new file mode 100644 index 00000000000..689b5f17cef --- /dev/null +++ b/src/third_party/gperftools-2.7/src/gperftools/malloc_extension.h @@ -0,0 +1,434 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Extra extensions exported by some malloc implementations. These +// extensions are accessed through a virtual base class so an +// application can link against a malloc that does not implement these +// extensions, and it will get default versions that do nothing. +// +// NOTE FOR C USERS: If you wish to use this functionality from within +// a C program, see malloc_extension_c.h. + +#ifndef BASE_MALLOC_EXTENSION_H_ +#define BASE_MALLOC_EXTENSION_H_ + +#include <stddef.h> +// I can't #include config.h in this public API file, but I should +// really use configure (and make malloc_extension.h a .in file) to +// figure out if the system has stdint.h or not. But I'm lazy, so +// for now I'm assuming it's a problem only with MSVC. +#ifndef _MSC_VER +#include <stdint.h> +#endif +#include <string> +#include <vector> + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +static const int kMallocHistogramSize = 64; + +// One day, we could support other types of writers (perhaps for C?) +typedef std::string MallocExtensionWriter; + +namespace base { +struct MallocRange; +} + +// Interface to a pluggable system allocator. 
+class PERFTOOLS_DLL_DECL SysAllocator { + public: + SysAllocator() { + } + virtual ~SysAllocator(); + + // Allocates "size"-byte of memory from system aligned with "alignment". + // Returns NULL if failed. Otherwise, the returned pointer p up to and + // including (p + actual_size -1) have been allocated. + virtual void* Alloc(size_t size, size_t *actual_size, size_t alignment) = 0; +}; + +// The default implementations of the following routines do nothing. +// All implementations should be thread-safe; the current one +// (TCMallocImplementation) is. +class PERFTOOLS_DLL_DECL MallocExtension { + public: + virtual ~MallocExtension(); + + // Call this very early in the program execution -- say, in a global + // constructor -- to set up parameters and state needed by all + // instrumented malloc implemenatations. One example: this routine + // sets environemnt variables to tell STL to use libc's malloc() + // instead of doing its own memory management. This is safe to call + // multiple times, as long as each time is before threads start up. + static void Initialize(); + + // See "verify_memory.h" to see what these routines do + virtual bool VerifyAllMemory(); + virtual bool VerifyNewMemory(const void* p); + virtual bool VerifyArrayNewMemory(const void* p); + virtual bool VerifyMallocMemory(const void* p); + virtual bool MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]); + + // Get a human readable description of the following malloc data structures. + // - Total inuse memory by application. + // - Free memory(thread, central and page heap), + // - Freelist of central cache, each class. + // - Page heap freelist. + // The state is stored as a null-terminated string + // in a prefix of "buffer[0,buffer_length-1]". + // REQUIRES: buffer_length > 0. + virtual void GetStats(char* buffer, int buffer_length); + + // Outputs to "writer" a sample of live objects and the stack traces + // that allocated these objects. 
The format of the returned output + // is equivalent to the output of the heap profiler and can + // therefore be passed to "pprof". This function is equivalent to + // ReadStackTraces. The main difference is that this function returns + // serialized data appropriately formatted for use by the pprof tool. + // NOTE: by default, tcmalloc does not do any heap sampling, and this + // function will always return an empty sample. To get useful + // data from GetHeapSample, you must also set the environment + // variable TCMALLOC_SAMPLE_PARAMETER to a value such as 524288. + virtual void GetHeapSample(MallocExtensionWriter* writer); + + // Outputs to "writer" the stack traces that caused growth in the + // address space size. The format of the returned output is + // equivalent to the output of the heap profiler and can therefore + // be passed to "pprof". This function is equivalent to + // ReadHeapGrowthStackTraces. The main difference is that this function + // returns serialized data appropriately formatted for use by the + // pprof tool. (This does not depend on, or require, + // TCMALLOC_SAMPLE_PARAMETER.) + virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer); + + // Invokes func(arg, range) for every controlled memory + // range. *range is filled in with information about the range. + // + // This is a best-effort interface useful only for performance + // analysis. The implementation may not call func at all. + typedef void (RangeFunction)(void*, const base::MallocRange*); + virtual void Ranges(void* arg, RangeFunction func); + + // ------------------------------------------------------------------- + // Control operations for getting and setting malloc implementation + // specific parameters. Some currently useful properties: + // + // generic + // ------- + // "generic.current_allocated_bytes" + // Number of bytes currently allocated by application + // This property is not writable. 
+ // + // "generic.heap_size" + // Number of bytes in the heap == + // current_allocated_bytes + + // fragmentation + + // freed memory regions + // This property is not writable. + // + // tcmalloc + // -------- + // "tcmalloc.max_total_thread_cache_bytes" + // Upper limit on total number of bytes stored across all + // per-thread caches. Default: 16MB. + // + // "tcmalloc.current_total_thread_cache_bytes" + // Number of bytes used across all thread caches. + // This property is not writable. + // + // "tcmalloc.central_cache_free_bytes" + // Number of free bytes in the central cache that have been + // assigned to size classes. They always count towards virtual + // memory usage, and unless the underlying memory is swapped out + // by the OS, they also count towards physical memory usage. + // This property is not writable. + // + // "tcmalloc.transfer_cache_free_bytes" + // Number of free bytes that are waiting to be transfered between + // the central cache and a thread cache. They always count + // towards virtual memory usage, and unless the underlying memory + // is swapped out by the OS, they also count towards physical + // memory usage. This property is not writable. + // + // "tcmalloc.thread_cache_free_bytes" + // Number of free bytes in thread caches. They always count + // towards virtual memory usage, and unless the underlying memory + // is swapped out by the OS, they also count towards physical + // memory usage. This property is not writable. + // + // "tcmalloc.pageheap_free_bytes" + // Number of bytes in free, mapped pages in page heap. These + // bytes can be used to fulfill allocation requests. They + // always count towards virtual memory usage, and unless the + // underlying memory is swapped out by the OS, they also count + // towards physical memory usage. This property is not writable. + // + // "tcmalloc.pageheap_unmapped_bytes" + // Number of bytes in free, unmapped pages in page heap. 
+ // These are bytes that have been released back to the OS, + // possibly by one of the MallocExtension "Release" calls. + // They can be used to fulfill allocation requests, but + // typically incur a page fault. They always count towards + // virtual memory usage, and depending on the OS, typically + // do not count towards physical memory usage. This property + // is not writable. + // ------------------------------------------------------------------- + + // Get the named "property"'s value. Returns true if the property + // is known. Returns false if the property is not a valid property + // name for the current malloc implementation. + // REQUIRES: property != NULL; value != NULL + virtual bool GetNumericProperty(const char* property, size_t* value); + + // Set the named "property"'s value. Returns true if the property + // is known and writable. Returns false if the property is not a + // valid property name for the current malloc implementation, or + // is not writable. + // REQUIRES: property != NULL + virtual bool SetNumericProperty(const char* property, size_t value); + + // Mark the current thread as "idle". This routine may optionally + // be called by threads as a hint to the malloc implementation that + // any thread-specific resources should be released. Note: this may + // be an expensive routine, so it should not be called too often. + // + // Also, if the code that calls this routine will go to sleep for + // a while, it should take care to not allocate anything between + // the call to this routine and the beginning of the sleep. + // + // Most malloc implementations ignore this routine. + virtual void MarkThreadIdle(); + + // Mark the current thread as "busy". This routine should be + // called after MarkThreadIdle() if the thread will now do more + // work. If this method is not called, performance may suffer. + // + // Most malloc implementations ignore this routine. 
+ virtual void MarkThreadBusy(); + + // Gets the system allocator used by the malloc extension instance. Returns + // NULL for malloc implementations that do not support pluggable system + // allocators. + virtual SysAllocator* GetSystemAllocator(); + + // Sets the system allocator to the specified. + // + // Users could register their own system allocators for malloc implementation + // that supports pluggable system allocators, such as TCMalloc, by doing: + // alloc = new MyOwnSysAllocator(); + // MallocExtension::instance()->SetSystemAllocator(alloc); + // It's up to users whether to fall back (recommended) to the default + // system allocator (use GetSystemAllocator() above) or not. The caller is + // responsible to any necessary locking. + // See tcmalloc/system-alloc.h for the interface and + // tcmalloc/memfs_malloc.cc for the examples. + // + // It's a no-op for malloc implementations that do not support pluggable + // system allocators. + virtual void SetSystemAllocator(SysAllocator *a); + + // Try to release num_bytes of free memory back to the operating + // system for reuse. Use this extension with caution -- to get this + // memory back may require faulting pages back in by the OS, and + // that may be slow. (Currently only implemented in tcmalloc.) + virtual void ReleaseToSystem(size_t num_bytes); + + // Same as ReleaseToSystem() but release as much memory as possible. + virtual void ReleaseFreeMemory(); + + // Sets the rate at which we release unused memory to the system. + // Zero means we never release memory back to the system. Increase + // this flag to return memory faster; decrease it to return memory + // slower. Reasonable rates are in the range [0,10]. (Currently + // only implemented in tcmalloc). + virtual void SetMemoryReleaseRate(double rate); + + // Gets the release rate. Returns a value < 0 if unknown. 
+ virtual double GetMemoryReleaseRate(); + + // Returns the estimated number of bytes that will be allocated for + // a request of "size" bytes. This is an estimate: an allocation of + // SIZE bytes may reserve more bytes, but will never reserve less. + // (Currently only implemented in tcmalloc, other implementations + // always return SIZE.) + // This is equivalent to malloc_good_size() in OS X. + virtual size_t GetEstimatedAllocatedSize(size_t size); + + // Returns the actual number N of bytes reserved by tcmalloc for the + // pointer p. The client is allowed to use the range of bytes + // [p, p+N) in any way it wishes (i.e. N is the "usable size" of this + // allocation). This number may be equal to or greater than the number + // of bytes requested when p was allocated. + // p must have been allocated by this malloc implementation, + // must not be an interior pointer -- that is, must be exactly + // the pointer returned to by malloc() et al., not some offset + // from that -- and should not have been freed yet. p may be NULL. + // (Currently only implemented in tcmalloc; other implementations + // will return 0.) + // This is equivalent to malloc_size() in OS X, malloc_usable_size() + // in glibc, and _msize() for windows. + virtual size_t GetAllocatedSize(const void* p); + + // Returns kOwned if this malloc implementation allocated the memory + // pointed to by p, or kNotOwned if some other malloc implementation + // allocated it or p is NULL. May also return kUnknownOwnership if + // the malloc implementation does not keep track of ownership. + // REQUIRES: p must be a value returned from a previous call to + // malloc(), calloc(), realloc(), memalign(), posix_memalign(), + // valloc(), pvalloc(), new, or new[], and must refer to memory that + // is currently allocated (so, for instance, you should not pass in + // a pointer after having called free() on it). 
+ enum Ownership { + // NOTE: Enum values MUST be kept in sync with the version in + // malloc_extension_c.h + kUnknownOwnership = 0, + kOwned, + kNotOwned + }; + virtual Ownership GetOwnership(const void* p); + + // The current malloc implementation. Always non-NULL. + static MallocExtension* instance(); + + // Change the malloc implementation. Typically called by the + // malloc implementation during initialization. + static void Register(MallocExtension* implementation); + + // Returns detailed information about malloc's freelists. For each list, + // return a FreeListInfo: + struct FreeListInfo { + size_t min_object_size; + size_t max_object_size; + size_t total_bytes_free; + const char* type; + }; + // Each item in the vector refers to a different freelist. The lists + // are identified by the range of allocations that objects in the + // list can satisfy ([min_object_size, max_object_size]) and the + // type of freelist (see below). The current size of the list is + // returned in total_bytes_free (which count against a processes + // resident and virtual size). + // + // Currently supported types are: + // + // "tcmalloc.page{_unmapped}" - tcmalloc's page heap. An entry for each size + // class in the page heap is returned. Bytes in "page_unmapped" + // are no longer backed by physical memory and do not count against + // the resident size of a process. + // + // "tcmalloc.large{_unmapped}" - tcmalloc's list of objects larger + // than the largest page heap size class. Only one "large" + // entry is returned. There is no upper-bound on the size + // of objects in the large free list; this call returns + // kint64max for max_object_size. Bytes in + // "large_unmapped" are no longer backed by physical memory + // and do not count against the resident size of a process. + // + // "tcmalloc.central" - tcmalloc's central free-list. One entry per + // size-class is returned. Never unmapped. 
+ // + // "debug.free_queue" - free objects queued by the debug allocator + // and not returned to tcmalloc. + // + // "tcmalloc.thread" - tcmalloc's per-thread caches. Never unmapped. + virtual void GetFreeListSizes(std::vector<FreeListInfo>* v); + + // Get a list of stack traces of sampled allocation points. Returns + // a pointer to a "new[]-ed" result array, and stores the sample + // period in "sample_period". + // + // The state is stored as a sequence of adjacent entries + // in the returned array. Each entry has the following form: + // uintptr_t count; // Number of objects with following trace + // uintptr_t size; // Total size of objects with following trace + // uintptr_t depth; // Number of PC values in stack trace + // void* stack[depth]; // PC values that form the stack trace + // + // The list of entries is terminated by a "count" of 0. + // + // It is the responsibility of the caller to "delete[]" the returned array. + // + // May return NULL to indicate no results. + // + // This is an internal extension. Callers should use the more + // convenient "GetHeapSample(string*)" method defined above. + virtual void** ReadStackTraces(int* sample_period); + + // Like ReadStackTraces(), but returns stack traces that caused growth + // in the address space size. + virtual void** ReadHeapGrowthStackTraces(); + + // Returns the size in bytes of the calling threads cache. + virtual size_t GetThreadCacheSize(); + + // Like MarkThreadIdle, but does not destroy the internal data + // structures of the thread cache. When the thread resumes, it wil + // have an empty cache but will not need to pay to reconstruct the + // cache data structures. + virtual void MarkThreadTemporarilyIdle(); +}; + +namespace base { + +// Information passed per range. More fields may be added later. 
+struct MallocRange { + enum Type { + INUSE, // Application is using this range + FREE, // Range is currently free + UNMAPPED, // Backing physical memory has been returned to the OS + UNKNOWN + // More enum values may be added in the future + }; + + uintptr_t address; // Address of range + size_t length; // Byte length of range + Type type; // Type of this range + double fraction; // Fraction of range that is being used (0 if !INUSE) + + // Perhaps add the following: + // - stack trace if this range was sampled + // - heap growth stack trace if applicable to this range + // - age when allocated (for inuse) or freed (if not in use) +}; + +} // namespace base + +#endif // BASE_MALLOC_EXTENSION_H_ diff --git a/src/third_party/gperftools-2.7/src/gperftools/malloc_extension_c.h b/src/third_party/gperftools-2.7/src/gperftools/malloc_extension_c.h new file mode 100644 index 00000000000..70ff6868ecf --- /dev/null +++ b/src/third_party/gperftools-2.7/src/gperftools/malloc_extension_c.h @@ -0,0 +1,101 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * -- + * Author: Craig Silverstein + * + * C shims for the C++ malloc_extension.h. See malloc_extension.h for + * details. Note these C shims always work on + * MallocExtension::instance(); it is not possible to have more than + * one MallocExtension object in C applications. 
+ */ + +#ifndef _MALLOC_EXTENSION_C_H_ +#define _MALLOC_EXTENSION_C_H_ + +#include <stddef.h> +#include <sys/types.h> + +/* Annoying stuff for windows -- makes sure clients can import these fns */ +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define kMallocExtensionHistogramSize 64 + +PERFTOOLS_DLL_DECL int MallocExtension_VerifyAllMemory(void); +PERFTOOLS_DLL_DECL int MallocExtension_VerifyNewMemory(const void* p); +PERFTOOLS_DLL_DECL int MallocExtension_VerifyArrayNewMemory(const void* p); +PERFTOOLS_DLL_DECL int MallocExtension_VerifyMallocMemory(const void* p); +PERFTOOLS_DLL_DECL int MallocExtension_MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocExtensionHistogramSize]); +PERFTOOLS_DLL_DECL void MallocExtension_GetStats(char* buffer, int buffer_length); + +/* TODO(csilvers): write a C version of these routines, that perhaps + * takes a function ptr and a void *. 
+ */ +/* void MallocExtension_GetHeapSample(string* result); */ +/* void MallocExtension_GetHeapGrowthStacks(string* result); */ + +PERFTOOLS_DLL_DECL int MallocExtension_GetNumericProperty(const char* property, size_t* value); +PERFTOOLS_DLL_DECL int MallocExtension_SetNumericProperty(const char* property, size_t value); +PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadIdle(void); +PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadBusy(void); +PERFTOOLS_DLL_DECL void MallocExtension_ReleaseToSystem(size_t num_bytes); +PERFTOOLS_DLL_DECL void MallocExtension_ReleaseFreeMemory(void); +PERFTOOLS_DLL_DECL size_t MallocExtension_GetEstimatedAllocatedSize(size_t size); +PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(const void* p); +PERFTOOLS_DLL_DECL size_t MallocExtension_GetThreadCacheSize(void); +PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadTemporarilyIdle(void); + +/* + * NOTE: These enum values MUST be kept in sync with the version in + * malloc_extension.h + */ +typedef enum { + MallocExtension_kUnknownOwnership = 0, + MallocExtension_kOwned, + MallocExtension_kNotOwned +} MallocExtension_Ownership; + +PERFTOOLS_DLL_DECL MallocExtension_Ownership MallocExtension_GetOwnership(const void* p); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* _MALLOC_EXTENSION_C_H_ */ diff --git a/src/third_party/gperftools-2.7/src/gperftools/malloc_hook.h b/src/third_party/gperftools-2.7/src/gperftools/malloc_hook.h new file mode 100644 index 00000000000..b76411fb590 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/gperftools/malloc_hook.h @@ -0,0 +1,359 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Some of our malloc implementations can invoke the following hooks whenever +// memory is allocated or deallocated. MallocHook is thread-safe, and things +// you do before calling AddFooHook(MyHook) are visible to any resulting calls +// to MyHook. Hooks must be thread-safe. 
If you write: +// +// CHECK(MallocHook::AddNewHook(&MyNewHook)); +// +// MyNewHook will be invoked in subsequent calls in the current thread, but +// there are no guarantees on when it might be invoked in other threads. +// +// There are a limited number of slots available for each hook type. Add*Hook +// will return false if there are no slots available. Remove*Hook will return +// false if the given hook was not already installed. +// +// The order in which individual hooks are called in Invoke*Hook is undefined. +// +// It is safe for a hook to remove itself within Invoke*Hook and add other +// hooks. Any hooks added inside a hook invocation (for the same hook type) +// will not be invoked for the current invocation. +// +// One important user of these hooks is the heap profiler. +// +// CAVEAT: If you add new MallocHook::Invoke* calls then those calls must be +// directly in the code of the (de)allocation function that is provided to the +// user and that function must have an ATTRIBUTE_SECTION(malloc_hook) attribute. +// +// Note: the Invoke*Hook() functions are defined in malloc_hook-inl.h. If you +// need to invoke a hook (which you shouldn't unless you're part of tcmalloc), +// be sure to #include malloc_hook-inl.h in addition to malloc_hook.h. +// +// NOTE FOR C USERS: If you want to use malloc_hook functionality from +// a C program, #include malloc_hook_c.h instead of this file. + +#ifndef _MALLOC_HOOK_H_ +#define _MALLOC_HOOK_H_ + +#include <stddef.h> +#include <sys/types.h> +extern "C" { +#include "malloc_hook_c.h" // a C version of the malloc_hook interface +} + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +// The C++ methods below call the C version (MallocHook_*), and thus +// convert between an int and a bool. 
Windows complains about this +// (a "performance warning") which we don't care about, so we suppress. +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4800) +#endif + +// Note: malloc_hook_c.h defines MallocHook_*Hook and +// MallocHook_{Add,Remove}*Hook. The version of these inside the MallocHook +// class are defined in terms of the malloc_hook_c version. See malloc_hook_c.h +// for details of these types/functions. + +class PERFTOOLS_DLL_DECL MallocHook { + public: + // The NewHook is invoked whenever an object is allocated. + // It may be passed NULL if the allocator returned NULL. + typedef MallocHook_NewHook NewHook; + inline static bool AddNewHook(NewHook hook) { + return MallocHook_AddNewHook(hook); + } + inline static bool RemoveNewHook(NewHook hook) { + return MallocHook_RemoveNewHook(hook); + } + inline static void InvokeNewHook(const void* p, size_t s); + + // The DeleteHook is invoked whenever an object is deallocated. + // It may be passed NULL if the caller is trying to delete NULL. + typedef MallocHook_DeleteHook DeleteHook; + inline static bool AddDeleteHook(DeleteHook hook) { + return MallocHook_AddDeleteHook(hook); + } + inline static bool RemoveDeleteHook(DeleteHook hook) { + return MallocHook_RemoveDeleteHook(hook); + } + inline static void InvokeDeleteHook(const void* p); + + // The PreMmapHook is invoked with mmap or mmap64 arguments just + // before the call is actually made. Such a hook may be useful + // in memory limited contexts, to catch allocations that will exceed + // a memory limit, and take outside actions to increase that limit. 
+ typedef MallocHook_PreMmapHook PreMmapHook; + inline static bool AddPreMmapHook(PreMmapHook hook) { + return MallocHook_AddPreMmapHook(hook); + } + inline static bool RemovePreMmapHook(PreMmapHook hook) { + return MallocHook_RemovePreMmapHook(hook); + } + inline static void InvokePreMmapHook(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + + // The MmapReplacement is invoked after the PreMmapHook but before + // the call is actually made. The MmapReplacement should return true + // if it handled the call, or false if it is still necessary to + // call mmap/mmap64. + // This should be used only by experts, and users must be be + // extremely careful to avoid recursive calls to mmap. The replacement + // should be async signal safe. + // Only one MmapReplacement is supported. After setting an MmapReplacement + // you must call RemoveMmapReplacement before calling SetMmapReplacement + // again. + typedef MallocHook_MmapReplacement MmapReplacement; + inline static bool SetMmapReplacement(MmapReplacement hook) { + return MallocHook_SetMmapReplacement(hook); + } + inline static bool RemoveMmapReplacement(MmapReplacement hook) { + return MallocHook_RemoveMmapReplacement(hook); + } + inline static bool InvokeMmapReplacement(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result); + + + // The MmapHook is invoked whenever a region of memory is mapped. + // It may be passed MAP_FAILED if the mmap failed. 
+ typedef MallocHook_MmapHook MmapHook; + inline static bool AddMmapHook(MmapHook hook) { + return MallocHook_AddMmapHook(hook); + } + inline static bool RemoveMmapHook(MmapHook hook) { + return MallocHook_RemoveMmapHook(hook); + } + inline static void InvokeMmapHook(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + + // The MunmapReplacement is invoked with munmap arguments just before + // the call is actually made. The MunmapReplacement should return true + // if it handled the call, or false if it is still necessary to + // call munmap. + // This should be used only by experts. The replacement should be + // async signal safe. + // Only one MunmapReplacement is supported. After setting an + // MunmapReplacement you must call RemoveMunmapReplacement before + // calling SetMunmapReplacement again. + typedef MallocHook_MunmapReplacement MunmapReplacement; + inline static bool SetMunmapReplacement(MunmapReplacement hook) { + return MallocHook_SetMunmapReplacement(hook); + } + inline static bool RemoveMunmapReplacement(MunmapReplacement hook) { + return MallocHook_RemoveMunmapReplacement(hook); + } + inline static bool InvokeMunmapReplacement(const void* p, + size_t size, + int* result); + + // The MunmapHook is invoked whenever a region of memory is unmapped. + typedef MallocHook_MunmapHook MunmapHook; + inline static bool AddMunmapHook(MunmapHook hook) { + return MallocHook_AddMunmapHook(hook); + } + inline static bool RemoveMunmapHook(MunmapHook hook) { + return MallocHook_RemoveMunmapHook(hook); + } + inline static void InvokeMunmapHook(const void* p, size_t size); + + // The MremapHook is invoked whenever a region of memory is remapped. 
+ typedef MallocHook_MremapHook MremapHook; + inline static bool AddMremapHook(MremapHook hook) { + return MallocHook_AddMremapHook(hook); + } + inline static bool RemoveMremapHook(MremapHook hook) { + return MallocHook_RemoveMremapHook(hook); + } + inline static void InvokeMremapHook(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr); + + // The PreSbrkHook is invoked just before sbrk is called -- except when + // the increment is 0. This is because sbrk(0) is often called + // to get the top of the memory stack, and is not actually a + // memory-allocation call. It may be useful in memory-limited contexts, + // to catch allocations that will exceed the limit and take outside + // actions to increase such a limit. + typedef MallocHook_PreSbrkHook PreSbrkHook; + inline static bool AddPreSbrkHook(PreSbrkHook hook) { + return MallocHook_AddPreSbrkHook(hook); + } + inline static bool RemovePreSbrkHook(PreSbrkHook hook) { + return MallocHook_RemovePreSbrkHook(hook); + } + inline static void InvokePreSbrkHook(ptrdiff_t increment); + + // The SbrkHook is invoked whenever sbrk is called -- except when + // the increment is 0. This is because sbrk(0) is often called + // to get the top of the memory stack, and is not actually a + // memory-allocation call. + typedef MallocHook_SbrkHook SbrkHook; + inline static bool AddSbrkHook(SbrkHook hook) { + return MallocHook_AddSbrkHook(hook); + } + inline static bool RemoveSbrkHook(SbrkHook hook) { + return MallocHook_RemoveSbrkHook(hook); + } + inline static void InvokeSbrkHook(const void* result, ptrdiff_t increment); + + // Get the current stack trace. Try to skip all routines up to and + // and including the caller of MallocHook::Invoke*. + // Use "skip_count" (similarly to GetStackTrace from stacktrace.h) + // as a hint about how many routines to skip if better information + // is not available. 
+ inline static int GetCallerStackTrace(void** result, int max_depth, + int skip_count) { + return MallocHook_GetCallerStackTrace(result, max_depth, skip_count); + } + + // Unhooked versions of mmap() and munmap(). These should be used + // only by experts, since they bypass heapchecking, etc. + // Note: These do not run hooks, but they still use the MmapReplacement + // and MunmapReplacement. + static void* UnhookedMMap(void *start, size_t length, int prot, int flags, + int fd, off_t offset); + static int UnhookedMUnmap(void *start, size_t length); + + // The following are DEPRECATED. + inline static NewHook GetNewHook(); + inline static NewHook SetNewHook(NewHook hook) { + return MallocHook_SetNewHook(hook); + } + + inline static DeleteHook GetDeleteHook(); + inline static DeleteHook SetDeleteHook(DeleteHook hook) { + return MallocHook_SetDeleteHook(hook); + } + + inline static PreMmapHook GetPreMmapHook(); + inline static PreMmapHook SetPreMmapHook(PreMmapHook hook) { + return MallocHook_SetPreMmapHook(hook); + } + + inline static MmapHook GetMmapHook(); + inline static MmapHook SetMmapHook(MmapHook hook) { + return MallocHook_SetMmapHook(hook); + } + + inline static MunmapHook GetMunmapHook(); + inline static MunmapHook SetMunmapHook(MunmapHook hook) { + return MallocHook_SetMunmapHook(hook); + } + + inline static MremapHook GetMremapHook(); + inline static MremapHook SetMremapHook(MremapHook hook) { + return MallocHook_SetMremapHook(hook); + } + + inline static PreSbrkHook GetPreSbrkHook(); + inline static PreSbrkHook SetPreSbrkHook(PreSbrkHook hook) { + return MallocHook_SetPreSbrkHook(hook); + } + + inline static SbrkHook GetSbrkHook(); + inline static SbrkHook SetSbrkHook(SbrkHook hook) { + return MallocHook_SetSbrkHook(hook); + } + // End of DEPRECATED methods. + + private: + // Slow path versions of Invoke*Hook. 
+ static void InvokeNewHookSlow(const void* p, size_t s); + static void InvokeDeleteHookSlow(const void* p); + static void InvokePreMmapHookSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + static void InvokeMmapHookSlow(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + static bool InvokeMmapReplacementSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result); + static void InvokeMunmapHookSlow(const void* p, size_t size); + static bool InvokeMunmapReplacementSlow(const void* p, + size_t size, + int* result); + static void InvokeMremapHookSlow(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr); + static void InvokePreSbrkHookSlow(ptrdiff_t increment); + static void InvokeSbrkHookSlow(const void* result, ptrdiff_t increment); +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + +#endif /* _MALLOC_HOOK_H_ */ diff --git a/src/third_party/gperftools-2.7/src/gperftools/malloc_hook_c.h b/src/third_party/gperftools-2.7/src/gperftools/malloc_hook_c.h new file mode 100644 index 00000000000..56337e15e83 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/gperftools/malloc_hook_c.h @@ -0,0 +1,173 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * -- + * Author: Craig Silverstein + * + * C shims for the C++ malloc_hook.h. See malloc_hook.h for details + * on how to use these. + */ + +#ifndef _MALLOC_HOOK_C_H_ +#define _MALLOC_HOOK_C_H_ + +#include <stddef.h> +#include <sys/types.h> + +/* Annoying stuff for windows; makes sure clients can import these functions */ +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Get the current stack trace. Try to skip all routines up to and + * and including the caller of MallocHook::Invoke*. + * Use "skip_count" (similarly to GetStackTrace from stacktrace.h) + * as a hint about how many routines to skip if better information + * is not available. + */ +PERFTOOLS_DLL_DECL +int MallocHook_GetCallerStackTrace(void** result, int max_depth, + int skip_count); + +/* The MallocHook_{Add,Remove}*Hook functions return 1 on success and 0 on + * failure. 
+ */ + +typedef void (*MallocHook_NewHook)(const void* ptr, size_t size); +PERFTOOLS_DLL_DECL +int MallocHook_AddNewHook(MallocHook_NewHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveNewHook(MallocHook_NewHook hook); + +typedef void (*MallocHook_DeleteHook)(const void* ptr); +PERFTOOLS_DLL_DECL +int MallocHook_AddDeleteHook(MallocHook_DeleteHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveDeleteHook(MallocHook_DeleteHook hook); + +typedef void (*MallocHook_PreMmapHook)(const void *start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); +PERFTOOLS_DLL_DECL +int MallocHook_AddPreMmapHook(MallocHook_PreMmapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook); + +typedef void (*MallocHook_MmapHook)(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); +PERFTOOLS_DLL_DECL +int MallocHook_AddMmapHook(MallocHook_MmapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook); + +typedef int (*MallocHook_MmapReplacement)(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result); +int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook); +int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook); + +typedef void (*MallocHook_MunmapHook)(const void* ptr, size_t size); +PERFTOOLS_DLL_DECL +int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook); + +typedef int (*MallocHook_MunmapReplacement)(const void* ptr, + size_t size, + int* result); +int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook); +int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook); + +typedef void (*MallocHook_MremapHook)(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr); 
+PERFTOOLS_DLL_DECL +int MallocHook_AddMremapHook(MallocHook_MremapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveMremapHook(MallocHook_MremapHook hook); + +typedef void (*MallocHook_PreSbrkHook)(ptrdiff_t increment); +PERFTOOLS_DLL_DECL +int MallocHook_AddPreSbrkHook(MallocHook_PreSbrkHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemovePreSbrkHook(MallocHook_PreSbrkHook hook); + +typedef void (*MallocHook_SbrkHook)(const void* result, ptrdiff_t increment); +PERFTOOLS_DLL_DECL +int MallocHook_AddSbrkHook(MallocHook_SbrkHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveSbrkHook(MallocHook_SbrkHook hook); + +/* The following are DEPRECATED. */ +PERFTOOLS_DLL_DECL +MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook); +PERFTOOLS_DLL_DECL +MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook); +PERFTOOLS_DLL_DECL +MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook); +PERFTOOLS_DLL_DECL +MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook); +PERFTOOLS_DLL_DECL +MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook); +PERFTOOLS_DLL_DECL +MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook); +PERFTOOLS_DLL_DECL +MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook); +PERFTOOLS_DLL_DECL +MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook); +/* End of DEPRECATED functions. 
*/ + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* _MALLOC_HOOK_C_H_ */ diff --git a/src/third_party/gperftools-2.7/src/gperftools/nallocx.h b/src/third_party/gperftools-2.7/src/gperftools/nallocx.h new file mode 100644 index 00000000000..01f874ca268 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/gperftools/nallocx.h @@ -0,0 +1,37 @@ +#ifndef _NALLOCX_H_ +#define _NALLOCX_H_ +#include <stddef.h> + +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define MALLOCX_LG_ALIGN(la) ((int)(la)) + +/* + * The nallocx function allocates no memory, but it performs the same size + * computation as the malloc function, and returns the real size of the + * allocation that would result from the equivalent malloc function call. + * nallocx is a malloc extension originally implemented by jemalloc: + * http://www.unix.com/man-page/freebsd/3/nallocx/ + * + * Note, we only support MALLOCX_LG_ALIGN flag and nothing else. + */ +PERFTOOLS_DLL_DECL size_t nallocx(size_t size, int flags); + +/* same as above but never weak */ +PERFTOOLS_DLL_DECL size_t tc_nallocx(size_t size, int flags); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* _NALLOCX_H_ */ diff --git a/src/third_party/gperftools-2.7/src/gperftools/profiler.h b/src/third_party/gperftools-2.7/src/gperftools/profiler.h new file mode 100644 index 00000000000..1e7eb127b46 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/gperftools/profiler.h @@ -0,0 +1,169 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2005, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + * + * Module for CPU profiling based on periodic pc-sampling. + * + * For full(er) information, see docs/cpuprofile.html + * + * This module is linked into your program with + * no slowdown caused by this unless you activate the profiler + * using one of the following methods: + * + * 1. Before starting the program, set the environment variable + * "CPUPROFILE" to be the name of the file to which the profile + * data should be written. 
+ * + * 2. Programmatically, start and stop the profiler using the + * routines "ProfilerStart(filename)" and "ProfilerStop()". + * + * + * (Note: if using linux 2.4 or earlier, only the main thread may be + * profiled.) + * + * Use pprof to view the resulting profile output. + * % pprof <path_to_executable> <profile_file_name> + * % pprof --gv <path_to_executable> <profile_file_name> + * + * These functions are thread-safe. + */ + +#ifndef BASE_PROFILER_H_ +#define BASE_PROFILER_H_ + +#include <time.h> /* For time_t */ + +/* Annoying stuff for windows; makes sure clients can import these functions */ +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +/* All this code should be usable from within C apps. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* Profiler options, for use with ProfilerStartWithOptions. To use: + * + * struct ProfilerOptions options; + * memset(&options, 0, sizeof options); + * + * then fill in fields as needed. + * + * This structure is intended to be usable from C code, so no constructor + * is provided to initialize it. (Use memset as described above). + */ +struct ProfilerOptions { + /* Filter function and argument. + * + * If filter_in_thread is not NULL, when a profiling tick is delivered + * the profiler will call: + * + * (*filter_in_thread)(filter_in_thread_arg) + * + * If it returns nonzero, the sample will be included in the profile. + * Note that filter_in_thread runs in a signal handler, so must be + * async-signal-safe. + * + * A typical use would be to set up filter results for each thread + * in the system before starting the profiler, then to make + * filter_in_thread be a very simple function which retrieves those + * results in an async-signal-safe way. Retrieval could be done + * using thread-specific data, or using a shared data structure that + * supports async-signal-safe lookups. 
+ */ + int (*filter_in_thread)(void *arg); + void *filter_in_thread_arg; +}; + +/* Start profiling and write profile info into fname, discarding any + * existing profiling data in that file. + * + * This is equivalent to calling ProfilerStartWithOptions(fname, NULL). + */ +PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname); + +/* Start profiling and write profile into fname, discarding any + * existing profiling data in that file. + * + * The profiler is configured using the options given by 'options'. + * Options which are not specified are given default values. + * + * 'options' may be NULL, in which case all are given default values. + * + * Returns nonzero if profiling was started successfully, or zero else. + */ +PERFTOOLS_DLL_DECL int ProfilerStartWithOptions( + const char *fname, const struct ProfilerOptions *options); + +/* Stop profiling. Can be started again with ProfilerStart(), but + * the currently accumulated profiling data will be cleared. + */ +PERFTOOLS_DLL_DECL void ProfilerStop(void); + +/* Flush any currently buffered profiling state to the profile file. + * Has no effect if the profiler has not been started. + */ +PERFTOOLS_DLL_DECL void ProfilerFlush(void); + + +/* DEPRECATED: these functions were used to enable/disable profiling + * in the current thread, but no longer do anything. + */ +PERFTOOLS_DLL_DECL void ProfilerEnable(void); +PERFTOOLS_DLL_DECL void ProfilerDisable(void); + +/* Returns nonzero if profile is currently enabled, zero if it's not. */ +PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads(void); + +/* Routine for registering new threads with the profiler. + */ +PERFTOOLS_DLL_DECL void ProfilerRegisterThread(void); + +/* Stores state about profiler's current status into "*state". */ +struct ProfilerState { + int enabled; /* Is profiling currently enabled? */ + time_t start_time; /* If enabled, when was profiling started? 
*/ + char profile_name[1024]; /* Name of profile file being written, or '\0' */ + int samples_gathered; /* Number of samples gathered so far (or 0) */ +}; +PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(struct ProfilerState* state); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* BASE_PROFILER_H_ */ diff --git a/src/third_party/gperftools-2.7/src/gperftools/stacktrace.h b/src/third_party/gperftools-2.7/src/gperftools/stacktrace.h new file mode 100644 index 00000000000..2b9c5a13209 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/gperftools/stacktrace.h @@ -0,0 +1,117 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Routines to extract the current stack trace. These functions are +// thread-safe. + +#ifndef GOOGLE_STACKTRACE_H_ +#define GOOGLE_STACKTRACE_H_ + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + + +// Skips the most recent "skip_count" stack frames (also skips the +// frame generated for the "GetStackFrames" routine itself), and then +// records the pc values for up to the next "max_depth" frames in +// "result", and the corresponding stack frame sizes in "sizes". +// Returns the number of values recorded in "result"/"sizes". +// +// Example: +// main() { foo(); } +// foo() { bar(); } +// bar() { +// void* result[10]; +// int sizes[10]; +// int depth = GetStackFrames(result, sizes, 10, 1); +// } +// +// The GetStackFrames call will skip the frame for "bar". It will +// return 2 and will produce pc values that map to the following +// procedures: +// result[0] foo +// result[1] main +// (Actually, there may be a few more entries after "main" to account for +// startup procedures.) +// And corresponding stack frame sizes will also be recorded: +// sizes[0] 16 +// sizes[1] 16 +// (Stack frame sizes of 16 above are just for illustration purposes.) 
+// Stack frame sizes of 0 or less indicate that those frame sizes couldn't +// be identified. +// +// This routine may return fewer stack frame entries than are +// available. Also note that "result" and "sizes" must both be non-NULL. +extern PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth, + int skip_count); + +// Same as above, but to be used from a signal handler. The "uc" parameter +// should be the pointer to ucontext_t which was passed as the 3rd parameter +// to sa_sigaction signal handler. It may help the unwinder to get a +// better stack trace under certain conditions. The "uc" may safely be NULL. +extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth, + int skip_count, const void *uc); + +// This is similar to the GetStackFrames routine, except that it returns +// the stack trace only, and not the stack frame sizes as well. +// Example: +// main() { foo(); } +// foo() { bar(); } +// bar() { +// void* result[10]; +// int depth = GetStackTrace(result, 10, 1); +// } +// +// This produces: +// result[0] foo +// result[1] main +// .... ... +// +// "result" must not be NULL. +extern PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth, + int skip_count); + +// Same as above, but to be used from a signal handler. The "uc" parameter +// should be the pointer to ucontext_t which was passed as the 3rd parameter +// to sa_sigaction signal handler. It may help the unwinder to get a +// better stack trace under certain conditions. The "uc" may safely be NULL. 
+extern PERFTOOLS_DLL_DECL int GetStackTraceWithContext(void** result, int max_depth, + int skip_count, const void *uc); + +#endif /* GOOGLE_STACKTRACE_H_ */ diff --git a/src/third_party/gperftools-2.7/src/gperftools/tcmalloc.h b/src/third_party/gperftools-2.7/src/gperftools/tcmalloc.h new file mode 100644 index 00000000000..a5515a82148 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/gperftools/tcmalloc.h @@ -0,0 +1,163 @@ +// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +#include <stddef.h> /* for size_t */ +#ifdef __cplusplus +#include <new> /* for std::nothrow_t, std::align_val_t */ +#endif + +/* Define the version number so folks can check against it */ +#define TC_VERSION_MAJOR 2 +#define TC_VERSION_MINOR 7 +#define TC_VERSION_PATCH "" +#define TC_VERSION_STRING "gperftools 2.7" + +/* For struct mallinfo, if it's defined. */ +#ifdef HAVE_STRUCT_MALLINFO +# include <malloc.h> +#endif + +#ifndef PERFTOOLS_NOTHROW + +#if __cplusplus >= 201103L +#define PERFTOOLS_NOTHROW noexcept +#elif defined(__cplusplus) +#define PERFTOOLS_NOTHROW throw() +#else +# ifdef __GNUC__ +# define PERFTOOLS_NOTHROW __attribute__((__nothrow__)) +# else +# define PERFTOOLS_NOTHROW +# endif +#endif + +#endif + +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + /* + * Returns a human-readable version string. If major, minor, + * and/or patch are not NULL, they are set to the major version, + * minor version, and patch-code (a string, usually ""). 
+ */ + PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) PERFTOOLS_NOTHROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_NOTHROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) PERFTOOLS_NOTHROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_NOTHROW; +#ifdef HAVE_STRUCT_MALLINFO + PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_NOTHROW; +#endif + + /* + * This is an alias for MallocExtension::instance()->GetAllocatedSize(). 
+ * It is equivalent to + * OS X: malloc_size() + * glibc: malloc_usable_size() + * Windows: _msize() + */ + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_NOTHROW; + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + +#if 1 && __cplusplus >= 201703L + PERFTOOLS_DLL_DECL void* tc_new_aligned(size_t size, std::align_val_t al); + PERFTOOLS_DLL_DECL void* tc_new_aligned_nothrow(size_t size, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_aligned(void* p, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_sized_aligned(void* p, size_t size, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_aligned_nothrow(void* p, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_newarray_aligned(size_t size, std::align_val_t al); + PERFTOOLS_DLL_DECL void* tc_newarray_aligned_nothrow(size_t size, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_aligned(void* p, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void 
tc_deletearray_sized_aligned(void* p, size_t size, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_aligned_nothrow(void* p, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; +#endif +} +#endif + +/* We're only un-defining for public */ +#if !defined(GPERFTOOLS_CONFIG_H_) + +#undef PERFTOOLS_NOTHROW + +#endif /* GPERFTOOLS_CONFIG_H_ */ + +#endif /* #ifndef TCMALLOC_TCMALLOC_H_ */ diff --git a/src/third_party/gperftools-2.7/src/gperftools/tcmalloc.h.in b/src/third_party/gperftools-2.7/src/gperftools/tcmalloc.h.in new file mode 100644 index 00000000000..84ac8fa5d8b --- /dev/null +++ b/src/third_party/gperftools-2.7/src/gperftools/tcmalloc.h.in @@ -0,0 +1,163 @@ +// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +#include <stddef.h> /* for size_t */ +#ifdef __cplusplus +#include <new> /* for std::nothrow_t, std::align_val_t */ +#endif + +/* Define the version number so folks can check against it */ +#define TC_VERSION_MAJOR @TC_VERSION_MAJOR@ +#define TC_VERSION_MINOR @TC_VERSION_MINOR@ +#define TC_VERSION_PATCH "@TC_VERSION_PATCH@" +#define TC_VERSION_STRING "gperftools @TC_VERSION_MAJOR@.@TC_VERSION_MINOR@@TC_VERSION_PATCH@" + +/* For struct mallinfo, if it's defined. */ +#if @ac_cv_have_struct_mallinfo@ +# include <malloc.h> +#endif + +#ifndef PERFTOOLS_NOTHROW + +#if __cplusplus >= 201103L +#define PERFTOOLS_NOTHROW noexcept +#elif defined(__cplusplus) +#define PERFTOOLS_NOTHROW throw() +#else +# ifdef __GNUC__ +# define PERFTOOLS_NOTHROW __attribute__((__nothrow__)) +# else +# define PERFTOOLS_NOTHROW +# endif +#endif + +#endif + +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + /* + * Returns a human-readable version string. If major, minor, + * and/or patch are not NULL, they are set to the major version, + * minor version, and patch-code (a string, usually ""). 
+ */ + PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) PERFTOOLS_NOTHROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_NOTHROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) PERFTOOLS_NOTHROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_NOTHROW; +#if @ac_cv_have_struct_mallinfo@ + PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_NOTHROW; +#endif + + /* + * This is an alias for MallocExtension::instance()->GetAllocatedSize(). 
+ * It is equivalent to + * OS X: malloc_size() + * glibc: malloc_usable_size() + * Windows: _msize() + */ + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_NOTHROW; + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + +#if @ac_cv_have_std_align_val_t@ && __cplusplus >= 201703L + PERFTOOLS_DLL_DECL void* tc_new_aligned(size_t size, std::align_val_t al); + PERFTOOLS_DLL_DECL void* tc_new_aligned_nothrow(size_t size, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_aligned(void* p, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_sized_aligned(void* p, size_t size, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_aligned_nothrow(void* p, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_newarray_aligned(size_t size, std::align_val_t al); + PERFTOOLS_DLL_DECL void* tc_newarray_aligned_nothrow(size_t size, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_aligned(void* p, std::align_val_t al) PERFTOOLS_NOTHROW; + 
PERFTOOLS_DLL_DECL void tc_deletearray_sized_aligned(void* p, size_t size, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_aligned_nothrow(void* p, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; +#endif +} +#endif + +/* We're only un-defining for public */ +#if !defined(GPERFTOOLS_CONFIG_H_) + +#undef PERFTOOLS_NOTHROW + +#endif /* GPERFTOOLS_CONFIG_H_ */ + +#endif /* #ifndef TCMALLOC_TCMALLOC_H_ */ diff --git a/src/third_party/gperftools-2.7/src/heap-checker-bcad.cc b/src/third_party/gperftools-2.7/src/heap-checker-bcad.cc new file mode 100644 index 00000000000..00efdb7cfd4 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/heap-checker-bcad.cc @@ -0,0 +1,93 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Maxim Lifantsev +// +// A file to ensure that components of heap leak checker run before +// all global object constructors and after all global object +// destructors. +// +// This file must be the last library any binary links against. +// Otherwise, the heap checker may not be able to run early enough to +// catalog all the global objects in your program. If this happens, +// and later in the program you allocate memory and have one of these +// "uncataloged" global objects point to it, the heap checker will +// consider that allocation to be a leak, even though it's not (since +// the allocated object is reachable from global data and hence "live"). + +#include <stdlib.h> // for abort() +#include <gperftools/malloc_extension.h> + +// A dummy variable to refer from heap-checker.cc. This is to make +// sure this file is not optimized out by the linker. +bool heap_leak_checker_bcad_variable; + +extern void HeapLeakChecker_AfterDestructors(); // in heap-checker.cc + +// A helper class to ensure that some components of heap leak checking +// can happen before construction and after destruction +// of all global/static objects. 
+class HeapLeakCheckerGlobalPrePost { + public: + HeapLeakCheckerGlobalPrePost() { + if (count_ == 0) { + // The 'new int' will ensure that we have run an initial malloc + // hook, which will set up the heap checker via + // MallocHook_InitAtFirstAllocation_HeapLeakChecker. See malloc_hook.cc. + // This is done in this roundabout fashion in order to avoid self-deadlock + // if we directly called HeapLeakChecker_BeforeConstructors here. + delete new int; + // This needs to be called before the first allocation of an STL + // object, but after libc is done setting up threads (because it + // calls setenv, which requires a thread-aware errno). By + // putting it here, we hope it's the first bit of code executed + // after the libc global-constructor code. + MallocExtension::Initialize(); + } + ++count_; + } + ~HeapLeakCheckerGlobalPrePost() { + if (count_ <= 0) abort(); + --count_; + if (count_ == 0) HeapLeakChecker_AfterDestructors(); + } + private: + // Counter of constructions/destructions of objects of this class + // (just in case there are more than one of them). + static int count_; +}; + +int HeapLeakCheckerGlobalPrePost::count_ = 0; + +// The early-construction/late-destruction global object. +static const HeapLeakCheckerGlobalPrePost heap_leak_checker_global_pre_post; diff --git a/src/third_party/gperftools-2.7/src/heap-checker.cc b/src/third_party/gperftools-2.7/src/heap-checker.cc new file mode 100755 index 00000000000..8e71f58232c --- /dev/null +++ b/src/third_party/gperftools-2.7/src/heap-checker.cc @@ -0,0 +1,2388 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. 
+// +// Author: Maxim Lifantsev +// + +#include "config.h" + +#include <fcntl.h> // for O_RDONLY (we use syscall to do actual reads) +#include <string.h> +#include <errno.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#ifdef HAVE_PTHREAD +#include <pthread.h> +#endif +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> +#include <assert.h> + +#if defined(HAVE_LINUX_PTRACE_H) +#include <linux/ptrace.h> +#endif +#ifdef HAVE_SYS_SYSCALL_H +#include <sys/syscall.h> +#endif +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__) +#include <wtypes.h> +#include <winbase.h> +#undef ERROR // windows defines these as macros, which can cause trouble +#undef max +#undef min +#endif + +#include <string> +#include <vector> +#include <map> +#include <set> +#include <algorithm> +#include <functional> + +#include <gperftools/heap-checker.h> + +#include "base/basictypes.h" +#include "base/googleinit.h" +#include "base/logging.h" +#include <gperftools/stacktrace.h> +#include "base/commandlineflags.h" +#include "base/elfcore.h" // for i386_regs +#include "base/thread_lister.h" +#include "heap-profile-table.h" +#include "base/low_level_alloc.h" +#include "malloc_hook-inl.h" +#include <gperftools/malloc_hook.h> +#include <gperftools/malloc_extension.h> +#include "maybe_threads.h" +#include "memory_region_map.h" +#include "base/spinlock.h" +#include "base/sysinfo.h" +#include "base/stl_allocator.h" + +using std::string; +using std::basic_string; +using std::pair; +using std::map; +using std::set; +using std::vector; +using std::swap; +using std::make_pair; +using std::min; +using std::max; +using std::less; +using std::char_traits; + +// If current process is being ptrace()d, 'TracerPid' in /proc/self/status +// will be non-zero. 
+static bool IsDebuggerAttached(void) { // only works under linux, probably + char buf[256]; // TracerPid comes relatively earlier in status output + int fd = open("/proc/self/status", O_RDONLY); + if (fd == -1) { + return false; // Can't tell for sure. + } + const int len = read(fd, buf, sizeof(buf)); + bool rc = false; + if (len > 0) { + const char *const kTracerPid = "TracerPid:\t"; + buf[len - 1] = '\0'; + const char *p = strstr(buf, kTracerPid); + if (p != NULL) { + rc = (strncmp(p + strlen(kTracerPid), "0\n", 2) != 0); + } + } + close(fd); + return rc; +} + +// This is the default if you don't link in -lprofiler +extern "C" { +ATTRIBUTE_WEAK PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads(); +int ProfilingIsEnabledForAllThreads() { return false; } +} + +//---------------------------------------------------------------------- +// Flags that control heap-checking +//---------------------------------------------------------------------- + +DEFINE_string(heap_check, + EnvToString("HEAPCHECK", ""), + "The heap leak checking to be done over the whole executable: " + "\"minimal\", \"normal\", \"strict\", " + "\"draconian\", \"as-is\", and \"local\" " + " or the empty string are the supported choices. 
" + "(See HeapLeakChecker_InternalInitStart for details.)"); + +DEFINE_bool(heap_check_report, true, "Obsolete"); + +DEFINE_bool(heap_check_before_constructors, + true, + "deprecated; pretty much always true now"); + +DEFINE_bool(heap_check_after_destructors, + EnvToBool("HEAP_CHECK_AFTER_DESTRUCTORS", false), + "If overall heap check is to end after global destructors " + "or right after all REGISTER_HEAPCHECK_CLEANUP's"); + +DEFINE_bool(heap_check_strict_check, true, "Obsolete"); + +DEFINE_bool(heap_check_ignore_global_live, + EnvToBool("HEAP_CHECK_IGNORE_GLOBAL_LIVE", true), + "If overall heap check is to ignore heap objects reachable " + "from the global data"); + +DEFINE_bool(heap_check_identify_leaks, + EnvToBool("HEAP_CHECK_IDENTIFY_LEAKS", false), + "If heap check should generate the addresses of the leaked " + "objects in the memory leak profiles. This may be useful " + "in tracking down leaks where only a small fraction of " + "objects allocated at the same stack trace are leaked."); + +DEFINE_bool(heap_check_ignore_thread_live, + EnvToBool("HEAP_CHECK_IGNORE_THREAD_LIVE", true), + "If set to true, objects reachable from thread stacks " + "and registers are not reported as leaks"); + +DEFINE_bool(heap_check_test_pointer_alignment, + EnvToBool("HEAP_CHECK_TEST_POINTER_ALIGNMENT", false), + "Set to true to check if the found leak can be due to " + "use of unaligned pointers"); + +// Alignment at which all pointers in memory are supposed to be located; +// use 1 if any alignment is ok. +// heap_check_test_pointer_alignment flag guides if we try the value of 1. +// The larger it can be, the lesser is the chance of missing real leaks. +static const size_t kPointerSourceAlignment = sizeof(void*); +DEFINE_int32(heap_check_pointer_source_alignment, + EnvToInt("HEAP_CHECK_POINTER_SOURCE_ALIGNMENT", + kPointerSourceAlignment), + "Alignment at which all pointers in memory are supposed to be " + "located. 
Use 1 if any alignment is ok."); + +// A reasonable default to handle pointers inside of typical class objects: +// Too low and we won't be able to traverse pointers to normally-used +// nested objects and base parts of multiple-inherited objects. +// Too high and it will both slow down leak checking (FindInsideAlloc +// in HaveOnHeapLocked will get slower when there are large on-heap objects) +// and make it probabilistically more likely to miss leaks +// of large-sized objects. +static const int64 kHeapCheckMaxPointerOffset = 1024; +DEFINE_int64(heap_check_max_pointer_offset, + EnvToInt("HEAP_CHECK_MAX_POINTER_OFFSET", + kHeapCheckMaxPointerOffset), + "Largest pointer offset for which we traverse " + "pointers going inside of heap allocated objects. " + "Set to -1 to use the actual largest heap object size."); + +DEFINE_bool(heap_check_run_under_gdb, + EnvToBool("HEAP_CHECK_RUN_UNDER_GDB", false), + "If false, turns off heap-checking library when running under gdb " + "(normally, set to 'true' only when debugging the heap-checker)"); + +DEFINE_int32(heap_check_delay_seconds, 0, + "Number of seconds to delay on-exit heap checking." + " If you set this flag," + " you may also want to set exit_timeout_seconds in order to" + " avoid exit timeouts.\n" + "NOTE: This flag is to be used only to help diagnose issues" + " where it is suspected that the heap checker is reporting" + " false leaks that will disappear if the heap checker delays" + " its checks. 
Report any such issues to the heap-checker" + " maintainer(s)."); + +//---------------------------------------------------------------------- + +DEFINE_string(heap_profile_pprof, + EnvToString("PPROF_PATH", "pprof"), + "OBSOLETE; not used"); + +DEFINE_string(heap_check_dump_directory, + EnvToString("HEAP_CHECK_DUMP_DIRECTORY", "/tmp"), + "Directory to put heap-checker leak dump information"); + + +//---------------------------------------------------------------------- +// HeapLeakChecker global data +//---------------------------------------------------------------------- + +// Global lock for all the global data of this module. +static SpinLock heap_checker_lock(SpinLock::LINKER_INITIALIZED); + +//---------------------------------------------------------------------- + +// Heap profile prefix for leak checking profiles. +// Gets assigned once when leak checking is turned on, then never modified. +static const string* profile_name_prefix = NULL; + +// Whole-program heap leak checker. +// Gets assigned once when leak checking is turned on, +// then main_heap_checker is never deleted. +static HeapLeakChecker* main_heap_checker = NULL; + +// Whether we will use main_heap_checker to do a check at program exit +// automatically. In any case user can ask for more checks on main_heap_checker +// via GlobalChecker(). +static bool do_main_heap_check = false; + +// The heap profile we use to collect info about the heap. +// This is created in HeapLeakChecker::BeforeConstructorsLocked +// together with setting heap_checker_on (below) to true +// and registering our new/delete malloc hooks; +// similarly all are unset in HeapLeakChecker::TurnItselfOffLocked. +static HeapProfileTable* heap_profile = NULL; + +// If we are doing (or going to do) any kind of heap-checking. 
+static bool heap_checker_on = false; + +// pid of the process that does whole-program heap leak checking +static pid_t heap_checker_pid = 0; + +// If we did heap profiling during global constructors execution +static bool constructor_heap_profiling = false; + +// RAW_VLOG level we dump key INFO messages at. If you want to turn +// off these messages, set the environment variable PERFTOOLS_VERBOSE=-1. +static const int heap_checker_info_level = 0; + +//---------------------------------------------------------------------- +// HeapLeakChecker's own memory allocator that is +// independent of the normal program allocator. +//---------------------------------------------------------------------- + +// Wrapper of LowLevelAlloc for STL_Allocator and direct use. +// We always access this class under held heap_checker_lock, +// this allows us to in particular protect the period when threads are stopped +// at random spots with TCMalloc_ListAllProcessThreads by heap_checker_lock, +// w/o worrying about the lock in LowLevelAlloc::Arena. +// We rely on the fact that we use an own arena with an own lock here. 
class HeapLeakChecker::Allocator {
 public:
  // Create the private arena.  Must be called exactly once before any
  // Allocate() call, under heap_checker_lock.
  static void Init() {
    RAW_DCHECK(heap_checker_lock.IsHeld(), "");
    RAW_DCHECK(arena_ == NULL, "");
    arena_ = LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena());
  }
  // Tear down the arena.  Dies if any of our own allocations are still
  // outstanding -- that would be an internal leak of the leak checker itself.
  static void Shutdown() {
    RAW_DCHECK(heap_checker_lock.IsHeld(), "");
    if (!LowLevelAlloc::DeleteArena(arena_)  ||  alloc_count_ != 0) {
      RAW_LOG(FATAL, "Internal heap checker leak of %d objects", alloc_count_);
    }
  }
  // Number of currently outstanding Allocate() calls (minus Free() calls).
  static int alloc_count() {
    RAW_DCHECK(heap_checker_lock.IsHeld(), "");
    return alloc_count_;
  }
  static void* Allocate(size_t n) {
    RAW_DCHECK(arena_  &&  heap_checker_lock.IsHeld(), "");
    void* p = LowLevelAlloc::AllocWithArena(n, arena_);
    if (p) alloc_count_ += 1;
    return p;
  }
  static void Free(void* p) {
    RAW_DCHECK(heap_checker_lock.IsHeld(), "");
    if (p) alloc_count_ -= 1;
    LowLevelAlloc::Free(p);
  }
  // Sized overload required by the STL allocator interface; the size is
  // ignored and we simply forward to Free(p).
  static void Free(void* p, size_t /* n */) {
    Free(p);
  }
  // destruct, free, and make *p to be NULL
  template<typename T> static void DeleteAndNull(T** p) {
    (*p)->~T();
    Free(*p);
    *p = NULL;
  }
  template<typename T> static void DeleteAndNullIfNot(T** p) {
    if (*p != NULL) DeleteAndNull(p);
  }
 private:
  static LowLevelAlloc::Arena* arena_;
  static int alloc_count_;
};

LowLevelAlloc::Arena* HeapLeakChecker::Allocator::arena_ = NULL;
int HeapLeakChecker::Allocator::alloc_count_ = 0;

//----------------------------------------------------------------------
// HeapLeakChecker live object tracking components
//----------------------------------------------------------------------

// Cases of live object placement we distinguish
enum ObjectPlacement {
  MUST_BE_ON_HEAP,   // Must point to a live object of the matching size in the
                     // heap_profile map of the heap when we get to it
  IGNORED_ON_HEAP,   // Is a live (ignored) object on heap
  MAYBE_LIVE,        // Is a piece of writable memory from /proc/self/maps
  IN_GLOBAL_DATA,    // Is part of global data region of the executable
  THREAD_DATA,       // Part of a thread stack and a thread descriptor with TLS
  THREAD_REGISTERS,  // Values in registers of some thread
};

// Information about an allocated object
struct AllocObject {
  const void* ptr;        // the object
  uintptr_t size;         // its size
  ObjectPlacement place;  // where ptr points to

  AllocObject(const void* p, size_t s, ObjectPlacement l)
    : ptr(p), size(s), place(l) { }
};

// All objects (memory ranges) ignored via HeapLeakChecker::IgnoreObject
// Key is the object's address; value is its size.
typedef map<uintptr_t, size_t, less<uintptr_t>,
            STL_Allocator<pair<const uintptr_t, size_t>,
                          HeapLeakChecker::Allocator>
           > IgnoredObjectsMap;
static IgnoredObjectsMap* ignored_objects = NULL;

// All objects (memory ranges) that we consider to be the sources of pointers
// to live (not leaked) objects.
// At different times this holds (what can be reached from) global data regions
// and the objects we've been told to ignore.
// For any AllocObject::ptr "live_objects" is supposed to contain at most one
// record at any time.  We maintain this by checking with the heap_profile map
// of the heap and removing the live heap objects we've handled from it.
// This vector is maintained as a stack and the frontier of reachable
// live heap objects in our flood traversal of them.
typedef vector<AllocObject,
               STL_Allocator<AllocObject, HeapLeakChecker::Allocator>
              > LiveObjectsStack;
static LiveObjectsStack* live_objects = NULL;

// A special string type that uses my allocator
typedef basic_string<char, char_traits<char>,
                     STL_Allocator<char, HeapLeakChecker::Allocator>
                    > HCL_string;

// A placeholder to fill-in the starting values for live_objects
// for each library so we can keep the library-name association for logging.
// Maps a library name (from /proc/self/maps) to the tentatively-live memory
// spans found inside that library's writable mappings.
typedef map<HCL_string, LiveObjectsStack, less<HCL_string>,
            STL_Allocator<pair<const HCL_string, LiveObjectsStack>,
                          HeapLeakChecker::Allocator>
           > LibraryLiveObjectsStacks;
static LibraryLiveObjectsStacks* library_live_objects = NULL;

// Value stored in the map of disabled address ranges;
// its key is the end of the address range.
// We'll ignore allocations with a return address in a disabled range
// if the address occurs at 'max_depth' or less in the stack trace.
struct HeapLeakChecker::RangeValue {
  uintptr_t start_address;  // the start of the range
  int max_depth;            // the maximal stack depth to disable at
};
typedef map<uintptr_t, HeapLeakChecker::RangeValue, less<uintptr_t>,
            STL_Allocator<pair<const uintptr_t, HeapLeakChecker::RangeValue>,
                          HeapLeakChecker::Allocator>
           > DisabledRangeMap;
// The disabled program counter address ranges for profile dumping
// that are registered with HeapLeakChecker::DisableChecksFromToLocked.
static DisabledRangeMap* disabled_ranges = NULL;

// Set of stack tops.
// These are used to consider live only appropriate chunks of the memory areas
// that are used for stacks (and maybe thread-specific data as well)
// so that we do not treat pointers from outdated stack frames as live.
typedef set<uintptr_t, less<uintptr_t>,
            STL_Allocator<uintptr_t, HeapLeakChecker::Allocator>
           > StackTopSet;
static StackTopSet* stack_tops = NULL;

// A map of ranges of code addresses for the system libraries
// that can mmap/mremap/sbrk-allocate memory regions for stacks
// and thread-local storage that we want to consider as live global data.
// Maps from the end address to the start address.
typedef map<uintptr_t, uintptr_t, less<uintptr_t>,
            STL_Allocator<pair<const uintptr_t, uintptr_t>,
                          HeapLeakChecker::Allocator>
           > GlobalRegionCallerRangeMap;
static GlobalRegionCallerRangeMap* global_region_caller_ranges = NULL;

// TODO(maxim): make our big data structs into own modules

// Disabler is implemented by keeping track of a per-thread count
// of active Disabler objects.  Any objects allocated while the
// count > 0 are not reported.

#ifdef HAVE_TLS

static __thread int thread_disable_counter
// The "initial exec" model is faster than the default TLS model, at
// the cost you can't dlopen this library.  But dlopen on heap-checker
// doesn't work anyway -- it must run before main -- so this is a good
// trade-off.
# ifdef HAVE___ATTRIBUTE__
  __attribute__ ((tls_model ("initial-exec")))
# endif
  ;
inline int get_thread_disable_counter() {
  return thread_disable_counter;
}
inline void set_thread_disable_counter(int value) {
  thread_disable_counter = value;
}

#else  // #ifdef HAVE_TLS

// No compiler TLS available: fall back to a pthread key, with a plain
// global as storage for the main thread before pthreads are initialized.
static pthread_key_t thread_disable_counter_key;
static int main_thread_counter;  // storage for use before main()
static bool use_main_thread_counter = true;

// TODO(csilvers): this is called from NewHook, in the middle of malloc().
// If perftools_pthread_getspecific calls malloc, that will lead to an
// infinite loop.  I don't know how to fix that, so I hope it never happens!
inline int get_thread_disable_counter() {
  if (use_main_thread_counter)  // means we're running really early
    return main_thread_counter;
  void* p = perftools_pthread_getspecific(thread_disable_counter_key);
  return (intptr_t)p;  // kinda evil: store the counter directly in the void*
}

inline void set_thread_disable_counter(int value) {
  if (use_main_thread_counter) {  // means we're running really early
    main_thread_counter = value;
    return;
  }
  intptr_t pointer_sized_value = value;
  // kinda evil: store the counter directly in the void*
  void* p = (void*)pointer_sized_value;
  // NOTE: this may call malloc, which will call NewHook which will call
  // get_thread_disable_counter() which will call pthread_getspecific().  I
  // don't know if anything bad can happen if we call getspecific() in the
  // middle of a setspecific() call.  It seems to work ok in practice...
  perftools_pthread_setspecific(thread_disable_counter_key, p);
}

// The idea here is that this initializer will run pretty late: after
// pthreads have been totally set up.  At this point we can call
// pthreads routines, so we set those up.
class InitThreadDisableCounter {
 public:
  InitThreadDisableCounter() {
    perftools_pthread_key_create(&thread_disable_counter_key, NULL);
    // Set up the main thread's value, which we have a special variable for.
    void* p = (void*)(intptr_t)main_thread_counter;  // store the counter directly
    perftools_pthread_setspecific(thread_disable_counter_key, p);
    use_main_thread_counter = false;
  }
};
InitThreadDisableCounter init_thread_disable_counter;

#endif  // #ifdef HAVE_TLS

HeapLeakChecker::Disabler::Disabler() {
  // It is faster to unconditionally increment the thread-local
  // counter than to check whether or not heap-checking is on
  // in a thread-safe manner.
  int counter = get_thread_disable_counter();
  set_thread_disable_counter(counter + 1);
  RAW_VLOG(10, "Increasing thread disable counter to %d", counter + 1);
}

HeapLeakChecker::Disabler::~Disabler() {
  int counter = get_thread_disable_counter();
  RAW_DCHECK(counter > 0, "");
  if (counter > 0) {
    set_thread_disable_counter(counter - 1);
    RAW_VLOG(10, "Decreasing thread disable counter to %d", counter);
  } else {
    // Unbalanced destruction (more dtors than ctors ran); log but do not
    // let the counter go negative.
    RAW_VLOG(0, "Thread disable counter underflow : %d", counter);
  }
}

//----------------------------------------------------------------------

// The size of the largest heap object allocated so far.
static size_t max_heap_object_size = 0;
// The possible range of addresses that can point
// into one of the elements of heap_objects.
static uintptr_t min_heap_address = uintptr_t(-1LL);
static uintptr_t max_heap_address = 0;

//----------------------------------------------------------------------

// Simple casting helpers for uintptr_t and void*:
template<typename T>
inline static const void* AsPtr(T addr) {
  return reinterpret_cast<void*>(addr);
}
inline static uintptr_t AsInt(const void* ptr) {
  return reinterpret_cast<uintptr_t>(ptr);
}

//----------------------------------------------------------------------

// We've seen reports that strstr causes heap-checker crashes in some
// libc's (?):
// http://code.google.com/p/gperftools/issues/detail?id=263
// It's simple enough to use our own.  This is not in time-critical code.
// Our own strstr(): finds the first occurrence of s2 in s1.
// Precondition: s2 is non-empty (RAW_CHECKed); standard strstr would
// return s1 for an empty needle instead.
static const char* hc_strstr(const char* s1, const char* s2) {
  const size_t len = strlen(s2);
  RAW_CHECK(len > 0, "Unexpected empty string passed to strstr()");
  for (const char* p = strchr(s1, *s2); p != NULL; p = strchr(p+1, *s2)) {
    if (strncmp(p, s2, len) == 0) {
      return p;
    }
  }
  return NULL;
}

//----------------------------------------------------------------------

// Our hooks for MallocHook
// Records every allocation into heap_profile (when heap_checker_on) and
// maintains the min/max heap address range and max object size used to
// filter candidate pointers during the liveness flood.
static void NewHook(const void* ptr, size_t size) {
  if (ptr != NULL) {
    const int counter = get_thread_disable_counter();
    const bool ignore = (counter > 0);
    RAW_VLOG(16, "Recording Alloc: %p of %" PRIuS "; %d", ptr, size,
             int(counter));

    // Fetch the caller's stack trace before acquiring heap_checker_lock.
    void* stack[HeapProfileTable::kMaxStackDepth];
    int depth = HeapProfileTable::GetCallerStackTrace(0, stack);

    { SpinLockHolder l(&heap_checker_lock);
      if (size > max_heap_object_size) max_heap_object_size = size;
      uintptr_t addr = AsInt(ptr);
      if (addr < min_heap_address) min_heap_address = addr;
      addr += size;
      if (addr > max_heap_address) max_heap_address = addr;
      if (heap_checker_on) {
        heap_profile->RecordAlloc(ptr, size, depth, stack);
        if (ignore) {
          heap_profile->MarkAsIgnored(ptr);
        }
      }
    }
    RAW_VLOG(17, "Alloc Recorded: %p of %" PRIuS "", ptr, size);
  }
}

static void DeleteHook(const void* ptr) {
  if (ptr != NULL) {
    RAW_VLOG(16, "Recording Free %p", ptr);
    { SpinLockHolder l(&heap_checker_lock);
      if (heap_checker_on) heap_profile->RecordFree(ptr);
    }
    RAW_VLOG(17, "Free Recorded: %p", ptr);
  }
}

//----------------------------------------------------------------------

enum StackDirection {
  GROWS_TOWARDS_HIGH_ADDRESSES,
  GROWS_TOWARDS_LOW_ADDRESSES,
  UNKNOWN_DIRECTION
};

// Determine which way the stack grows:
// (compares the address of a local in this frame against a local in the
// caller's frame; relies on the frames living on one contiguous stack)

static StackDirection ATTRIBUTE_NOINLINE GetStackDirection(
    const uintptr_t *const ptr) {
  uintptr_t x;
  if (&x < ptr)
    return GROWS_TOWARDS_LOW_ADDRESSES;
  if (ptr < &x)
    return GROWS_TOWARDS_HIGH_ADDRESSES;

  RAW_CHECK(0, "");  // Couldn't determine the stack direction.

  return UNKNOWN_DIRECTION;
}

// Direction of stack growth (will initialize via GetStackDirection())
static StackDirection stack_direction = UNKNOWN_DIRECTION;

// This routine is called for every thread stack we know about to register it.
// It records top_ptr in stack_tops and pushes the live portion of the stack
// (from top_ptr towards its base, depending on stack_direction) onto
// live_objects as THREAD_DATA.
static void RegisterStackLocked(const void* top_ptr) {
  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
  RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
  RAW_VLOG(10, "Thread stack at %p", top_ptr);
  uintptr_t top = AsInt(top_ptr);
  stack_tops->insert(top);  // add for later use

  // make sure stack_direction is initialized
  if (stack_direction == UNKNOWN_DIRECTION) {
    stack_direction = GetStackDirection(&top);
  }

  // Find memory region with this stack
  MemoryRegionMap::Region region;
  if (MemoryRegionMap::FindAndMarkStackRegion(top, &region)) {
    // Make the proper portion of the stack live:
    if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) {
      RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
               top_ptr, region.end_addr - top);
      live_objects->push_back(AllocObject(top_ptr, region.end_addr - top,
                                          THREAD_DATA));
    } else {  // GROWS_TOWARDS_HIGH_ADDRESSES
      RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
               AsPtr(region.start_addr),
               top - region.start_addr);
      live_objects->push_back(AllocObject(AsPtr(region.start_addr),
                                          top - region.start_addr,
                                          THREAD_DATA));
    }
  // not in MemoryRegionMap, look in library_live_objects:
  } else if (FLAGS_heap_check_ignore_global_live) {
    for (LibraryLiveObjectsStacks::iterator lib = library_live_objects->begin();
         lib != library_live_objects->end(); ++lib) {
      for (LiveObjectsStack::iterator span = lib->second.begin();
           span != lib->second.end(); ++span) {
        uintptr_t start = AsInt(span->ptr);
        uintptr_t end = start + span->size;
        if (start <= top && top < end) {
          RAW_VLOG(11, "Stack at %p is inside /proc/self/maps chunk %p..%p",
                   top_ptr, AsPtr(start), AsPtr(end));
          // Shrink start..end region by chopping away the memory regions in
          // MemoryRegionMap that land in it to undo merging of regions
          // in /proc/self/maps, so that we correctly identify what portion
          // of start..end is actually the stack region.
          uintptr_t stack_start = start;
          uintptr_t stack_end = end;
          // can optimize-away this loop, but it does not run often
          RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
          for (MemoryRegionMap::RegionIterator r =
                 MemoryRegionMap::BeginRegionLocked();
               r != MemoryRegionMap::EndRegionLocked(); ++r) {
            if (top < r->start_addr && r->start_addr < stack_end) {
              stack_end = r->start_addr;
            }
            if (stack_start < r->end_addr && r->end_addr <= top) {
              stack_start = r->end_addr;
            }
          }
          if (stack_start != start || stack_end != end) {
            RAW_VLOG(11, "Stack at %p is actually inside memory chunk %p..%p",
                     top_ptr, AsPtr(stack_start), AsPtr(stack_end));
          }
          // Make the proper portion of the stack live:
          if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) {
            RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
                     top_ptr, stack_end - top);
            live_objects->push_back(
              AllocObject(top_ptr, stack_end - top, THREAD_DATA));
          } else {  // GROWS_TOWARDS_HIGH_ADDRESSES
            RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
                     AsPtr(stack_start), top - stack_start);
            live_objects->push_back(
              AllocObject(AsPtr(stack_start), top - stack_start, THREAD_DATA));
          }
          lib->second.erase(span);  // kill the rest of the region
          // Put the non-stack part(s) of the region back:
          if (stack_start != start) {
            lib->second.push_back(AllocObject(AsPtr(start), stack_start - start,
                                  MAYBE_LIVE));
          }
          if (stack_end != end) {
            lib->second.push_back(AllocObject(AsPtr(stack_end), end - stack_end,
                                  MAYBE_LIVE));
          }
          return;
        }
      }
    }
    RAW_LOG(ERROR, "Memory region for stack at %p not found. "
                   "Will likely report false leak positives.", top_ptr);
  }
}

// Iterator for heap allocation map data to make ignored objects "live"
// (i.e., treated as roots for the mark-and-sweep phase)
static void MakeIgnoredObjectsLiveCallbackLocked(
    const void* ptr, const HeapProfileTable::AllocInfo& info) {
  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
  if (info.ignored) {
    live_objects->push_back(AllocObject(ptr, info.object_size,
                                        MUST_BE_ON_HEAP));
  }
}

// Iterator for heap allocation map data to make objects allocated from
// disabled regions of code to be live.
static void MakeDisabledLiveCallbackLocked(
    const void* ptr, const HeapProfileTable::AllocInfo& info) {
  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
  // NOTE: stack_disable is never set in this version -- only range_disable
  // is computed below; the variable is kept for the shared logging text.
  bool stack_disable = false;
  bool range_disable = false;
  for (int depth = 0; depth < info.stack_depth; depth++) {
    uintptr_t addr = AsInt(info.call_stack[depth]);
    if (disabled_ranges) {
      // disabled_ranges is keyed by range end, so upper_bound(addr) is the
      // only candidate range that could contain addr.
      DisabledRangeMap::const_iterator iter
        = disabled_ranges->upper_bound(addr);
      if (iter != disabled_ranges->end()) {
        RAW_DCHECK(iter->first > addr, "");
        if (iter->second.start_address < addr &&
            iter->second.max_depth > depth) {
          range_disable = true;  // in range; dropping
          break;
        }
      }
    }
  }
  if (stack_disable || range_disable) {
    uintptr_t start_address = AsInt(ptr);
    uintptr_t end_address = start_address + info.object_size;
    StackTopSet::const_iterator iter
      = stack_tops->lower_bound(start_address);
    if (iter != stack_tops->end()) {
      RAW_DCHECK(*iter >= start_address, "");
      if (*iter < end_address) {
        // We do not disable (treat as live) whole allocated regions
        // if they are used to hold thread call stacks
        // (i.e. when we find a stack inside).
        // The reason is that we'll treat as live the currently used
        // stack portions anyway (see RegisterStackLocked),
        // and the rest of the region where the stack lives can well
        // contain outdated stack variables which are not live anymore,
        // hence should not be treated as such.
        RAW_VLOG(11, "Not %s-disabling %" PRIuS " bytes at %p"
                     ": have stack inside: %p",
                 (stack_disable ? "stack" : "range"),
                 info.object_size, ptr, AsPtr(*iter));
        return;
      }
    }
    RAW_VLOG(11, "%s-disabling %" PRIuS " bytes at %p",
             (stack_disable ? "Stack" : "Range"), info.object_size, ptr);
    live_objects->push_back(AllocObject(ptr, info.object_size,
                                        MUST_BE_ON_HEAP));
  }
}

static const char kUnnamedProcSelfMapEntry[] = "UNNAMED";

// This function takes some fields from a /proc/self/maps line:
//
//   start_address  start address of a memory region.
//   end_address    end address of a memory region
//   permissions    rwx + private/shared bit
//   filename       filename of the mapped file
//
// If the region is not writeable, then it cannot have any heap
// pointers in it, otherwise we record it as a candidate live region
// to get filtered later.
static void RecordGlobalDataLocked(uintptr_t start_address,
                                   uintptr_t end_address,
                                   const char* permissions,
                                   const char* filename) {
  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
  // Ignore non-writeable regions.
  if (strchr(permissions, 'w') == NULL) return;
  if (filename == NULL || *filename == '\0') {
    filename = kUnnamedProcSelfMapEntry;
  }
  RAW_VLOG(11, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR,
           filename, start_address, end_address);
  (*library_live_objects)[filename].
    push_back(AllocObject(AsPtr(start_address),
                          end_address - start_address,
                          MAYBE_LIVE));
}

// See if 'library' from /proc/self/maps has base name 'library_base'
// i.e. contains it and has '.' or '-' after it.
static bool IsLibraryNamed(const char* library, const char* library_base) {
  const char* p = hc_strstr(library, library_base);
  size_t sz = strlen(library_base);
  return p != NULL && (p[sz] == '.' || p[sz] == '-');
}

// static
// Registers a "disabled" code-address range for known system libraries so
// that allocations made directly from them (at a small stack depth) are not
// reported as leaks; see MakeDisabledLiveCallbackLocked.
void HeapLeakChecker::DisableLibraryAllocsLocked(const char* library,
                                                 uintptr_t start_address,
                                                 uintptr_t end_address) {
  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
  int depth = 0;
  // TODO(maxim): maybe this should be extended to also use objdump
  //              and pick the text portion of the library more precisely.
  if (IsLibraryNamed(library, "/libpthread")  ||
        // libpthread has a lot of small "system" leaks we don't care about.
        // In particular it allocates memory to store data supplied via
        // pthread_setspecific (which can be the only pointer to a heap object).
      IsLibraryNamed(library, "/libdl")  ||
        // library loaders leak some "system" heap that we don't care about
      IsLibraryNamed(library, "/libcrypto")  ||
        // Sometimes libcrypto of OpenSSH is compiled with -fomit-frame-pointer
        // (any library can be, of course, but this one often is because speed
        // is so important for making crypto usable).  We ignore all its
        // allocations because we can't see the call stacks.  We'd prefer
        // to ignore allocations done in files/symbols that match
        // "default_malloc_ex|default_realloc_ex"
        // but that doesn't work when the end-result binary is stripped.
      IsLibraryNamed(library, "/libjvm")  ||
        // JVM has a lot of leaks we don't care about.
      IsLibraryNamed(library, "/libzip")
        // The JVM leaks java.util.zip.Inflater after loading classes.
     ) {
    depth = 1;  // only disable allocation calls directly from the library code
  } else if (IsLibraryNamed(library, "/ld")
               // library loader leaks some "system" heap
               // (e.g. thread-local storage) that we don't care about
            ) {
    depth = 2;  // disable allocation calls directly from the library code
                // and at depth 2 from it.
    // We need depth 2 here solely because of a libc bug that
    // forces us to jump through __memalign_hook and MemalignOverride hoops
    // in tcmalloc.cc.
    // Those buggy __libc_memalign() calls are in ld-linux.so and happen for
    // thread-local storage allocations that we want to ignore here.
    // We go with the depth-2 hack as a workaround for this libc bug:
    // otherwise we'd need to extend MallocHook interface
    // so that correct stack depth adjustment can be propagated from
    // the exceptional case of MemalignOverride.
    // Using depth 2 here should not mask real leaks because ld-linux.so
    // does not call user code.
  }
  if (depth) {
    RAW_VLOG(10, "Disabling allocations from %s at depth %d:", library, depth);
    DisableChecksFromToLocked(AsPtr(start_address), AsPtr(end_address), depth);
    if (IsLibraryNamed(library, "/libpthread")  ||
        IsLibraryNamed(library, "/libdl")  ||
        IsLibraryNamed(library, "/ld")) {
      RAW_VLOG(10, "Global memory regions made by %s will be live data",
               library);
      if (global_region_caller_ranges == NULL) {
        global_region_caller_ranges =
          new(Allocator::Allocate(sizeof(GlobalRegionCallerRangeMap)))
            GlobalRegionCallerRangeMap;
      }
      global_region_caller_ranges
        ->insert(make_pair(end_address, start_address));
    }
  }
}

// static
// Walks /proc/self/maps and dispatches each well-formed line to the handler
// selected by proc_maps_task (disable library allocations or record global
// data regions).  Also detects whether any shared libraries are mapped,
// which the caller uses to judge the reliability of the results.
HeapLeakChecker::ProcMapsResult HeapLeakChecker::UseProcMapsLocked(
                                          ProcMapsTask proc_maps_task) {
  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
  // Need to provide own scratch memory to ProcMapsIterator:
  ProcMapsIterator::Buffer buffer;
  ProcMapsIterator it(0, &buffer);
  if (!it.Valid()) {
    int errsv = errno;
    RAW_LOG(ERROR, "Could not open /proc/self/maps: errno=%d. "
                   "Libraries will not be handled correctly.", errsv);
    return CANT_OPEN_PROC_MAPS;
  }
  uint64 start_address, end_address, file_offset;
  int64 inode;
  char *permissions, *filename;
  bool saw_shared_lib = false;
  bool saw_nonzero_inode = false;
  bool saw_shared_lib_with_nonzero_inode = false;
  while (it.Next(&start_address, &end_address, &permissions,
                 &file_offset, &inode, &filename)) {
    if (start_address >= end_address) {
      // Warn if a line we can be interested in is ill-formed:
      if (inode != 0) {
        RAW_LOG(ERROR, "Errors reading /proc/self/maps. "
                       "Some global memory regions will not "
                       "be handled correctly.");
      }
      // Silently skip other ill-formed lines: some are possible
      // probably due to the interplay of how /proc/self/maps is updated
      // while we read it in chunks in ProcMapsIterator and
      // do things in this loop.
      continue;
    }
    // Determine if any shared libraries are present (this is the same
    // list of extensions as is found in pprof).  We want to ignore
    // 'fake' libraries with inode 0 when determining.  However, some
    // systems don't share inodes via /proc, so we turn off this check
    // if we don't see any evidence that we're getting inode info.
    if (inode != 0) {
      saw_nonzero_inode = true;
    }
    if ((hc_strstr(filename, "lib") && hc_strstr(filename, ".so")) ||
        hc_strstr(filename, ".dll") ||
        // not all .dylib filenames start with lib.  .dylib is big enough
        // that we are unlikely to get false matches just checking that.
        hc_strstr(filename, ".dylib") || hc_strstr(filename, ".bundle")) {
      saw_shared_lib = true;
      if (inode != 0) {
        saw_shared_lib_with_nonzero_inode = true;
      }
    }

    switch (proc_maps_task) {
      case DISABLE_LIBRARY_ALLOCS:
        // All lines starting like
        // "401dc000-4030f000 r??p 00132000 03:01 13991972  lib/bin"
        // identify a data and code sections of a shared library or our binary
        if (inode != 0 && strncmp(permissions, "r-xp", 4) == 0) {
          DisableLibraryAllocsLocked(filename, start_address, end_address);
        }
        break;
      case RECORD_GLOBAL_DATA:
        RecordGlobalDataLocked(start_address, end_address,
                               permissions, filename);
        break;
      default:
        RAW_CHECK(0, "");
    }
  }
  // If /proc/self/maps is reporting inodes properly (we saw a
  // non-zero inode), then we only say we saw a shared lib if we saw a
  // 'real' one, with a non-zero inode.
  if (saw_nonzero_inode) {
    saw_shared_lib = saw_shared_lib_with_nonzero_inode;
  }
  if (!saw_shared_lib) {
    RAW_LOG(ERROR, "No shared libs detected. Will likely report false leak "
                   "positives for statically linked executables.");
    return NO_SHARED_LIBS_IN_PROC_MAPS;
  }
  return PROC_MAPS_USED;
}

// Total number and size of live objects dropped from the profile;
// (re)initialized in IgnoreAllLiveObjectsLocked.
static int64 live_objects_total;
static int64 live_bytes_total;

// pid of the thread that is doing the current leak check
// (protected by our lock; IgnoreAllLiveObjectsLocked sets it)
static pid_t self_thread_pid = 0;

// Status of our thread listing callback execution
// (protected by our lock; used from within IgnoreAllLiveObjectsLocked)
static enum {
  CALLBACK_NOT_STARTED,
  CALLBACK_STARTED,
  CALLBACK_COMPLETED,
} thread_listing_status = CALLBACK_NOT_STARTED;

// Ideally to avoid deadlocks this function should not result in any libc
// or other function calls that might need to lock a mutex:
// It is called when all threads of a process are stopped
// at arbitrary points thus potentially holding those locks.
//
// In practice we are calling some simple i/o and sprintf-type library functions
// for logging messages, but use only our own LowLevelAlloc::Arena allocator.
//
// This is known to be buggy: the library i/o function calls are able to cause
// deadlocks when they request a lock that a stopped thread happens to hold.
// This issue as far as we know have so far not resulted in any deadlocks
// in practice, so for now we are taking our chance that the deadlocks
// have insignificant frequency.
//
// If such deadlocks become a problem we should make the i/o calls
// into appropriately direct system calls (or eliminate them),
// in particular write() is not safe and vsnprintf() is potentially dangerous
// due to reliance on locale functions (these are called through RAW_LOG
// and in other ways).
//

#if defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYS_SYSCALL_H) && defined(DUMPER)
# if (defined(__i386__) || defined(__x86_64))
#  define THREAD_REGS i386_regs
# elif defined(__PPC__)
#  define THREAD_REGS ppc_regs
# endif
#endif

// Callback for TCMalloc_ListAllProcessThreads: runs while all other threads
// are stopped.  Registers each thread's stack and register contents as
// liveness roots, then does the rest of the liveness walking before resuming
// the threads.  Returns the number of threads whose registers could not be
// fetched.
/*static*/ int HeapLeakChecker::IgnoreLiveThreadsLocked(void* parameter,
                                                        int num_threads,
                                                        pid_t* thread_pids,
                                                        va_list /*ap*/) {
  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
  thread_listing_status = CALLBACK_STARTED;
  RAW_VLOG(11, "Found %d threads (from pid %d)", num_threads, getpid());

  if (FLAGS_heap_check_ignore_global_live) {
    UseProcMapsLocked(RECORD_GLOBAL_DATA);
  }

  // We put the registers from other threads here
  // to make pointers stored in them live.
  vector<void*, STL_Allocator<void*, Allocator> > thread_registers;

  int failures = 0;
  for (int i = 0; i < num_threads; ++i) {
    // the leak checking thread itself is handled
    // specially via self_thread_stack, not here:
    if (thread_pids[i] == self_thread_pid) continue;
    RAW_VLOG(11, "Handling thread with pid %d", thread_pids[i]);
#ifdef THREAD_REGS
    THREAD_REGS thread_regs;
#define sys_ptrace(r, p, a, d)  syscall(SYS_ptrace, (r), (p), (a), (d))
    // We use sys_ptrace to avoid thread locking
    // because this is called from TCMalloc_ListAllProcessThreads
    // when all but this thread are suspended.
    if (sys_ptrace(PTRACE_GETREGS, thread_pids[i], NULL, &thread_regs) == 0) {
      // Need to use SP to get all the data from the very last stack frame:
      COMPILE_ASSERT(sizeof(thread_regs.SP) == sizeof(void*),
                     SP_register_does_not_look_like_a_pointer);
      RegisterStackLocked(reinterpret_cast<void*>(thread_regs.SP));
      // Make registers live (just in case PTRACE_ATTACH resulted in some
      // register pointers still being in the registers and not on the stack):
      for (void** p = reinterpret_cast<void**>(&thread_regs);
           p < reinterpret_cast<void**>(&thread_regs + 1); ++p) {
        RAW_VLOG(12, "Thread register %p", *p);
        thread_registers.push_back(*p);
      }
    } else {
      failures += 1;
    }
#else
    failures += 1;
#endif
  }
  // Use all the collected thread (stack) liveness sources:
  IgnoreLiveObjectsLocked("threads stack data", "");
  if (thread_registers.size()) {
    // Make thread registers be live heap data sources.
    // we rely here on the fact that vector is in one memory chunk:
    RAW_VLOG(11, "Live registers at %p of %" PRIuS " bytes",
             &thread_registers[0], thread_registers.size() * sizeof(void*));
    live_objects->push_back(AllocObject(&thread_registers[0],
                                        thread_registers.size() * sizeof(void*),
                                        THREAD_REGISTERS));
    IgnoreLiveObjectsLocked("threads register data", "");
  }
  // Do all other liveness walking while all threads are stopped:
  IgnoreNonThreadLiveObjectsLocked();
  // Can now resume the threads:
  TCMalloc_ResumeAllProcessThreads(num_threads, thread_pids);
  thread_listing_status = CALLBACK_COMPLETED;
  return failures;
}

// Stack top of the thread that is doing the current leak check
// (protected by our lock; IgnoreAllLiveObjectsLocked sets it)
static const void* self_thread_stack_top;

// static
// Registers all non-thread liveness roots: our own stack, explicitly-ignored
// objects, Disabler-allocated objects, code-address-disabled objects, and
// (optionally) global data regions filtered against MemoryRegionMap.
void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() {
  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
  RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
  RAW_VLOG(11, "Handling self thread with pid %d", self_thread_pid);
  // Register our own stack:

  // Important that all stack ranges (including the one here)
  // are known before we start looking at them
  // in MakeDisabledLiveCallbackLocked:
  RegisterStackLocked(self_thread_stack_top);
  IgnoreLiveObjectsLocked("stack data", "");

  // Make objects we were told to ignore live:
  if (ignored_objects) {
    for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin();
         object != ignored_objects->end(); ++object) {
      const void* ptr = AsPtr(object->first);
      RAW_VLOG(11, "Ignored live object at %p of %" PRIuS " bytes",
               ptr, object->second);
      live_objects->
        push_back(AllocObject(ptr, object->second, MUST_BE_ON_HEAP));
      // we do this liveness check for ignored_objects before doing any
      // live heap walking to make sure it does not fail needlessly:
      size_t object_size;
      if (!(heap_profile->FindAlloc(ptr, &object_size) &&
            object->second == object_size)) {
        RAW_LOG(FATAL, "Object at %p of %" PRIuS " bytes from an"
                       " IgnoreObject() has disappeared", ptr, object->second);
      }
    }
    IgnoreLiveObjectsLocked("ignored objects", "");
  }

  // Treat objects that were allocated when a Disabler was live as
  // roots.  I.e., if X was allocated while a Disabler was active,
  // and Y is reachable from X, arrange that neither X nor Y are
  // treated as leaks.
  heap_profile->IterateAllocs(MakeIgnoredObjectsLiveCallbackLocked);
  IgnoreLiveObjectsLocked("disabled objects", "");

  // Make code-address-disabled objects live and ignored:
  // This in particular makes all thread-specific data live
  // because the basic data structure to hold pointers to thread-specific data
  // is allocated from libpthreads and we have range-disabled that
  // library code with UseProcMapsLocked(DISABLE_LIBRARY_ALLOCS);
  // so now we declare all thread-specific data reachable from there as live.
  heap_profile->IterateAllocs(MakeDisabledLiveCallbackLocked);
  IgnoreLiveObjectsLocked("disabled code", "");

  // Actually make global data live:
  if (FLAGS_heap_check_ignore_global_live) {
    bool have_null_region_callers = false;
    for (LibraryLiveObjectsStacks::iterator l = library_live_objects->begin();
         l != library_live_objects->end(); ++l) {
      RAW_CHECK(live_objects->empty(), "");
      // Process library_live_objects in l->second
      // filtering them by MemoryRegionMap:
      // It's safe to iterate over MemoryRegionMap
      // w/o locks here as we are inside MemoryRegionMap::Lock():
      RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
      // The only change to MemoryRegionMap possible in this loop
      // is region addition as a result of allocating more memory
      // for live_objects.  This won't invalidate the RegionIterator
      // or the intent of the loop.
      // --see the comment by MemoryRegionMap::BeginRegionLocked().
      for (MemoryRegionMap::RegionIterator region =
             MemoryRegionMap::BeginRegionLocked();
           region != MemoryRegionMap::EndRegionLocked(); ++region) {
        // "region" from MemoryRegionMap is to be subtracted from
        // (tentatively live) regions in l->second
        // if it has a stack inside or it was allocated by
        // a non-special caller (not one covered by a range
        // in global_region_caller_ranges).
        // This will in particular exclude all memory chunks used
        // by the heap itself as well as what's been allocated with
        // any allocator on top of mmap.
        bool subtract = true;
        if (!region->is_stack && global_region_caller_ranges) {
          if (region->caller() == static_cast<uintptr_t>(NULL)) {
            have_null_region_callers = true;
          } else {
            GlobalRegionCallerRangeMap::const_iterator iter
              = global_region_caller_ranges->upper_bound(region->caller());
            if (iter != global_region_caller_ranges->end()) {
              RAW_DCHECK(iter->first > region->caller(), "");
              if (iter->second < region->caller()) {  // in special region
                subtract = false;
              }
            }
          }
        }
        if (subtract) {
          // The loop puts the result of filtering l->second into live_objects:
          for (LiveObjectsStack::const_iterator i = l->second.begin();
               i != l->second.end(); ++i) {
            // subtract *region from *i
            uintptr_t start = AsInt(i->ptr);
            uintptr_t end = start + i->size;
            if (region->start_addr <= start && end <= region->end_addr) {
              // full deletion due to subsumption
            } else if (start < region->start_addr &&
                       region->end_addr < end) {  // cutting-out split
              live_objects->push_back(AllocObject(i->ptr,
                                                  region->start_addr - start,
                                                  IN_GLOBAL_DATA));
              live_objects->push_back(AllocObject(AsPtr(region->end_addr),
                                                  end - region->end_addr,
                                                  IN_GLOBAL_DATA));
            } else if (region->end_addr > start &&
                       region->start_addr <= start) {  // cut from start
              live_objects->push_back(AllocObject(AsPtr(region->end_addr),
                                                  end - region->end_addr,
                                                  IN_GLOBAL_DATA));
            } else if (region->start_addr > start &&
                       region->start_addr < end) {  // cut from end
              live_objects->push_back(AllocObject(i->ptr,
                                                  region->start_addr - start,
                                                  IN_GLOBAL_DATA));
            } else {  // pass: no intersection
              live_objects->push_back(AllocObject(i->ptr, i->size,
                                                  IN_GLOBAL_DATA));
            }
          }
          // Move live_objects back into l->second
          // for filtering by the next region.
          live_objects->swap(l->second);
          live_objects->clear();
        }
      }
      // Now get and use live_objects from the final version of l->second:
      if (VLOG_IS_ON(11)) {
        for (LiveObjectsStack::const_iterator i = l->second.begin();
             i != l->second.end(); ++i) {
          RAW_VLOG(11, "Library live region at %p of %" PRIuPTR " bytes",
                   i->ptr, i->size);
        }
      }
      live_objects->swap(l->second);
      IgnoreLiveObjectsLocked("in globals of\n ", l->first.c_str());
    }
    if (have_null_region_callers) {
      RAW_LOG(ERROR, "Have memory regions w/o callers: "
                     "might report false leaks");
    }
    Allocator::DeleteAndNull(&library_live_objects);
  }
}

// Callback for TCMalloc_ListAllProcessThreads in IgnoreAllLiveObjectsLocked below
// to test/verify that we have just the one main thread, in which case
// we can do everything in that main thread,
// so that CPU profiler can collect all its samples.
// Returns the number of threads in the process.
static int IsOneThread(void* parameter, int num_threads,
                       pid_t* thread_pids, va_list ap) {
  if (num_threads != 1) {
    RAW_LOG(WARNING, "Have threads: Won't CPU-profile the bulk of leak "
                     "checking work happening in IgnoreLiveThreadsLocked!");
  }
  TCMalloc_ResumeAllProcessThreads(num_threads, thread_pids);
  return num_threads;
}

// Dummy for IgnoreAllLiveObjectsLocked below.
// Making it global helps with compiler warnings.
static va_list dummy_ap;

// static
void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) {
  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
  RAW_CHECK(live_objects == NULL, "");
  live_objects = new(Allocator::Allocate(sizeof(LiveObjectsStack)))
                   LiveObjectsStack;
  stack_tops = new(Allocator::Allocate(sizeof(StackTopSet))) StackTopSet;
  // reset the counts
  live_objects_total = 0;
  live_bytes_total = 0;
  // Reduce max_heap_object_size to FLAGS_heap_check_max_pointer_offset
  // for the time of leak check.
+ // FLAGS_heap_check_max_pointer_offset caps max_heap_object_size + // to manage reasonably low chances of random bytes + // appearing to be pointing into large actually leaked heap objects. + const size_t old_max_heap_object_size = max_heap_object_size; + max_heap_object_size = ( + FLAGS_heap_check_max_pointer_offset != -1 + ? min(size_t(FLAGS_heap_check_max_pointer_offset), max_heap_object_size) + : max_heap_object_size); + // Record global data as live: + if (FLAGS_heap_check_ignore_global_live) { + library_live_objects = + new(Allocator::Allocate(sizeof(LibraryLiveObjectsStacks))) + LibraryLiveObjectsStacks; + } + // Ignore all thread stacks: + thread_listing_status = CALLBACK_NOT_STARTED; + bool need_to_ignore_non_thread_objects = true; + self_thread_pid = getpid(); + self_thread_stack_top = self_stack_top; + if (FLAGS_heap_check_ignore_thread_live) { + // In case we are doing CPU profiling we'd like to do all the work + // in the main thread, not in the special thread created by + // TCMalloc_ListAllProcessThreads, so that CPU profiler can + // collect all its samples. The machinery of + // TCMalloc_ListAllProcessThreads conflicts with the CPU profiler + // by also relying on signals and ::sigaction. We can do this + // (run everything in the main thread) safely only if there's just + // the main thread itself in our process. This variable reflects + // these two conditions: + bool want_and_can_run_in_main_thread = + ProfilingIsEnabledForAllThreads() && + TCMalloc_ListAllProcessThreads(NULL, IsOneThread) == 1; + // When the normal path of TCMalloc_ListAllProcessThreads below is taken, + // we fully suspend the threads right here before any liveness checking + // and keep them suspended for the whole time of liveness checking + // inside of the IgnoreLiveThreadsLocked callback. + // (The threads can't (de)allocate due to lock on the delete hook but + // if not suspended they could still mess with the pointer + // graph while we walk it). 
+ int r = want_and_can_run_in_main_thread + ? IgnoreLiveThreadsLocked(NULL, 1, &self_thread_pid, dummy_ap) + : TCMalloc_ListAllProcessThreads(NULL, IgnoreLiveThreadsLocked); + need_to_ignore_non_thread_objects = r < 0; + if (r < 0) { + RAW_LOG(WARNING, "Thread finding failed with %d errno=%d", r, errno); + if (thread_listing_status == CALLBACK_COMPLETED) { + RAW_LOG(INFO, "Thread finding callback " + "finished ok; hopefully everything is fine"); + need_to_ignore_non_thread_objects = false; + } else if (thread_listing_status == CALLBACK_STARTED) { + RAW_LOG(FATAL, "Thread finding callback was " + "interrupted or crashed; can't fix this"); + } else { // CALLBACK_NOT_STARTED + RAW_LOG(ERROR, "Could not find thread stacks. " + "Will likely report false leak positives."); + } + } else if (r != 0) { + RAW_LOG(ERROR, "Thread stacks not found for %d threads. " + "Will likely report false leak positives.", r); + } else { + RAW_VLOG(11, "Thread stacks appear to be found for all threads"); + } + } else { + RAW_LOG(WARNING, "Not looking for thread stacks; " + "objects reachable only from there " + "will be reported as leaks"); + } + // Do all other live data ignoring here if we did not do it + // within thread listing callback with all threads stopped. + if (need_to_ignore_non_thread_objects) { + if (FLAGS_heap_check_ignore_global_live) { + UseProcMapsLocked(RECORD_GLOBAL_DATA); + } + IgnoreNonThreadLiveObjectsLocked(); + } + if (live_objects_total) { + RAW_VLOG(10, "Ignoring %" PRId64 " reachable objects of %" PRId64 " bytes", + live_objects_total, live_bytes_total); + } + // Free these: we made them here and heap_profile never saw them + Allocator::DeleteAndNull(&live_objects); + Allocator::DeleteAndNull(&stack_tops); + max_heap_object_size = old_max_heap_object_size; // reset this var +} + +// Alignment at which we should consider pointer positions +// in IgnoreLiveObjectsLocked. Will normally use the value of +// FLAGS_heap_check_pointer_source_alignment. 
+static size_t pointer_source_alignment = kPointerSourceAlignment; +// Global lock for HeapLeakChecker::DoNoLeaks +// to protect pointer_source_alignment. +static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); + +// This function changes the live bits in the heap_profile-table's state: +// we only record the live objects to be skipped. +// +// When checking if a byte sequence points to a heap object we use +// HeapProfileTable::FindInsideAlloc to handle both pointers to +// the start and inside of heap-allocated objects. +// The "inside" case needs to be checked to support +// at least the following relatively common cases: +// - C++ arrays allocated with new FooClass[size] for classes +// with destructors have their size recorded in a sizeof(int) field +// before the place normal pointers point to. +// - basic_string<>-s for e.g. the C++ library of gcc 3.4 +// have the meta-info in basic_string<...>::_Rep recorded +// before the place normal pointers point to. +// - Multiple-inherited objects have their pointers when cast to +// different base classes pointing inside of the actually +// allocated object. +// - Sometimes reachability pointers point to member objects of heap objects, +// and then those member objects point to the full heap object. +// - Third party UnicodeString: it stores a 32-bit refcount +// (in both 32-bit and 64-bit binaries) as the first uint32 +// in the allocated memory and a normal pointer points at +// the second uint32 behind the refcount. +// By finding these additional objects here +// we slightly increase the chance to mistake random memory bytes +// for a pointer and miss a leak in a particular run of a binary. 
+// +/*static*/ void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name, + const char* name2) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + int64 live_object_count = 0; + int64 live_byte_count = 0; + while (!live_objects->empty()) { + const char* object = + reinterpret_cast<const char*>(live_objects->back().ptr); + size_t size = live_objects->back().size; + const ObjectPlacement place = live_objects->back().place; + live_objects->pop_back(); + if (place == MUST_BE_ON_HEAP && heap_profile->MarkAsLive(object)) { + live_object_count += 1; + live_byte_count += size; + } + RAW_VLOG(13, "Looking for heap pointers in %p of %" PRIuS " bytes", + object, size); + const char* const whole_object = object; + size_t const whole_size = size; + // Try interpretting any byte sequence in object,size as a heap pointer: + const size_t remainder = AsInt(object) % pointer_source_alignment; + if (remainder) { + object += pointer_source_alignment - remainder; + if (size >= pointer_source_alignment - remainder) { + size -= pointer_source_alignment - remainder; + } else { + size = 0; + } + } + if (size < sizeof(void*)) continue; + +#ifdef NO_FRAME_POINTER + // Frame pointer omission requires us to use libunwind, which uses direct + // mmap and munmap system calls, and that needs special handling. + if (name2 == kUnnamedProcSelfMapEntry) { + static const uintptr_t page_mask = ~(getpagesize() - 1); + const uintptr_t addr = reinterpret_cast<uintptr_t>(object); + if ((addr & page_mask) == 0 && (size & page_mask) == 0) { + // This is an object we slurped from /proc/self/maps. + // It may or may not be readable at this point. + // + // In case all the above conditions made a mistake, and the object is + // not related to libunwind, we also verify that it's not readable + // before ignoring it. + if (msync(const_cast<char*>(object), size, MS_ASYNC) != 0) { + // Skip unreadable object, so we don't crash trying to sweep it. 
+ RAW_VLOG(0, "Ignoring inaccessible object [%p, %p) " + "(msync error %d (%s))", + object, object + size, errno, strerror(errno)); + continue; + } + } + } +#endif + + const char* const max_object = object + size - sizeof(void*); + while (object <= max_object) { + // potentially unaligned load: + const uintptr_t addr = *reinterpret_cast<const uintptr_t*>(object); + // Do fast check before the more expensive HaveOnHeapLocked lookup: + // this code runs for all memory words that are potentially pointers: + const bool can_be_on_heap = + // Order tests by the likelyhood of the test failing in 64/32 bit modes. + // Yes, this matters: we either lose 5..6% speed in 32 bit mode + // (which is already slower) or by a factor of 1.5..1.91 in 64 bit mode. + // After the alignment test got dropped the above performance figures + // must have changed; might need to revisit this. +#if defined(__x86_64__) + addr <= max_heap_address && // <= is for 0-sized object with max addr + min_heap_address <= addr; +#else + min_heap_address <= addr && + addr <= max_heap_address; // <= is for 0-sized object with max addr +#endif + if (can_be_on_heap) { + const void* ptr = reinterpret_cast<const void*>(addr); + // Too expensive (inner loop): manually uncomment when debugging: + // RAW_VLOG(17, "Trying pointer to %p at %p", ptr, object); + size_t object_size; + if (HaveOnHeapLocked(&ptr, &object_size) && + heap_profile->MarkAsLive(ptr)) { + // We take the (hopefully low) risk here of encountering by accident + // a byte sequence in memory that matches an address of + // a heap object which is in fact leaked. + // I.e. in very rare and probably not repeatable/lasting cases + // we might miss some real heap memory leaks. 
+ RAW_VLOG(14, "Found pointer to %p of %" PRIuS " bytes at %p " + "inside %p of size %" PRIuS "", + ptr, object_size, object, whole_object, whole_size); + if (VLOG_IS_ON(15)) { + // log call stacks to help debug how come something is not a leak + HeapProfileTable::AllocInfo alloc; + if (!heap_profile->FindAllocDetails(ptr, &alloc)) { + RAW_LOG(FATAL, "FindAllocDetails failed on ptr %p", ptr); + } + RAW_LOG(INFO, "New live %p object's alloc stack:", ptr); + for (int i = 0; i < alloc.stack_depth; ++i) { + RAW_LOG(INFO, " @ %p", alloc.call_stack[i]); + } + } + live_object_count += 1; + live_byte_count += object_size; + live_objects->push_back(AllocObject(ptr, object_size, + IGNORED_ON_HEAP)); + } + } + object += pointer_source_alignment; + } + } + live_objects_total += live_object_count; + live_bytes_total += live_byte_count; + if (live_object_count) { + RAW_VLOG(10, "Removed %" PRId64 " live heap objects of %" PRId64 " bytes: %s%s", + live_object_count, live_byte_count, name, name2); + } +} + +//---------------------------------------------------------------------- +// HeapLeakChecker leak check disabling components +//---------------------------------------------------------------------- + +// static +void HeapLeakChecker::DisableChecksIn(const char* pattern) { + RAW_LOG(WARNING, "DisableChecksIn(%s) is ignored", pattern); +} + +// static +void HeapLeakChecker::DoIgnoreObject(const void* ptr) { + SpinLockHolder l(&heap_checker_lock); + if (!heap_checker_on) return; + size_t object_size; + if (!HaveOnHeapLocked(&ptr, &object_size)) { + RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr); + } else { + RAW_VLOG(10, "Going to ignore live object at %p of %" PRIuS " bytes", + ptr, object_size); + if (ignored_objects == NULL) { + ignored_objects = new(Allocator::Allocate(sizeof(IgnoredObjectsMap))) + IgnoredObjectsMap; + } + if (!ignored_objects->insert(make_pair(AsInt(ptr), object_size)).second) { + RAW_LOG(WARNING, "Object at %p is already being ignored", ptr); + 
} + } +} + +// static +void HeapLeakChecker::UnIgnoreObject(const void* ptr) { + SpinLockHolder l(&heap_checker_lock); + if (!heap_checker_on) return; + size_t object_size; + if (!HaveOnHeapLocked(&ptr, &object_size)) { + RAW_LOG(FATAL, "No live heap object at %p to un-ignore", ptr); + } else { + bool found = false; + if (ignored_objects) { + IgnoredObjectsMap::iterator object = ignored_objects->find(AsInt(ptr)); + if (object != ignored_objects->end() && object_size == object->second) { + ignored_objects->erase(object); + found = true; + RAW_VLOG(10, "Now not going to ignore live object " + "at %p of %" PRIuS " bytes", ptr, object_size); + } + } + if (!found) RAW_LOG(FATAL, "Object at %p has not been ignored", ptr); + } +} + +//---------------------------------------------------------------------- +// HeapLeakChecker non-static functions +//---------------------------------------------------------------------- + +char* HeapLeakChecker::MakeProfileNameLocked() { + RAW_DCHECK(lock_->IsHeld(), ""); + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + const int len = profile_name_prefix->size() + strlen(name_) + 5 + + strlen(HeapProfileTable::kFileExt) + 1; + char* file_name = reinterpret_cast<char*>(Allocator::Allocate(len)); + snprintf(file_name, len, "%s.%s-end%s", + profile_name_prefix->c_str(), name_, + HeapProfileTable::kFileExt); + return file_name; +} + +void HeapLeakChecker::Create(const char *name, bool make_start_snapshot) { + SpinLockHolder l(lock_); + name_ = NULL; // checker is inactive + start_snapshot_ = NULL; + has_checked_ = false; + inuse_bytes_increase_ = 0; + inuse_allocs_increase_ = 0; + keep_profiles_ = false; + char* n = new char[strlen(name) + 1]; // do this before we lock + IgnoreObject(n); // otherwise it might be treated as live due to our stack + { // Heap activity in other threads is paused for this whole scope. 
+ SpinLockHolder al(&alignment_checker_lock); + SpinLockHolder hl(&heap_checker_lock); + MemoryRegionMap::LockHolder ml; + if (heap_checker_on && profile_name_prefix != NULL) { + RAW_DCHECK(strchr(name, '/') == NULL, "must be a simple name"); + memcpy(n, name, strlen(name) + 1); + name_ = n; // checker is active + if (make_start_snapshot) { + start_snapshot_ = heap_profile->TakeSnapshot(); + } + + const HeapProfileTable::Stats& t = heap_profile->total(); + const size_t start_inuse_bytes = t.alloc_size - t.free_size; + const size_t start_inuse_allocs = t.allocs - t.frees; + RAW_VLOG(10, "Start check \"%s\" profile: %" PRIuS " bytes " + "in %" PRIuS " objects", + name_, start_inuse_bytes, start_inuse_allocs); + } else { + RAW_LOG(WARNING, "Heap checker is not active, " + "hence checker \"%s\" will do nothing!", name); + RAW_LOG(WARNING, "To activate set the HEAPCHECK environment variable.\n"); + } + } + if (name_ == NULL) { + UnIgnoreObject(n); + delete[] n; // must be done after we unlock + } +} + +HeapLeakChecker::HeapLeakChecker(const char *name) : lock_(new SpinLock) { + RAW_DCHECK(strcmp(name, "_main_") != 0, "_main_ is reserved"); + Create(name, true/*create start_snapshot_*/); +} + +HeapLeakChecker::HeapLeakChecker() : lock_(new SpinLock) { + if (FLAGS_heap_check_before_constructors) { + // We want to check for leaks of objects allocated during global + // constructors (i.e., objects allocated already). So we do not + // create a baseline snapshot and hence check for leaks of objects + // that may have already been created. + Create("_main_", false); + } else { + // We want to ignore leaks of objects allocated during global + // constructors (i.e., objects allocated already). So we snapshot + // the current heap contents and use them as a baseline that is + // not reported by the leak checker. 
+ Create("_main_", true); + } +} + +ssize_t HeapLeakChecker::BytesLeaked() const { + SpinLockHolder l(lock_); + if (!has_checked_) { + RAW_LOG(FATAL, "*NoLeaks|SameHeap must execute before this call"); + } + return inuse_bytes_increase_; +} + +ssize_t HeapLeakChecker::ObjectsLeaked() const { + SpinLockHolder l(lock_); + if (!has_checked_) { + RAW_LOG(FATAL, "*NoLeaks|SameHeap must execute before this call"); + } + return inuse_allocs_increase_; +} + +// Save pid of main thread for using in naming dump files +static int32 main_thread_pid = getpid(); +#ifdef HAVE_PROGRAM_INVOCATION_NAME +#ifdef __UCLIBC__ +extern const char* program_invocation_name; +extern const char* program_invocation_short_name; +#else +extern char* program_invocation_name; +extern char* program_invocation_short_name; +#endif +static const char* invocation_name() { return program_invocation_short_name; } +static string invocation_path() { return program_invocation_name; } +#else +static const char* invocation_name() { return "<your binary>"; } +static string invocation_path() { return "<your binary>"; } +#endif + +// Prints commands that users can run to get more information +// about the reported leaks. +static void SuggestPprofCommand(const char* pprof_file_arg) { + // Extra help information to print for the user when the test is + // being run in a way where the straightforward pprof command will + // not suffice. + string extra_help; + + // Common header info to print for remote runs + const string remote_header = + "This program is being executed remotely and therefore the pprof\n" + "command printed above will not work. 
Either run this program\n" + "locally, or adjust the pprof command as follows to allow it to\n" + "work on your local machine:\n"; + + // Extra command for fetching remote data + string fetch_cmd; + + RAW_LOG(WARNING, + "\n\n" + "If the preceding stack traces are not enough to find " + "the leaks, try running THIS shell command:\n\n" + "%s%s %s \"%s\" --inuse_objects --lines --heapcheck " + " --edgefraction=1e-10 --nodefraction=1e-10 --gv\n" + "\n" + "%s" + "If you are still puzzled about why the leaks are " + "there, try rerunning this program with " + "HEAP_CHECK_TEST_POINTER_ALIGNMENT=1 and/or with " + "HEAP_CHECK_MAX_POINTER_OFFSET=-1\n" + "If the leak report occurs in a small fraction of runs, " + "try running with TCMALLOC_MAX_FREE_QUEUE_SIZE of few hundred MB " + "or with TCMALLOC_RECLAIM_MEMORY=false, " // only works for debugalloc + "it might help find leaks more repeatably\n", + fetch_cmd.c_str(), + "pprof", // works as long as pprof is on your path + invocation_path().c_str(), + pprof_file_arg, + extra_help.c_str() + ); +} + +bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { + SpinLockHolder l(lock_); + // The locking also helps us keep the messages + // for the two checks close together. + SpinLockHolder al(&alignment_checker_lock); + + // thread-safe: protected by alignment_checker_lock + static bool have_disabled_hooks_for_symbolize = false; + // Once we've checked for leaks and symbolized the results once, it's + // not safe to do it again. This is because in order to symbolize + // safely, we had to disable all the malloc hooks here, so we no + // longer can be confident we've collected all the data we need. + if (have_disabled_hooks_for_symbolize) { + RAW_LOG(FATAL, "Must not call heap leak checker manually after " + " program-exit's automatic check."); + } + + HeapProfileTable::Snapshot* leaks = NULL; + char* pprof_file = NULL; + + { + // Heap activity in other threads is paused during this function + // (i.e. 
until we got all profile difference info). + SpinLockHolder hl(&heap_checker_lock); + if (heap_checker_on == false) { + if (name_ != NULL) { // leak checking enabled when created the checker + RAW_LOG(WARNING, "Heap leak checker got turned off after checker " + "\"%s\" has been created, no leak check is being done for it!", + name_); + } + return true; + } + + // Update global_region_caller_ranges. They may need to change since + // e.g. initialization because shared libraries might have been loaded or + // unloaded. + Allocator::DeleteAndNullIfNot(&global_region_caller_ranges); + ProcMapsResult pm_result = UseProcMapsLocked(DISABLE_LIBRARY_ALLOCS); + RAW_CHECK(pm_result == PROC_MAPS_USED, ""); + + // Keep track of number of internally allocated objects so we + // can detect leaks in the heap-leak-checket itself + const int initial_allocs = Allocator::alloc_count(); + + if (name_ == NULL) { + RAW_LOG(FATAL, "Heap leak checker must not be turned on " + "after construction of a HeapLeakChecker"); + } + + MemoryRegionMap::LockHolder ml; + int a_local_var; // Use our stack ptr to make stack data live: + + // Make the heap profile, other threads are locked out. + HeapProfileTable::Snapshot* base = + reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_); + RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, ""); + pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment; + IgnoreAllLiveObjectsLocked(&a_local_var); + leaks = heap_profile->NonLiveSnapshot(base); + + inuse_bytes_increase_ = static_cast<ssize_t>(leaks->total().alloc_size); + inuse_allocs_increase_ = static_cast<ssize_t>(leaks->total().allocs); + if (leaks->Empty()) { + heap_profile->ReleaseSnapshot(leaks); + leaks = NULL; + + // We can only check for internal leaks along the no-user-leak + // path since in the leak path we temporarily release + // heap_checker_lock and another thread can come in and disturb + // allocation counts. 
+ if (Allocator::alloc_count() != initial_allocs) { + RAW_LOG(FATAL, "Internal HeapChecker leak of %d objects ; %d -> %d", + Allocator::alloc_count() - initial_allocs, + initial_allocs, Allocator::alloc_count()); + } + } else if (FLAGS_heap_check_test_pointer_alignment) { + if (pointer_source_alignment == 1) { + RAW_LOG(WARNING, "--heap_check_test_pointer_alignment has no effect: " + "--heap_check_pointer_source_alignment was already set to 1"); + } else { + // Try with reduced pointer aligment + pointer_source_alignment = 1; + IgnoreAllLiveObjectsLocked(&a_local_var); + HeapProfileTable::Snapshot* leaks_wo_align = + heap_profile->NonLiveSnapshot(base); + pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment; + if (leaks_wo_align->Empty()) { + RAW_LOG(WARNING, "Found no leaks without pointer alignment: " + "something might be placing pointers at " + "unaligned addresses! This needs to be fixed."); + } else { + RAW_LOG(INFO, "Found leaks without pointer alignment as well: " + "unaligned pointers must not be the cause of leaks."); + RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help " + "to diagnose the leaks."); + } + heap_profile->ReleaseSnapshot(leaks_wo_align); + } + } + + if (leaks != NULL) { + pprof_file = MakeProfileNameLocked(); + } + } + + has_checked_ = true; + if (leaks == NULL) { + if (FLAGS_heap_check_max_pointer_offset == -1) { + RAW_LOG(WARNING, + "Found no leaks without max_pointer_offset restriction: " + "it's possible that the default value of " + "heap_check_max_pointer_offset flag is too low. 
" + "Do you use pointers with larger than that offsets " + "pointing in the middle of heap-allocated objects?"); + } + const HeapProfileTable::Stats& stats = heap_profile->total(); + RAW_VLOG(heap_checker_info_level, + "No leaks found for check \"%s\" " + "(but no 100%% guarantee that there aren't any): " + "found %" PRId64 " reachable heap objects of %" PRId64 " bytes", + name_, + int64(stats.allocs - stats.frees), + int64(stats.alloc_size - stats.free_size)); + } else { + if (should_symbolize == SYMBOLIZE) { + // To turn addresses into symbols, we need to fork, which is a + // problem if both parent and child end up trying to call the + // same malloc-hooks we've set up, at the same time. To avoid + // trouble, we turn off the hooks before symbolizing. Note that + // this makes it unsafe to ever leak-report again! Luckily, we + // typically only want to report once in a program's run, at the + // very end. + if (MallocHook::GetNewHook() == NewHook) + MallocHook::SetNewHook(NULL); + if (MallocHook::GetDeleteHook() == DeleteHook) + MallocHook::SetDeleteHook(NULL); + MemoryRegionMap::Shutdown(); + // Make sure all the hooks really got unset: + RAW_CHECK(MallocHook::GetNewHook() == NULL, ""); + RAW_CHECK(MallocHook::GetDeleteHook() == NULL, ""); + RAW_CHECK(MallocHook::GetMmapHook() == NULL, ""); + RAW_CHECK(MallocHook::GetSbrkHook() == NULL, ""); + have_disabled_hooks_for_symbolize = true; + leaks->ReportLeaks(name_, pprof_file, true); // true = should_symbolize + } else { + leaks->ReportLeaks(name_, pprof_file, false); + } + if (FLAGS_heap_check_identify_leaks) { + leaks->ReportIndividualObjects(); + } + + SuggestPprofCommand(pprof_file); + + { + SpinLockHolder hl(&heap_checker_lock); + heap_profile->ReleaseSnapshot(leaks); + Allocator::Free(pprof_file); + } + } + + return (leaks == NULL); +} + +HeapLeakChecker::~HeapLeakChecker() { + if (name_ != NULL) { // had leak checking enabled when created the checker + if (!has_checked_) { + RAW_LOG(FATAL, "Some 
*NoLeaks|SameHeap method" + " must be called on any created HeapLeakChecker"); + } + + // Deallocate any snapshot taken at start + if (start_snapshot_ != NULL) { + SpinLockHolder l(&heap_checker_lock); + heap_profile->ReleaseSnapshot( + reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_)); + } + + UnIgnoreObject(name_); + delete[] name_; + name_ = NULL; + } + delete lock_; +} + +//---------------------------------------------------------------------- +// HeapLeakChecker overall heap check components +//---------------------------------------------------------------------- + +// static +bool HeapLeakChecker::IsActive() { + SpinLockHolder l(&heap_checker_lock); + return heap_checker_on; +} + +vector<HeapCleaner::void_function>* HeapCleaner::heap_cleanups_ = NULL; + +// When a HeapCleaner object is intialized, add its function to the static list +// of cleaners to be run before leaks checking. +HeapCleaner::HeapCleaner(void_function f) { + if (heap_cleanups_ == NULL) + heap_cleanups_ = new vector<HeapCleaner::void_function>; + heap_cleanups_->push_back(f); +} + +// Run all of the cleanup functions and delete the vector. +void HeapCleaner::RunHeapCleanups() { + if (!heap_cleanups_) + return; + for (int i = 0; i < heap_cleanups_->size(); i++) { + void (*f)(void) = (*heap_cleanups_)[i]; + f(); + } + delete heap_cleanups_; + heap_cleanups_ = NULL; +} + +// Program exit heap cleanup registered as a module object destructor. +// Will not get executed when we crash on a signal. +// +void HeapLeakChecker_RunHeapCleanups() { + if (FLAGS_heap_check == "local") // don't check heap in this mode + return; + { SpinLockHolder l(&heap_checker_lock); + // can get here (via forks?) 
with other pids + if (heap_checker_pid != getpid()) return; + } + HeapCleaner::RunHeapCleanups(); + if (!FLAGS_heap_check_after_destructors) HeapLeakChecker::DoMainHeapCheck(); +} + +static bool internal_init_start_has_run = false; + +// Called exactly once, before main() (but hopefully just before). +// This picks a good unique name for the dumped leak checking heap profiles. +// +// Because we crash when InternalInitStart is called more than once, +// it's fine that we hold heap_checker_lock only around pieces of +// this function: this is still enough for thread-safety w.r.t. other functions +// of this module. +// We can't hold heap_checker_lock throughout because it would deadlock +// on a memory allocation since our new/delete hooks can be on. +// +void HeapLeakChecker_InternalInitStart() { + { SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(!internal_init_start_has_run, + "Heap-check constructor called twice. Perhaps you both linked" + " in the heap checker, and also used LD_PRELOAD to load it?"); + internal_init_start_has_run = true; + +#ifdef ADDRESS_SANITIZER + // AddressSanitizer's custom malloc conflicts with HeapChecker. + FLAGS_heap_check = ""; +#endif + + if (FLAGS_heap_check.empty()) { + // turns out we do not need checking in the end; can stop profiling + HeapLeakChecker::TurnItselfOffLocked(); + return; + } else if (RunningOnValgrind()) { + // There is no point in trying -- we'll just fail. + RAW_LOG(WARNING, "Can't run under Valgrind; will turn itself off"); + HeapLeakChecker::TurnItselfOffLocked(); + return; + } + } + + // Changing this to false can be useful when debugging heap-checker itself: + if (!FLAGS_heap_check_run_under_gdb && IsDebuggerAttached()) { + RAW_LOG(WARNING, "Someone is ptrace()ing us; will turn itself off"); + SpinLockHolder l(&heap_checker_lock); + HeapLeakChecker::TurnItselfOffLocked(); + return; + } + + { SpinLockHolder l(&heap_checker_lock); + if (!constructor_heap_profiling) { + RAW_LOG(FATAL, "Can not start so late. 
You have to enable heap checking " + "with HEAPCHECK=<mode>."); + } + } + + // Set all flags + RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, ""); + if (FLAGS_heap_check == "minimal") { + // The least we can check. + FLAGS_heap_check_before_constructors = false; // from after main + // (ignore more) + FLAGS_heap_check_after_destructors = false; // to after cleanup + // (most data is live) + FLAGS_heap_check_ignore_thread_live = true; // ignore all live + FLAGS_heap_check_ignore_global_live = true; // ignore all live + } else if (FLAGS_heap_check == "normal") { + // Faster than 'minimal' and not much stricter. + FLAGS_heap_check_before_constructors = true; // from no profile (fast) + FLAGS_heap_check_after_destructors = false; // to after cleanup + // (most data is live) + FLAGS_heap_check_ignore_thread_live = true; // ignore all live + FLAGS_heap_check_ignore_global_live = true; // ignore all live + } else if (FLAGS_heap_check == "strict") { + // A bit stricter than 'normal': global destructors must fully clean up + // after themselves if they are present. + FLAGS_heap_check_before_constructors = true; // from no profile (fast) + FLAGS_heap_check_after_destructors = true; // to after destructors + // (less data live) + FLAGS_heap_check_ignore_thread_live = true; // ignore all live + FLAGS_heap_check_ignore_global_live = true; // ignore all live + } else if (FLAGS_heap_check == "draconian") { + // Drop not very portable and not very exact live heap flooding. 
+ FLAGS_heap_check_before_constructors = true; // from no profile (fast) + FLAGS_heap_check_after_destructors = true; // to after destructors + // (need them) + FLAGS_heap_check_ignore_thread_live = false; // no live flood (stricter) + FLAGS_heap_check_ignore_global_live = false; // no live flood (stricter) + } else if (FLAGS_heap_check == "as-is") { + // do nothing: use other flags as is + } else if (FLAGS_heap_check == "local") { + // do nothing + } else { + RAW_LOG(FATAL, "Unsupported heap_check flag: %s", + FLAGS_heap_check.c_str()); + } + // FreeBSD doesn't seem to honor atexit execution order: + // http://code.google.com/p/gperftools/issues/detail?id=375 + // Since heap-checking before destructors depends on atexit running + // at the right time, on FreeBSD we always check after, even in the + // less strict modes. This just means FreeBSD is always a bit + // stricter in its checking than other OSes. + // This now appears to be the case in other OSes as well; + // so always check afterwards. + FLAGS_heap_check_after_destructors = true; + + { SpinLockHolder l(&heap_checker_lock); + RAW_DCHECK(heap_checker_pid == getpid(), ""); + heap_checker_on = true; + RAW_DCHECK(heap_profile, ""); + HeapLeakChecker::ProcMapsResult pm_result = HeapLeakChecker::UseProcMapsLocked(HeapLeakChecker::DISABLE_LIBRARY_ALLOCS); + // might neeed to do this more than once + // if one later dynamically loads libraries that we want disabled + if (pm_result != HeapLeakChecker::PROC_MAPS_USED) { // can't function + HeapLeakChecker::TurnItselfOffLocked(); + return; + } + } + + // make a good place and name for heap profile leak dumps + string* profile_prefix = + new string(FLAGS_heap_check_dump_directory + "/" + invocation_name()); + + // Finalize prefix for dumping leak checking profiles. 
+ const int32 our_pid = getpid(); // safest to call getpid() outside lock + { SpinLockHolder l(&heap_checker_lock); + // main_thread_pid might still be 0 if this function is being called before + // global constructors. In that case, our pid *is* the main pid. + if (main_thread_pid == 0) + main_thread_pid = our_pid; + } + char pid_buf[15]; + snprintf(pid_buf, sizeof(pid_buf), ".%d", main_thread_pid); + *profile_prefix += pid_buf; + { SpinLockHolder l(&heap_checker_lock); + RAW_DCHECK(profile_name_prefix == NULL, ""); + profile_name_prefix = profile_prefix; + } + + // Make sure new/delete hooks are installed properly + // and heap profiler is indeed able to keep track + // of the objects being allocated. + // We test this to make sure we are indeed checking for leaks. + char* test_str = new char[5]; + size_t size; + { SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(heap_profile->FindAlloc(test_str, &size), + "our own new/delete not linked?"); + } + delete[] test_str; + { SpinLockHolder l(&heap_checker_lock); + // This check can fail when it should not if another thread allocates + // into this same spot right this moment, + // which is unlikely since this code runs in InitGoogle. + RAW_CHECK(!heap_profile->FindAlloc(test_str, &size), + "our own new/delete not linked?"); + } + // If we crash in the above code, it probably means that + // "nm <this_binary> | grep new" will show that tcmalloc's new/delete + // implementation did not get linked-in into this binary + // (i.e. nm will list __builtin_new and __builtin_vec_new as undefined). + // If this happens, it is a BUILD bug to be fixed. 
+ + RAW_VLOG(heap_checker_info_level, + "WARNING: Perftools heap leak checker is active " + "-- Performance may suffer"); + + if (FLAGS_heap_check != "local") { + HeapLeakChecker* main_hc = new HeapLeakChecker(); + SpinLockHolder l(&heap_checker_lock); + RAW_DCHECK(main_heap_checker == NULL, + "Repeated creation of main_heap_checker"); + main_heap_checker = main_hc; + do_main_heap_check = true; + } + + { SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(heap_checker_on && constructor_heap_profiling, + "Leak checking is expected to be fully turned on now"); + } + + // For binaries built in debug mode, this will set release queue of + // debugallocation.cc to 100M to make it less likely for real leaks to + // be hidden due to reuse of heap memory object addresses. + // Running a test with --malloc_reclaim_memory=0 would help find leaks even + // better, but the test might run out of memory as a result. + // The scenario is that a heap object at address X is allocated and freed, + // but some other data-structure still retains a pointer to X. + // Then the same heap memory is used for another object, which is leaked, + // but the leak is not noticed due to the pointer to the original object at X. + // TODO(csilvers): support this in some manner. +#if 0 + SetCommandLineOptionWithMode("max_free_queue_size", "104857600", // 100M + SET_FLAG_IF_DEFAULT); +#endif +} + +// We want this to run early as well, but not so early as +// ::BeforeConstructors (we want flag assignments to have already +// happened, for instance). Initializer-registration does the trick. 
+REGISTER_MODULE_INITIALIZER(init_start, HeapLeakChecker_InternalInitStart()); +REGISTER_MODULE_DESTRUCTOR(init_start, HeapLeakChecker_RunHeapCleanups()); + +// static +bool HeapLeakChecker::NoGlobalLeaksMaybeSymbolize( + ShouldSymbolize should_symbolize) { + // we never delete or change main_heap_checker once it's set: + HeapLeakChecker* main_hc = GlobalChecker(); + if (main_hc) { + RAW_VLOG(10, "Checking for whole-program memory leaks"); + return main_hc->DoNoLeaks(should_symbolize); + } + return true; +} + +// static +bool HeapLeakChecker::DoMainHeapCheck() { + if (FLAGS_heap_check_delay_seconds > 0) { + sleep(FLAGS_heap_check_delay_seconds); + } + { SpinLockHolder l(&heap_checker_lock); + if (!do_main_heap_check) return false; + RAW_DCHECK(heap_checker_pid == getpid(), ""); + do_main_heap_check = false; // will do it now; no need to do it more + } + + // The program is over, so it's safe to symbolize addresses (which + // requires a fork) because no serious work is expected to be done + // after this. Symbolizing is really useful -- knowing what + // function has a leak is better than knowing just an address -- + // and while we can only safely symbolize once in a program run, + // now is the time (after all, there's no "later" that would be better). + if (!NoGlobalLeaksMaybeSymbolize(SYMBOLIZE)) { + if (FLAGS_heap_check_identify_leaks) { + RAW_LOG(FATAL, "Whole-program memory leaks found."); + } + RAW_LOG(ERROR, "Exiting with error code (instead of crashing) " + "because of whole-program memory leaks"); + _exit(1); // we don't want to call atexit() routines! + } + return true; +} + +// static +HeapLeakChecker* HeapLeakChecker::GlobalChecker() { + SpinLockHolder l(&heap_checker_lock); + return main_heap_checker; +} + +// static +bool HeapLeakChecker::NoGlobalLeaks() { + // symbolizing requires a fork, which isn't safe to do in general. 
+ return NoGlobalLeaksMaybeSymbolize(DO_NOT_SYMBOLIZE); +} + +// static +void HeapLeakChecker::CancelGlobalCheck() { + SpinLockHolder l(&heap_checker_lock); + if (do_main_heap_check) { + RAW_VLOG(heap_checker_info_level, + "Canceling the automatic at-exit whole-program memory leak check"); + do_main_heap_check = false; + } +} + +// static +void HeapLeakChecker::BeforeConstructorsLocked() { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + RAW_CHECK(!constructor_heap_profiling, + "BeforeConstructorsLocked called multiple times"); +#ifdef ADDRESS_SANITIZER + // AddressSanitizer's custom malloc conflicts with HeapChecker. + return; +#endif + // Set hooks early to crash if 'new' gets called before we make heap_profile, + // and make sure no other hooks existed: + RAW_CHECK(MallocHook::AddNewHook(&NewHook), ""); + RAW_CHECK(MallocHook::AddDeleteHook(&DeleteHook), ""); + constructor_heap_profiling = true; + MemoryRegionMap::Init(1, /* use_buckets */ false); + // Set up MemoryRegionMap with (at least) one caller stack frame to record + // (important that it's done before HeapProfileTable creation below). 
+ Allocator::Init(); + RAW_CHECK(heap_profile == NULL, ""); + heap_profile = new(Allocator::Allocate(sizeof(HeapProfileTable))) + HeapProfileTable(&Allocator::Allocate, &Allocator::Free, + /* profile_mmap */ false); + RAW_VLOG(10, "Starting tracking the heap"); + heap_checker_on = true; +} + +// static +void HeapLeakChecker::TurnItselfOffLocked() { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + // Set FLAGS_heap_check to "", for users who test for it + if (!FLAGS_heap_check.empty()) // be a noop in the common case + FLAGS_heap_check.clear(); // because clear() could allocate memory + if (constructor_heap_profiling) { + RAW_CHECK(heap_checker_on, ""); + RAW_VLOG(heap_checker_info_level, "Turning perftools heap leak checking off"); + heap_checker_on = false; + // Unset our hooks checking they were set: + RAW_CHECK(MallocHook::RemoveNewHook(&NewHook), ""); + RAW_CHECK(MallocHook::RemoveDeleteHook(&DeleteHook), ""); + Allocator::DeleteAndNull(&heap_profile); + // free our optional global data: + Allocator::DeleteAndNullIfNot(&ignored_objects); + Allocator::DeleteAndNullIfNot(&disabled_ranges); + Allocator::DeleteAndNullIfNot(&global_region_caller_ranges); + Allocator::Shutdown(); + MemoryRegionMap::Shutdown(); + } + RAW_CHECK(!heap_checker_on, ""); +} + +extern bool heap_leak_checker_bcad_variable; // in heap-checker-bcad.cc + +static bool has_called_before_constructors = false; + +// TODO(maxim): inline this function with +// MallocHook_InitAtFirstAllocation_HeapLeakChecker, and also rename +// HeapLeakChecker::BeforeConstructorsLocked. 
+// One-time early initializer (idempotent: only the first call does anything).
+// Records the heap-checker pid, makes FLAGS_verbose controllable via the
+// PERFTOOLS_VERBOSE environment variable before normal flag initialization
+// runs, and decides from the HEAPCHECK environment variable whether to start
+// heap tracking via HeapLeakChecker::BeforeConstructorsLocked().  HEAPCHECK
+// is deliberately ignored for setuid programs (when HAVE_GETEUID).
+void HeapLeakChecker_BeforeConstructors() {
+  SpinLockHolder l(&heap_checker_lock);
+  // We can be called from several places: the first mmap/sbrk/alloc call
+  // or the first global c-tor from heap-checker-bcad.cc:
+  // Do not re-execute initialization:
+  if (has_called_before_constructors) return;
+  has_called_before_constructors = true;
+
+  heap_checker_pid = getpid();  // set it always
+  heap_leak_checker_bcad_variable = true;
+  // just to reference it, so that heap-checker-bcad.o is linked in
+
+  // This function can be called *very* early, before the normal
+  // global-constructor that sets FLAGS_verbose.  Set it manually now,
+  // so the RAW_LOG messages here are controllable.
+  const char* verbose_str = GetenvBeforeMain("PERFTOOLS_VERBOSE");
+  if (verbose_str && atoi(verbose_str)) {  // different than the default of 0?
+    FLAGS_verbose = atoi(verbose_str);
+  }
+
+  bool need_heap_check = true;
+  // The user indicates a desire for heap-checking via the HEAPCHECK
+  // environment variable.  If it's not set, there's no way to do
+  // heap-checking.
+  if (!GetenvBeforeMain("HEAPCHECK")) {
+    need_heap_check = false;
+  }
+#ifdef HAVE_GETEUID
+  if (need_heap_check && getuid() != geteuid()) {
+    // heap-checker writes out files.  Thus, for security reasons, we don't
+    // recognize the env. var. to turn on heap-checking if we're setuid.
+    RAW_LOG(WARNING, ("HeapChecker: ignoring HEAPCHECK because "
+                      "program seems to be setuid\n"));
+    need_heap_check = false;
+  }
+#endif
+  if (need_heap_check) {
+    HeapLeakChecker::BeforeConstructorsLocked();
+  }
+}
+
+// This function overrides the weak function defined in malloc_hook.cc and
+// called by one of the initial malloc hooks (malloc_hook.cc) when the very
+// first memory allocation or an mmap/sbrk happens.  This ensures that
+// HeapLeakChecker is initialized and installs all its hooks early enough to
+// track absolutely all memory allocations and all memory region acquisitions
+// via mmap and sbrk.
+// C-linkage shim so malloc_hook.cc can trigger heap-checker initialization
+// on the very first allocation/mmap/sbrk (see comment above).
+extern "C" void MallocHook_InitAtFirstAllocation_HeapLeakChecker() {
+  HeapLeakChecker_BeforeConstructors();
+}
+
+// This function is executed after all global object destructors run.
+void HeapLeakChecker_AfterDestructors() {
+  { SpinLockHolder l(&heap_checker_lock);
+    // can get here (via forks?) with other pids
+    if (heap_checker_pid != getpid()) return;
+  }
+  if (FLAGS_heap_check_after_destructors) {
+    if (HeapLeakChecker::DoMainHeapCheck()) {
+      const struct timespec sleep_time = { 0, 500000000 };  // 500 ms
+      nanosleep(&sleep_time, NULL);
+      // Need this hack to wait for other pthreads to exit.
+      // Otherwise tcmalloc find errors
+      // on a free() call from pthreads.
+    }
+  }
+  SpinLockHolder l(&heap_checker_lock);
+  RAW_CHECK(!do_main_heap_check, "should have done it");
+}
+
+//----------------------------------------------------------------------
+// HeapLeakChecker disabling helpers
+//----------------------------------------------------------------------
+
+// These functions are at the end of the file to prevent their inlining:
+
+// static
+// Records [start_address, end_address) in disabled_ranges (keyed by the
+// range's end address) so that leak checking is disabled in stack traces
+// under frame addresses within that range, down to max_depth frames.
+// A verbatim repeat registration of the same range is a no-op; a
+// conflicting registration for the same end address is fatal.
+void HeapLeakChecker::DisableChecksFromToLocked(const void* start_address,
+                                                const void* end_address,
+                                                int max_depth) {
+  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
+  RAW_DCHECK(start_address < end_address, "");
+  if (disabled_ranges == NULL) {
+    disabled_ranges = new(Allocator::Allocate(sizeof(DisabledRangeMap)))
+                        DisabledRangeMap;
+  }
+  RangeValue value;
+  value.start_address = AsInt(start_address);
+  value.max_depth = max_depth;
+  if (disabled_ranges->insert(make_pair(AsInt(end_address), value)).second) {
+    RAW_VLOG(10, "Disabling leak checking in stack traces "
+                 "under frame addresses between %p..%p",
+                 start_address, end_address);
+  } else {  // check that this is just a verbatim repetition
+    RangeValue const& val = disabled_ranges->find(AsInt(end_address))->second;
+    if (val.max_depth != value.max_depth  ||
+        val.start_address != value.start_address) {
+      RAW_LOG(FATAL, "Two DisableChecksToHereFrom calls conflict: "
+                     "(%p, %p, %d) vs. (%p, %p, %d)",
+              AsPtr(val.start_address), end_address, val.max_depth,
+              start_address, end_address, max_depth);
+    }
+  }
+}
+
+// static
+// If *ptr points inside a live heap object (per heap_profile's allocation
+// map, searching at most max_heap_object_size bytes back), rewrites *ptr to
+// the object's start, stores its size in *object_size, and returns true.
+inline bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr,
+                                              size_t* object_size) {
+  // Commented-out because HaveOnHeapLocked is very performance-critical:
+  // RAW_DCHECK(heap_checker_lock.IsHeld(), "");
+  const uintptr_t addr = AsInt(*ptr);
+  if (heap_profile->FindInsideAlloc(
+        *ptr, max_heap_object_size, ptr, object_size)) {
+    RAW_VLOG(16, "Got pointer into %p at +%" PRIuPTR " offset",
+             *ptr, addr - AsInt(*ptr));
+    return true;
+  }
+  return false;
+}
+
+// static
+// Returns the top-of-stack PC recorded for the allocation at ptr; fatal if
+// ptr is not a tracked allocation or has no recorded stack frames.
+const void* HeapLeakChecker::GetAllocCaller(void* ptr) {
+  // this is used only in the unittest, so the heavy checks are fine
+  HeapProfileTable::AllocInfo info;
+  { SpinLockHolder l(&heap_checker_lock);
+    RAW_CHECK(heap_profile->FindAllocDetails(ptr, &info), "");
+  }
+  RAW_CHECK(info.stack_depth >= 1, "");
+  return info.call_stack[0];
+}
diff --git a/src/third_party/gperftools-2.7/src/heap-profile-stats.h b/src/third_party/gperftools-2.7/src/heap-profile-stats.h
new file mode 100644
index 00000000000..ae45d5883fa
--- /dev/null
+++ b/src/third_party/gperftools-2.7/src/heap-profile-stats.h
@@ -0,0 +1,78 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2013, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc.
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file defines structs to accumulate memory allocation and deallocation +// counts. These structs are commonly used for malloc (in HeapProfileTable) +// and mmap (in MemoryRegionMap). + +// A bucket is data structure for heap profiling to store a pair of a stack +// trace and counts of (de)allocation. Buckets are stored in a hash table +// which is declared as "HeapProfileBucket**". +// +// A hash value is computed from a stack trace. Collision in the hash table +// is resolved by separate chaining with linked lists. The links in the list +// are implemented with the member "HeapProfileBucket* next". +// +// A structure of a hash table HeapProfileBucket** bucket_table would be like: +// bucket_table[0] => NULL +// bucket_table[1] => HeapProfileBucket() => HeapProfileBucket() => NULL +// ... +// bucket_table[i] => HeapProfileBucket() => NULL +// ... 
+//   bucket_table[n] => HeapProfileBucket() => NULL
+
+#ifndef HEAP_PROFILE_STATS_H_
+#define HEAP_PROFILE_STATS_H_
+
+// Accumulated allocation/deallocation counters.  Live (net) usage is the
+// difference of the gross counters: allocs - frees and alloc_size - free_size.
+struct HeapProfileStats {
+  // Returns true if the two HeapProfileStats are semantically equal.
+  // Only the net values are compared (allocs - frees and
+  // alloc_size - free_size); the gross totals may differ.
+  bool Equivalent(const HeapProfileStats& other) const {
+    return allocs - frees == other.allocs - other.frees &&
+        alloc_size - free_size == other.alloc_size - other.free_size;
+  }
+
+  int32 allocs;      // Number of allocation calls.
+  int32 frees;       // Number of free calls.
+  int64 alloc_size;  // Total size of all allocated objects so far.
+  int64 free_size;   // Total size of all freed objects so far.
+};
+
+// Allocation and deallocation statistics per each stack trace.
+// Doubles as a hash-table node: buckets with colliding hashes are chained
+// through the 'next' pointer (see file comment above).
+struct HeapProfileBucket : public HeapProfileStats {
+  // Longest stack trace we record.
+  static const int kMaxStackDepth = 32;
+
+  uintptr_t hash;           // Hash value of the stack trace.
+  int depth;                // Depth of stack trace.
+  const void** stack;       // Stack trace.
+  HeapProfileBucket* next;  // Next entry in hash-table.
+};
+
+#endif  // HEAP_PROFILE_STATS_H_
diff --git a/src/third_party/gperftools-2.7/src/heap-profile-table.cc b/src/third_party/gperftools-2.7/src/heap-profile-table.cc
new file mode 100644
index 00000000000..7486468c056
--- /dev/null
+++ b/src/third_party/gperftools-2.7/src/heap-profile-table.cc
@@ -0,0 +1,631 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Copyright (c) 2006, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc.
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// Maxim Lifantsev (refactoring) +// + +#include <config.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for write() +#endif +#include <fcntl.h> // for open() +#ifdef HAVE_GLOB_H +#include <glob.h> +#ifndef GLOB_NOMATCH // true on some old cygwins +# define GLOB_NOMATCH 0 +#endif +#endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> // for PRIxPTR +#endif +#ifdef HAVE_POLL_H +#include <poll.h> +#endif +#include <errno.h> +#include <stdarg.h> +#include <string> +#include <map> +#include <algorithm> // for sort(), equal(), and copy() + +#include "heap-profile-table.h" + +#include "base/logging.h" +#include "raw_printer.h" +#include "symbolize.h" +#include <gperftools/stacktrace.h> +#include <gperftools/malloc_hook.h> +#include "memory_region_map.h" +#include "base/commandlineflags.h" +#include "base/logging.h" // for the RawFD I/O commands +#include "base/sysinfo.h" + +using std::sort; +using std::equal; +using std::copy; +using std::string; +using std::map; + +using 
tcmalloc::FillProcSelfMaps; // from sysinfo.h +using tcmalloc::DumpProcSelfMaps; // from sysinfo.h + +//---------------------------------------------------------------------- + +DEFINE_bool(cleanup_old_heap_profiles, + EnvToBool("HEAP_PROFILE_CLEANUP", true), + "At initialization time, delete old heap profiles."); + +DEFINE_int32(heap_check_max_leaks, + EnvToInt("HEAP_CHECK_MAX_LEAKS", 20), + "The maximum number of leak reports to print."); + +//---------------------------------------------------------------------- + +// header of the dumped heap profile +static const char kProfileHeader[] = "heap profile: "; +static const char kProcSelfMapsHeader[] = "\nMAPPED_LIBRARIES:\n"; + +//---------------------------------------------------------------------- + +const char HeapProfileTable::kFileExt[] = ".heap"; + +//---------------------------------------------------------------------- + +static const int kHashTableSize = 179999; // Size for bucket_table_. +/*static*/ const int HeapProfileTable::kMaxStackDepth; + +//---------------------------------------------------------------------- + +// We strip out different number of stack frames in debug mode +// because less inlining happens in that case +#ifdef NDEBUG +static const int kStripFrames = 2; +#else +static const int kStripFrames = 3; +#endif + +// For sorting Stats or Buckets by in-use space +static bool ByAllocatedSpace(HeapProfileTable::Stats* a, + HeapProfileTable::Stats* b) { + // Return true iff "a" has more allocated space than "b" + return (a->alloc_size - a->free_size) > (b->alloc_size - b->free_size); +} + +//---------------------------------------------------------------------- + +HeapProfileTable::HeapProfileTable(Allocator alloc, + DeAllocator dealloc, + bool profile_mmap) + : alloc_(alloc), + dealloc_(dealloc), + profile_mmap_(profile_mmap), + bucket_table_(NULL), + num_buckets_(0), + address_map_(NULL) { + // Make a hash table for buckets. 
+ const int table_bytes = kHashTableSize * sizeof(*bucket_table_); + bucket_table_ = static_cast<Bucket**>(alloc_(table_bytes)); + memset(bucket_table_, 0, table_bytes); + + // Make an allocation map. + address_map_ = + new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_); + + // Initialize. + memset(&total_, 0, sizeof(total_)); + num_buckets_ = 0; +} + +HeapProfileTable::~HeapProfileTable() { + // Free the allocation map. + address_map_->~AllocationMap(); + dealloc_(address_map_); + address_map_ = NULL; + + // Free the hash table. + for (int i = 0; i < kHashTableSize; i++) { + for (Bucket* curr = bucket_table_[i]; curr != 0; /**/) { + Bucket* bucket = curr; + curr = curr->next; + dealloc_(bucket->stack); + dealloc_(bucket); + } + } + dealloc_(bucket_table_); + bucket_table_ = NULL; +} + +HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth, + const void* const key[]) { + // Make hash-value + uintptr_t h = 0; + for (int i = 0; i < depth; i++) { + h += reinterpret_cast<uintptr_t>(key[i]); + h += h << 10; + h ^= h >> 6; + } + h += h << 3; + h ^= h >> 11; + + // Lookup stack trace in table + unsigned int buck = ((unsigned int) h) % kHashTableSize; + for (Bucket* b = bucket_table_[buck]; b != 0; b = b->next) { + if ((b->hash == h) && + (b->depth == depth) && + equal(key, key + depth, b->stack)) { + return b; + } + } + + // Create new bucket + const size_t key_size = sizeof(key[0]) * depth; + const void** kcopy = reinterpret_cast<const void**>(alloc_(key_size)); + copy(key, key + depth, kcopy); + Bucket* b = reinterpret_cast<Bucket*>(alloc_(sizeof(Bucket))); + memset(b, 0, sizeof(*b)); + b->hash = h; + b->depth = depth; + b->stack = kcopy; + b->next = bucket_table_[buck]; + bucket_table_[buck] = b; + num_buckets_++; + return b; +} + +int HeapProfileTable::GetCallerStackTrace( + int skip_count, void* stack[kMaxStackDepth]) { + return MallocHook::GetCallerStackTrace( + stack, kMaxStackDepth, kStripFrames + skip_count + 1); +} + +void 
HeapProfileTable::RecordAlloc( + const void* ptr, size_t bytes, int stack_depth, + const void* const call_stack[]) { + Bucket* b = GetBucket(stack_depth, call_stack); + b->allocs++; + b->alloc_size += bytes; + total_.allocs++; + total_.alloc_size += bytes; + + AllocValue v; + v.set_bucket(b); // also did set_live(false); set_ignore(false) + v.bytes = bytes; + address_map_->Insert(ptr, v); +} + +void HeapProfileTable::RecordFree(const void* ptr) { + AllocValue v; + if (address_map_->FindAndRemove(ptr, &v)) { + Bucket* b = v.bucket(); + b->frees++; + b->free_size += v.bytes; + total_.frees++; + total_.free_size += v.bytes; + } +} + +bool HeapProfileTable::FindAlloc(const void* ptr, size_t* object_size) const { + const AllocValue* alloc_value = address_map_->Find(ptr); + if (alloc_value != NULL) *object_size = alloc_value->bytes; + return alloc_value != NULL; +} + +bool HeapProfileTable::FindAllocDetails(const void* ptr, + AllocInfo* info) const { + const AllocValue* alloc_value = address_map_->Find(ptr); + if (alloc_value != NULL) { + info->object_size = alloc_value->bytes; + info->call_stack = alloc_value->bucket()->stack; + info->stack_depth = alloc_value->bucket()->depth; + } + return alloc_value != NULL; +} + +bool HeapProfileTable::FindInsideAlloc(const void* ptr, + size_t max_size, + const void** object_ptr, + size_t* object_size) const { + const AllocValue* alloc_value = + address_map_->FindInside(&AllocValueSize, max_size, ptr, object_ptr); + if (alloc_value != NULL) *object_size = alloc_value->bytes; + return alloc_value != NULL; +} + +bool HeapProfileTable::MarkAsLive(const void* ptr) { + AllocValue* alloc = address_map_->FindMutable(ptr); + if (alloc && !alloc->live()) { + alloc->set_live(true); + return true; + } + return false; +} + +void HeapProfileTable::MarkAsIgnored(const void* ptr) { + AllocValue* alloc = address_map_->FindMutable(ptr); + if (alloc) { + alloc->set_ignore(true); + } +} + +// We'd be happier using snprintfer, but we don't to reduce 
dependencies. +int HeapProfileTable::UnparseBucket(const Bucket& b, + char* buf, int buflen, int bufsize, + const char* extra, + Stats* profile_stats) { + if (profile_stats != NULL) { + profile_stats->allocs += b.allocs; + profile_stats->alloc_size += b.alloc_size; + profile_stats->frees += b.frees; + profile_stats->free_size += b.free_size; + } + int printed = + snprintf(buf + buflen, bufsize - buflen, "%6d: %8" PRId64 " [%6d: %8" PRId64 "] @%s", + b.allocs - b.frees, + b.alloc_size - b.free_size, + b.allocs, + b.alloc_size, + extra); + // If it looks like the snprintf failed, ignore the fact we printed anything + if (printed < 0 || printed >= bufsize - buflen) return buflen; + buflen += printed; + for (int d = 0; d < b.depth; d++) { + printed = snprintf(buf + buflen, bufsize - buflen, " 0x%08" PRIxPTR, + reinterpret_cast<uintptr_t>(b.stack[d])); + if (printed < 0 || printed >= bufsize - buflen) return buflen; + buflen += printed; + } + printed = snprintf(buf + buflen, bufsize - buflen, "\n"); + if (printed < 0 || printed >= bufsize - buflen) return buflen; + buflen += printed; + return buflen; +} + +HeapProfileTable::Bucket** +HeapProfileTable::MakeSortedBucketList() const { + Bucket** list = static_cast<Bucket**>(alloc_(sizeof(Bucket) * num_buckets_)); + + int bucket_count = 0; + for (int i = 0; i < kHashTableSize; i++) { + for (Bucket* curr = bucket_table_[i]; curr != 0; curr = curr->next) { + list[bucket_count++] = curr; + } + } + RAW_DCHECK(bucket_count == num_buckets_, ""); + + sort(list, list + num_buckets_, ByAllocatedSpace); + + return list; +} + +void HeapProfileTable::IterateOrderedAllocContexts( + AllocContextIterator callback) const { + Bucket** list = MakeSortedBucketList(); + AllocContextInfo info; + for (int i = 0; i < num_buckets_; ++i) { + *static_cast<Stats*>(&info) = *static_cast<Stats*>(list[i]); + info.stack_depth = list[i]->depth; + info.call_stack = list[i]->stack; + callback(info); + } + dealloc_(list); +} + +int 
HeapProfileTable::FillOrderedProfile(char buf[], int size) const { + Bucket** list = MakeSortedBucketList(); + + // Our file format is "bucket, bucket, ..., bucket, proc_self_maps_info". + // In the cases buf is too small, we'd rather leave out the last + // buckets than leave out the /proc/self/maps info. To ensure that, + // we actually print the /proc/self/maps info first, then move it to + // the end of the buffer, then write the bucket info into whatever + // is remaining, and then move the maps info one last time to close + // any gaps. Whew! + int map_length = snprintf(buf, size, "%s", kProcSelfMapsHeader); + if (map_length < 0 || map_length >= size) { + dealloc_(list); + return 0; + } + bool dummy; // "wrote_all" -- did /proc/self/maps fit in its entirety? + map_length += FillProcSelfMaps(buf + map_length, size - map_length, &dummy); + RAW_DCHECK(map_length <= size, ""); + char* const map_start = buf + size - map_length; // move to end + memmove(map_start, buf, map_length); + size -= map_length; + + Stats stats; + memset(&stats, 0, sizeof(stats)); + int bucket_length = snprintf(buf, size, "%s", kProfileHeader); + if (bucket_length < 0 || bucket_length >= size) { + dealloc_(list); + return 0; + } + bucket_length = UnparseBucket(total_, buf, bucket_length, size, + " heapprofile", &stats); + + // Dump the mmap list first. 
+ if (profile_mmap_) { + BufferArgs buffer(buf, bucket_length, size); + MemoryRegionMap::IterateBuckets<BufferArgs*>(DumpBucketIterator, &buffer); + bucket_length = buffer.buflen; + } + + for (int i = 0; i < num_buckets_; i++) { + bucket_length = UnparseBucket(*list[i], buf, bucket_length, size, "", + &stats); + } + RAW_DCHECK(bucket_length < size, ""); + + dealloc_(list); + + RAW_DCHECK(buf + bucket_length <= map_start, ""); + memmove(buf + bucket_length, map_start, map_length); // close the gap + + return bucket_length + map_length; +} + +// static +void HeapProfileTable::DumpBucketIterator(const Bucket* bucket, + BufferArgs* args) { + args->buflen = UnparseBucket(*bucket, args->buf, args->buflen, args->bufsize, + "", NULL); +} + +inline +void HeapProfileTable::DumpNonLiveIterator(const void* ptr, AllocValue* v, + const DumpArgs& args) { + if (v->live()) { + v->set_live(false); + return; + } + if (v->ignore()) { + return; + } + Bucket b; + memset(&b, 0, sizeof(b)); + b.allocs = 1; + b.alloc_size = v->bytes; + b.depth = v->bucket()->depth; + b.stack = v->bucket()->stack; + char buf[1024]; + int len = UnparseBucket(b, buf, 0, sizeof(buf), "", args.profile_stats); + RawWrite(args.fd, buf, len); +} + +// Callback from NonLiveSnapshot; adds entry to arg->dest +// if not the entry is not live and is not present in arg->base. 
+void HeapProfileTable::AddIfNonLive(const void* ptr, AllocValue* v, + AddNonLiveArgs* arg) { + if (v->live()) { + v->set_live(false); + } else { + if (arg->base != NULL && arg->base->map_.Find(ptr) != NULL) { + // Present in arg->base, so do not save + } else { + arg->dest->Add(ptr, *v); + } + } +} + +bool HeapProfileTable::WriteProfile(const char* file_name, + const Bucket& total, + AllocationMap* allocations) { + RAW_VLOG(1, "Dumping non-live heap profile to %s", file_name); + RawFD fd = RawOpenForWriting(file_name); + if (fd != kIllegalRawFD) { + RawWrite(fd, kProfileHeader, strlen(kProfileHeader)); + char buf[512]; + int len = UnparseBucket(total, buf, 0, sizeof(buf), " heapprofile", + NULL); + RawWrite(fd, buf, len); + const DumpArgs args(fd, NULL); + allocations->Iterate<const DumpArgs&>(DumpNonLiveIterator, args); + RawWrite(fd, kProcSelfMapsHeader, strlen(kProcSelfMapsHeader)); + DumpProcSelfMaps(fd); + RawClose(fd); + return true; + } else { + RAW_LOG(ERROR, "Failed dumping filtered heap profile to %s", file_name); + return false; + } +} + +void HeapProfileTable::CleanupOldProfiles(const char* prefix) { + if (!FLAGS_cleanup_old_heap_profiles) + return; + string pattern = string(prefix) + ".*" + kFileExt; +#if defined(HAVE_GLOB_H) + glob_t g; + const int r = glob(pattern.c_str(), GLOB_ERR, NULL, &g); + if (r == 0 || r == GLOB_NOMATCH) { + const int prefix_length = strlen(prefix); + for (int i = 0; i < g.gl_pathc; i++) { + const char* fname = g.gl_pathv[i]; + if ((strlen(fname) >= prefix_length) && + (memcmp(fname, prefix, prefix_length) == 0)) { + RAW_VLOG(1, "Removing old heap profile %s", fname); + unlink(fname); + } + } + } + globfree(&g); +#else /* HAVE_GLOB_H */ + RAW_LOG(WARNING, "Unable to remove old heap profiles (can't run glob())"); +#endif +} + +HeapProfileTable::Snapshot* HeapProfileTable::TakeSnapshot() { + Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_); + address_map_->Iterate(AddToSnapshot, s); + return s; +} + +void 
HeapProfileTable::ReleaseSnapshot(Snapshot* s) { + s->~Snapshot(); + dealloc_(s); +} + +// Callback from TakeSnapshot; adds a single entry to snapshot +void HeapProfileTable::AddToSnapshot(const void* ptr, AllocValue* v, + Snapshot* snapshot) { + snapshot->Add(ptr, *v); +} + +HeapProfileTable::Snapshot* HeapProfileTable::NonLiveSnapshot( + Snapshot* base) { + RAW_VLOG(2, "NonLiveSnapshot input: %d %d\n", + int(total_.allocs - total_.frees), + int(total_.alloc_size - total_.free_size)); + + Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_); + AddNonLiveArgs args; + args.dest = s; + args.base = base; + address_map_->Iterate<AddNonLiveArgs*>(AddIfNonLive, &args); + RAW_VLOG(2, "NonLiveSnapshot output: %d %d\n", + int(s->total_.allocs - s->total_.frees), + int(s->total_.alloc_size - s->total_.free_size)); + return s; +} + +// Information kept per unique bucket seen +struct HeapProfileTable::Snapshot::Entry { + int count; + int bytes; + Bucket* bucket; + Entry() : count(0), bytes(0) { } + + // Order by decreasing bytes + bool operator<(const Entry& x) const { + return this->bytes > x.bytes; + } +}; + +// State used to generate leak report. We keep a mapping from Bucket pointer +// the collected stats for that bucket. +struct HeapProfileTable::Snapshot::ReportState { + map<Bucket*, Entry> buckets_; +}; + +// Callback from ReportLeaks; updates ReportState. +void HeapProfileTable::Snapshot::ReportCallback(const void* ptr, + AllocValue* v, + ReportState* state) { + Entry* e = &state->buckets_[v->bucket()]; // Creates empty Entry first time + e->bucket = v->bucket(); + e->count++; + e->bytes += v->bytes; +} + +void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name, + const char* filename, + bool should_symbolize) { + // This is only used by the heap leak checker, but is intimately + // tied to the allocation map that belongs in this module and is + // therefore placed here. 
+ RAW_LOG(ERROR, "Leak check %s detected leaks of %" PRIuS " bytes " + "in %" PRIuS " objects", + checker_name, + size_t(total_.alloc_size), + size_t(total_.allocs)); + + // Group objects by Bucket + ReportState state; + map_.Iterate(&ReportCallback, &state); + + // Sort buckets by decreasing leaked size + const int n = state.buckets_.size(); + Entry* entries = new Entry[n]; + int dst = 0; + for (map<Bucket*,Entry>::const_iterator iter = state.buckets_.begin(); + iter != state.buckets_.end(); + ++iter) { + entries[dst++] = iter->second; + } + sort(entries, entries + n); + + // Report a bounded number of leaks to keep the leak report from + // growing too long. + const int to_report = + (FLAGS_heap_check_max_leaks > 0 && + n > FLAGS_heap_check_max_leaks) ? FLAGS_heap_check_max_leaks : n; + RAW_LOG(ERROR, "The %d largest leaks:", to_report); + + // Print + SymbolTable symbolization_table; + for (int i = 0; i < to_report; i++) { + const Entry& e = entries[i]; + for (int j = 0; j < e.bucket->depth; j++) { + symbolization_table.Add(e.bucket->stack[j]); + } + } + static const int kBufSize = 2<<10; + char buffer[kBufSize]; + if (should_symbolize) + symbolization_table.Symbolize(); + for (int i = 0; i < to_report; i++) { + const Entry& e = entries[i]; + base::RawPrinter printer(buffer, kBufSize); + printer.Printf("Leak of %d bytes in %d objects allocated from:\n", + e.bytes, e.count); + for (int j = 0; j < e.bucket->depth; j++) { + const void* pc = e.bucket->stack[j]; + printer.Printf("\t@ %" PRIxPTR " %s\n", + reinterpret_cast<uintptr_t>(pc), symbolization_table.GetSymbol(pc)); + } + RAW_LOG(ERROR, "%s", buffer); + } + + if (to_report < n) { + RAW_LOG(ERROR, "Skipping leaks numbered %d..%d", + to_report, n-1); + } + delete[] entries; + + // TODO: Dump the sorted Entry list instead of dumping raw data? 
+ // (should be much shorter) + if (!HeapProfileTable::WriteProfile(filename, total_, &map_)) { + RAW_LOG(ERROR, "Could not write pprof profile to %s", filename); + } +} + +void HeapProfileTable::Snapshot::ReportObject(const void* ptr, + AllocValue* v, + char* unused) { + // Perhaps also log the allocation stack trace (unsymbolized) + // on this line in case somebody finds it useful. + RAW_LOG(ERROR, "leaked %" PRIuS " byte object %p", v->bytes, ptr); +} + +void HeapProfileTable::Snapshot::ReportIndividualObjects() { + char unused; + map_.Iterate(ReportObject, &unused); +} diff --git a/src/third_party/gperftools-2.7/src/heap-profile-table.h b/src/third_party/gperftools-2.7/src/heap-profile-table.h new file mode 100644 index 00000000000..3c6284741af --- /dev/null +++ b/src/third_party/gperftools-2.7/src/heap-profile-table.h @@ -0,0 +1,399 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// Maxim Lifantsev (refactoring) +// + +#ifndef BASE_HEAP_PROFILE_TABLE_H_ +#define BASE_HEAP_PROFILE_TABLE_H_ + +#include "addressmap-inl.h" +#include "base/basictypes.h" +#include "base/logging.h" // for RawFD +#include "heap-profile-stats.h" + +// Table to maintain a heap profile data inside, +// i.e. the set of currently active heap memory allocations. +// thread-unsafe and non-reentrant code: +// each instance object must be used by one thread +// at a time w/o self-recursion. +// +// TODO(maxim): add a unittest for this class. +class HeapProfileTable { + public: + + // Extension to be used for heap pforile files. + static const char kFileExt[]; + + // Longest stack trace we record. + static const int kMaxStackDepth = 32; + + // data types ---------------------------- + + // Profile stats. + typedef HeapProfileStats Stats; + + // Info we can return about an allocation. + struct AllocInfo { + size_t object_size; // size of the allocation + const void* const* call_stack; // call stack that made the allocation call + int stack_depth; // depth of call_stack + bool live; + bool ignored; + }; + + // Info we return about an allocation context. + // An allocation context is a unique caller stack trace + // of an allocation operation. 
+ struct AllocContextInfo : public Stats { + int stack_depth; // Depth of stack trace + const void* const* call_stack; // Stack trace + }; + + // Memory (de)allocator interface we'll use. + typedef void* (*Allocator)(size_t size); + typedef void (*DeAllocator)(void* ptr); + + // interface --------------------------- + + HeapProfileTable(Allocator alloc, DeAllocator dealloc, bool profile_mmap); + ~HeapProfileTable(); + + // Collect the stack trace for the function that asked to do the + // allocation for passing to RecordAlloc() below. + // + // The stack trace is stored in 'stack'. The stack depth is returned. + // + // 'skip_count' gives the number of stack frames between this call + // and the memory allocation function. + static int GetCallerStackTrace(int skip_count, void* stack[kMaxStackDepth]); + + // Record an allocation at 'ptr' of 'bytes' bytes. 'stack_depth' + // and 'call_stack' identifying the function that requested the + // allocation. They can be generated using GetCallerStackTrace() above. + void RecordAlloc(const void* ptr, size_t bytes, + int stack_depth, const void* const call_stack[]); + + // Record the deallocation of memory at 'ptr'. + void RecordFree(const void* ptr); + + // Return true iff we have recorded an allocation at 'ptr'. + // If yes, fill *object_size with the allocation byte size. + bool FindAlloc(const void* ptr, size_t* object_size) const; + // Same as FindAlloc, but fills all of *info. + bool FindAllocDetails(const void* ptr, AllocInfo* info) const; + + // Return true iff "ptr" points into a recorded allocation + // If yes, fill *object_ptr with the actual allocation address + // and *object_size with the allocation byte size. + // max_size specifies largest currently possible allocation size. + bool FindInsideAlloc(const void* ptr, size_t max_size, + const void** object_ptr, size_t* object_size) const; + + // If "ptr" points to a recorded allocation and it's not marked as live + // mark it as live and return true. 
Else return false. + // All allocations start as non-live. + bool MarkAsLive(const void* ptr); + + // If "ptr" points to a recorded allocation, mark it as "ignored". + // Ignored objects are treated like other objects, except that they + // are skipped in heap checking reports. + void MarkAsIgnored(const void* ptr); + + // Return current total (de)allocation statistics. It doesn't contain + // mmap'ed regions. + const Stats& total() const { return total_; } + + // Allocation data iteration callback: gets passed object pointer and + // fully-filled AllocInfo. + typedef void (*AllocIterator)(const void* ptr, const AllocInfo& info); + + // Iterate over the allocation profile data calling "callback" + // for every allocation. + void IterateAllocs(AllocIterator callback) const { + address_map_->Iterate(MapArgsAllocIterator, callback); + } + + // Allocation context profile data iteration callback + typedef void (*AllocContextIterator)(const AllocContextInfo& info); + + // Iterate over the allocation context profile data calling "callback" + // for every allocation context. Allocation contexts are ordered by the + // size of allocated space. + void IterateOrderedAllocContexts(AllocContextIterator callback) const; + + // Fill profile data into buffer 'buf' of size 'size' + // and return the actual size occupied by the dump in 'buf'. + // The profile buckets are dumped in the decreasing order + // of currently allocated bytes. + // We do not provision for 0-terminating 'buf'. + int FillOrderedProfile(char buf[], int size) const; + + // Cleanup any old profile files matching prefix + ".*" + kFileExt. + static void CleanupOldProfiles(const char* prefix); + + // Return a snapshot of the current contents of *this. + // Caller must call ReleaseSnapshot() on result when no longer needed. + // The result is only valid while this exists and until + // the snapshot is discarded by calling ReleaseSnapshot(). 
+ class Snapshot; + Snapshot* TakeSnapshot(); + + // Release a previously taken snapshot. snapshot must not + // be used after this call. + void ReleaseSnapshot(Snapshot* snapshot); + + // Return a snapshot of every non-live, non-ignored object in *this. + // If "base" is non-NULL, skip any objects present in "base". + // As a side-effect, clears the "live" bit on every live object in *this. + // Caller must call ReleaseSnapshot() on result when no longer needed. + Snapshot* NonLiveSnapshot(Snapshot* base); + + private: + + // data types ---------------------------- + + // Hash table bucket to hold (de)allocation stats + // for a given allocation call stack trace. + typedef HeapProfileBucket Bucket; + + // Info stored in the address map + struct AllocValue { + // Access to the stack-trace bucket + Bucket* bucket() const { + return reinterpret_cast<Bucket*>(bucket_rep & ~uintptr_t(kMask)); + } + // This also does set_live(false). + void set_bucket(Bucket* b) { bucket_rep = reinterpret_cast<uintptr_t>(b); } + size_t bytes; // Number of bytes in this allocation + + // Access to the allocation liveness flag (for leak checking) + bool live() const { return bucket_rep & kLive; } + void set_live(bool l) { + bucket_rep = (bucket_rep & ~uintptr_t(kLive)) | (l ? kLive : 0); + } + + // Should this allocation be ignored if it looks like a leak? + bool ignore() const { return bucket_rep & kIgnore; } + void set_ignore(bool r) { + bucket_rep = (bucket_rep & ~uintptr_t(kIgnore)) | (r ? kIgnore : 0); + } + + private: + // We store a few bits in the bottom bits of bucket_rep. + // (Alignment is at least four, so we have at least two bits.) 
+ static const int kLive = 1; + static const int kIgnore = 2; + static const int kMask = kLive | kIgnore; + + uintptr_t bucket_rep; + }; + + // helper for FindInsideAlloc + static size_t AllocValueSize(const AllocValue& v) { return v.bytes; } + + typedef AddressMap<AllocValue> AllocationMap; + + // Arguments that need to be passed DumpBucketIterator callback below. + struct BufferArgs { + BufferArgs(char* buf_arg, int buflen_arg, int bufsize_arg) + : buf(buf_arg), + buflen(buflen_arg), + bufsize(bufsize_arg) { + } + + char* buf; + int buflen; + int bufsize; + + DISALLOW_COPY_AND_ASSIGN(BufferArgs); + }; + + // Arguments that need to be passed DumpNonLiveIterator callback below. + struct DumpArgs { + DumpArgs(RawFD fd_arg, Stats* profile_stats_arg) + : fd(fd_arg), + profile_stats(profile_stats_arg) { + } + + RawFD fd; // file to write to + Stats* profile_stats; // stats to update (may be NULL) + }; + + // helpers ---------------------------- + + // Unparse bucket b and print its portion of profile dump into buf. + // We return the amount of space in buf that we use. We start printing + // at buf + buflen, and promise not to go beyond buf + bufsize. + // We do not provision for 0-terminating 'buf'. + // + // If profile_stats is non-NULL, we update *profile_stats by + // counting bucket b. + // + // "extra" is appended to the unparsed bucket. Typically it is empty, + // but may be set to something like " heapprofile" for the total + // bucket to indicate the type of the profile. + static int UnparseBucket(const Bucket& b, + char* buf, int buflen, int bufsize, + const char* extra, + Stats* profile_stats); + + // Get the bucket for the caller stack trace 'key' of depth 'depth' + // creating the bucket if needed. + Bucket* GetBucket(int depth, const void* const key[]); + + // Helper for IterateAllocs to do callback signature conversion + // from AllocationMap::Iterate to AllocIterator. 
+ static void MapArgsAllocIterator(const void* ptr, AllocValue* v, + AllocIterator callback) { + AllocInfo info; + info.object_size = v->bytes; + info.call_stack = v->bucket()->stack; + info.stack_depth = v->bucket()->depth; + info.live = v->live(); + info.ignored = v->ignore(); + callback(ptr, info); + } + + // Helper to dump a bucket. + inline static void DumpBucketIterator(const Bucket* bucket, + BufferArgs* args); + + // Helper for DumpNonLiveProfile to do object-granularity + // heap profile dumping. It gets passed to AllocationMap::Iterate. + inline static void DumpNonLiveIterator(const void* ptr, AllocValue* v, + const DumpArgs& args); + + // Helper for IterateOrderedAllocContexts and FillOrderedProfile. + // Creates a sorted list of Buckets whose length is num_buckets_. + // The caller is responsible for deallocating the returned list. + Bucket** MakeSortedBucketList() const; + + // Helper for TakeSnapshot. Saves object to snapshot. + static void AddToSnapshot(const void* ptr, AllocValue* v, Snapshot* s); + + // Arguments passed to AddIfNonLive + struct AddNonLiveArgs { + Snapshot* dest; + Snapshot* base; + }; + + // Helper for NonLiveSnapshot. Adds the object to the destination + // snapshot if it is non-live. + static void AddIfNonLive(const void* ptr, AllocValue* v, + AddNonLiveArgs* arg); + + // Write contents of "*allocations" as a heap profile to + // "file_name". "total" must contain the total of all entries in + // "*allocations". + static bool WriteProfile(const char* file_name, + const Bucket& total, + AllocationMap* allocations); + + // data ---------------------------- + + // Memory (de)allocator that we use. + Allocator alloc_; + DeAllocator dealloc_; + + // Overall profile stats; we use only the Stats part, + // but make it a Bucket to pass to UnparseBucket. + Bucket total_; + + bool profile_mmap_; + + // Bucket hash table for malloc. 
+ // We hand-craft one instead of using one of the pre-written + // ones because we do not want to use malloc when operating on the table. + // It is only few lines of code, so no big deal. + Bucket** bucket_table_; + int num_buckets_; + + // Map of all currently allocated objects and mapped regions we know about. + AllocationMap* address_map_; + + DISALLOW_COPY_AND_ASSIGN(HeapProfileTable); +}; + +class HeapProfileTable::Snapshot { + public: + const Stats& total() const { return total_; } + + // Report anything in this snapshot as a leak. + // May use new/delete for temporary storage. + // If should_symbolize is true, will fork (which is not threadsafe) + // to turn addresses into symbol names. Set to false for maximum safety. + // Also writes a heap profile to "filename" that contains + // all of the objects in this snapshot. + void ReportLeaks(const char* checker_name, const char* filename, + bool should_symbolize); + + // Report the addresses of all leaked objects. + // May use new/delete for temporary storage. + void ReportIndividualObjects(); + + bool Empty() const { + return (total_.allocs == 0) && (total_.alloc_size == 0); + } + + private: + friend class HeapProfileTable; + + // Total count/size are stored in a Bucket so we can reuse UnparseBucket + Bucket total_; + + // We share the Buckets managed by the parent table, but have our + // own object->bucket map. + AllocationMap map_; + + Snapshot(Allocator alloc, DeAllocator dealloc) : map_(alloc, dealloc) { + memset(&total_, 0, sizeof(total_)); + } + + // Callback used to populate a Snapshot object with entries found + // in another allocation map. 
+ inline void Add(const void* ptr, const AllocValue& v) { + map_.Insert(ptr, v); + total_.allocs++; + total_.alloc_size += v.bytes; + } + + // Helpers for sorting and generating leak reports + struct Entry; + struct ReportState; + static void ReportCallback(const void* ptr, AllocValue* v, ReportState*); + static void ReportObject(const void* ptr, AllocValue* v, char*); + + DISALLOW_COPY_AND_ASSIGN(Snapshot); +}; + +#endif // BASE_HEAP_PROFILE_TABLE_H_ diff --git a/src/third_party/gperftools-2.7/src/heap-profiler.cc b/src/third_party/gperftools-2.7/src/heap-profiler.cc new file mode 100755 index 00000000000..33a25acf335 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/heap-profiler.cc @@ -0,0 +1,622 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// TODO: Log large allocations + +#include <config.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> // for open() +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include <errno.h> +#include <assert.h> +#include <sys/types.h> +#include <signal.h> + +#include <algorithm> +#include <string> + +#include <gperftools/heap-profiler.h> + +#include "base/logging.h" +#include "base/basictypes.h" // for PRId64, among other things +#include "base/googleinit.h" +#include "base/commandlineflags.h" +#include "malloc_hook-inl.h" +#include "tcmalloc_guard.h" +#include <gperftools/malloc_hook.h> +#include <gperftools/malloc_extension.h> +#include "base/spinlock.h" +#include "base/low_level_alloc.h" +#include "base/sysinfo.h" // for GetUniquePathFromEnv() +#include "heap-profile-table.h" +#include "memory_region_map.h" + + +#ifndef PATH_MAX +#ifdef MAXPATHLEN +#define PATH_MAX MAXPATHLEN +#else +#define PATH_MAX 4096 // seems conservative for max filename len! 
+#endif +#endif + +using STL_NAMESPACE::string; +using STL_NAMESPACE::sort; + +//---------------------------------------------------------------------- +// Flags that control heap-profiling +// +// The thread-safety of the profiler depends on these being immutable +// after main starts, so don't change them. +//---------------------------------------------------------------------- + +DEFINE_int64(heap_profile_allocation_interval, + EnvToInt64("HEAP_PROFILE_ALLOCATION_INTERVAL", 1 << 30 /*1GB*/), + "If non-zero, dump heap profiling information once every " + "specified number of bytes allocated by the program since " + "the last dump."); +DEFINE_int64(heap_profile_deallocation_interval, + EnvToInt64("HEAP_PROFILE_DEALLOCATION_INTERVAL", 0), + "If non-zero, dump heap profiling information once every " + "specified number of bytes deallocated by the program " + "since the last dump."); +// We could also add flags that report whenever inuse_bytes changes by +// X or -X, but there hasn't been a need for that yet, so we haven't. 
+DEFINE_int64(heap_profile_inuse_interval, + EnvToInt64("HEAP_PROFILE_INUSE_INTERVAL", 100 << 20 /*100MB*/), + "If non-zero, dump heap profiling information whenever " + "the high-water memory usage mark increases by the specified " + "number of bytes."); +DEFINE_int64(heap_profile_time_interval, + EnvToInt64("HEAP_PROFILE_TIME_INTERVAL", 0), + "If non-zero, dump heap profiling information once every " + "specified number of seconds since the last dump."); +DEFINE_bool(mmap_log, + EnvToBool("HEAP_PROFILE_MMAP_LOG", false), + "Should mmap/munmap calls be logged?"); +DEFINE_bool(mmap_profile, + EnvToBool("HEAP_PROFILE_MMAP", false), + "If heap-profiling is on, also profile mmap, mremap, and sbrk)"); +DEFINE_bool(only_mmap_profile, + EnvToBool("HEAP_PROFILE_ONLY_MMAP", false), + "If heap-profiling is on, only profile mmap, mremap, and sbrk; " + "do not profile malloc/new/etc"); + + +//---------------------------------------------------------------------- +// Locking +//---------------------------------------------------------------------- + +// A pthread_mutex has way too much lock contention to be used here. +// +// I would like to use Mutex, but it can call malloc(), +// which can cause us to fall into an infinite recursion. +// +// So we use a simple spinlock. +static SpinLock heap_lock(SpinLock::LINKER_INITIALIZED); + +//---------------------------------------------------------------------- +// Simple allocator for heap profiler's internal memory +//---------------------------------------------------------------------- + +static LowLevelAlloc::Arena *heap_profiler_memory; + +static void* ProfilerMalloc(size_t bytes) { + return LowLevelAlloc::AllocWithArena(bytes, heap_profiler_memory); +} +static void ProfilerFree(void* p) { + LowLevelAlloc::Free(p); +} + +// We use buffers of this size in DoGetHeapProfile. 
+static const int kProfileBufferSize = 1 << 20; + +// This is a last-ditch buffer we use in DumpProfileLocked in case we +// can't allocate more memory from ProfilerMalloc. We expect this +// will be used by HeapProfileEndWriter when the application has to +// exit due to out-of-memory. This buffer is allocated in +// HeapProfilerStart. Access to this must be protected by heap_lock. +static char* global_profiler_buffer = NULL; + + +//---------------------------------------------------------------------- +// Profiling control/state data +//---------------------------------------------------------------------- + +// Access to all of these is protected by heap_lock. +static bool is_on = false; // If are on as a subsytem. +static bool dumping = false; // Dumping status to prevent recursion +static char* filename_prefix = NULL; // Prefix used for profile file names + // (NULL if no need for dumping yet) +static int dump_count = 0; // How many dumps so far +static int64 last_dump_alloc = 0; // alloc_size when did we last dump +static int64 last_dump_free = 0; // free_size when did we last dump +static int64 high_water_mark = 0; // In-use-bytes at last high-water dump +static int64 last_dump_time = 0; // The time of the last dump + +static HeapProfileTable* heap_profile = NULL; // the heap profile table + +//---------------------------------------------------------------------- +// Profile generation +//---------------------------------------------------------------------- + +// Input must be a buffer of size at least 1MB. +static char* DoGetHeapProfileLocked(char* buf, int buflen) { + // We used to be smarter about estimating the required memory and + // then capping it to 1MB and generating the profile into that. + if (buf == NULL || buflen < 1) + return NULL; + + RAW_DCHECK(heap_lock.IsHeld(), ""); + int bytes_written = 0; + if (is_on) { + HeapProfileTable::Stats const stats = heap_profile->total(); + (void)stats; // avoid an unused-variable warning in non-debug mode. 
+ bytes_written = heap_profile->FillOrderedProfile(buf, buflen - 1); + // FillOrderedProfile should not reduce the set of active mmap-ed regions, + // hence MemoryRegionMap will let us remove everything we've added above: + RAW_DCHECK(stats.Equivalent(heap_profile->total()), ""); + // if this fails, we somehow removed by FillOrderedProfile + // more than we have added. + } + buf[bytes_written] = '\0'; + RAW_DCHECK(bytes_written == strlen(buf), ""); + + return buf; +} + +extern "C" char* GetHeapProfile() { + // Use normal malloc: we return the profile to the user to free it: + char* buffer = reinterpret_cast<char*>(malloc(kProfileBufferSize)); + SpinLockHolder l(&heap_lock); + return DoGetHeapProfileLocked(buffer, kProfileBufferSize); +} + +// defined below +static void NewHook(const void* ptr, size_t size); +static void DeleteHook(const void* ptr); + +// Helper for HeapProfilerDump. +static void DumpProfileLocked(const char* reason) { + RAW_DCHECK(heap_lock.IsHeld(), ""); + RAW_DCHECK(is_on, ""); + RAW_DCHECK(!dumping, ""); + + if (filename_prefix == NULL) return; // we do not yet need dumping + + dumping = true; + + // Make file name + char file_name[1000]; + dump_count++; + snprintf(file_name, sizeof(file_name), "%s.%04d%s", + filename_prefix, dump_count, HeapProfileTable::kFileExt); + + // Dump the profile + RAW_VLOG(0, "Dumping heap profile to %s (%s)", file_name, reason); + // We must use file routines that don't access memory, since we hold + // a memory lock now. + RawFD fd = RawOpenForWriting(file_name); + if (fd == kIllegalRawFD) { + RAW_LOG(ERROR, "Failed dumping heap profile to %s", file_name); + dumping = false; + return; + } + + // This case may be impossible, but it's best to be safe. + // It's safe to use the global buffer: we're protected by heap_lock. 
+ if (global_profiler_buffer == NULL) { + global_profiler_buffer = + reinterpret_cast<char*>(ProfilerMalloc(kProfileBufferSize)); + } + + char* profile = DoGetHeapProfileLocked(global_profiler_buffer, + kProfileBufferSize); + RawWrite(fd, profile, strlen(profile)); + RawClose(fd); + + dumping = false; +} + +//---------------------------------------------------------------------- +// Profile collection +//---------------------------------------------------------------------- + +// Dump a profile after either an allocation or deallocation, if +// the memory use has changed enough since the last dump. +static void MaybeDumpProfileLocked() { + if (!dumping) { + const HeapProfileTable::Stats& total = heap_profile->total(); + const int64 inuse_bytes = total.alloc_size - total.free_size; + bool need_to_dump = false; + char buf[128]; + + if (FLAGS_heap_profile_allocation_interval > 0 && + total.alloc_size >= + last_dump_alloc + FLAGS_heap_profile_allocation_interval) { + snprintf(buf, sizeof(buf), ("%" PRId64 " MB allocated cumulatively, " + "%" PRId64 " MB currently in use"), + total.alloc_size >> 20, inuse_bytes >> 20); + need_to_dump = true; + } else if (FLAGS_heap_profile_deallocation_interval > 0 && + total.free_size >= + last_dump_free + FLAGS_heap_profile_deallocation_interval) { + snprintf(buf, sizeof(buf), ("%" PRId64 " MB freed cumulatively, " + "%" PRId64 " MB currently in use"), + total.free_size >> 20, inuse_bytes >> 20); + need_to_dump = true; + } else if (FLAGS_heap_profile_inuse_interval > 0 && + inuse_bytes > + high_water_mark + FLAGS_heap_profile_inuse_interval) { + snprintf(buf, sizeof(buf), "%" PRId64 " MB currently in use", + inuse_bytes >> 20); + need_to_dump = true; + } else if (FLAGS_heap_profile_time_interval > 0 ) { + int64 current_time = time(NULL); + if (current_time - last_dump_time >= + FLAGS_heap_profile_time_interval) { + snprintf(buf, sizeof(buf), "%" PRId64 " sec since the last dump", + current_time - last_dump_time); + need_to_dump = 
true; + last_dump_time = current_time; + } + } + if (need_to_dump) { + DumpProfileLocked(buf); + + last_dump_alloc = total.alloc_size; + last_dump_free = total.free_size; + if (inuse_bytes > high_water_mark) + high_water_mark = inuse_bytes; + } + } +} + +// Record an allocation in the profile. +static void RecordAlloc(const void* ptr, size_t bytes, int skip_count) { + // Take the stack trace outside the critical section. + void* stack[HeapProfileTable::kMaxStackDepth]; + int depth = HeapProfileTable::GetCallerStackTrace(skip_count + 1, stack); + SpinLockHolder l(&heap_lock); + if (is_on) { + heap_profile->RecordAlloc(ptr, bytes, depth, stack); + MaybeDumpProfileLocked(); + } +} + +// Record a deallocation in the profile. +static void RecordFree(const void* ptr) { + SpinLockHolder l(&heap_lock); + if (is_on) { + heap_profile->RecordFree(ptr); + MaybeDumpProfileLocked(); + } +} + +//---------------------------------------------------------------------- +// Allocation/deallocation hooks for MallocHook +//---------------------------------------------------------------------- + +// static +void NewHook(const void* ptr, size_t size) { + if (ptr != NULL) RecordAlloc(ptr, size, 0); +} + +// static +void DeleteHook(const void* ptr) { + if (ptr != NULL) RecordFree(ptr); +} + +// TODO(jandrews): Re-enable stack tracing +#ifdef TODO_REENABLE_STACK_TRACING +static void RawInfoStackDumper(const char* message, void*) { + RAW_LOG(INFO, "%.*s", static_cast<int>(strlen(message) - 1), message); + // -1 is to chop the \n which will be added by RAW_LOG +} +#endif + +static void MmapHook(const void* result, const void* start, size_t size, + int prot, int flags, int fd, off_t offset) { + if (FLAGS_mmap_log) { // log it + // We use PRIxS not just '%p' to avoid deadlocks + // in pretty-printing of NULL as "nil". 
+ // TODO(maxim): instead should use a safe snprintf reimplementation + RAW_LOG(INFO, + "mmap(start=0x%" PRIxPTR ", len=%" PRIuS ", prot=0x%x, flags=0x%x, " + "fd=%d, offset=0x%x) = 0x%" PRIxPTR "", + (uintptr_t) start, size, prot, flags, fd, (unsigned int) offset, + (uintptr_t) result); +#ifdef TODO_REENABLE_STACK_TRACING + DumpStackTrace(1, RawInfoStackDumper, NULL); +#endif + } +} + +static void MremapHook(const void* result, const void* old_addr, + size_t old_size, size_t new_size, + int flags, const void* new_addr) { + if (FLAGS_mmap_log) { // log it + // We use PRIxS not just '%p' to avoid deadlocks + // in pretty-printing of NULL as "nil". + // TODO(maxim): instead should use a safe snprintf reimplementation + RAW_LOG(INFO, + "mremap(old_addr=0x%" PRIxPTR ", old_size=%" PRIuS ", " + "new_size=%" PRIuS ", flags=0x%x, new_addr=0x%" PRIxPTR ") = " + "0x%" PRIxPTR "", + (uintptr_t) old_addr, old_size, new_size, flags, + (uintptr_t) new_addr, (uintptr_t) result); +#ifdef TODO_REENABLE_STACK_TRACING + DumpStackTrace(1, RawInfoStackDumper, NULL); +#endif + } +} + +static void MunmapHook(const void* ptr, size_t size) { + if (FLAGS_mmap_log) { // log it + // We use PRIxS not just '%p' to avoid deadlocks + // in pretty-printing of NULL as "nil". 
+ // TODO(maxim): instead should use a safe snprintf reimplementation + RAW_LOG(INFO, "munmap(start=0x%" PRIxPTR ", len=%" PRIuS ")", + (uintptr_t) ptr, size); +#ifdef TODO_REENABLE_STACK_TRACING + DumpStackTrace(1, RawInfoStackDumper, NULL); +#endif + } +} + +static void SbrkHook(const void* result, ptrdiff_t increment) { + if (FLAGS_mmap_log) { // log it + RAW_LOG(INFO, "sbrk(inc=%" PRIdS ") = 0x%" PRIxPTR "", + increment, (uintptr_t) result); +#ifdef TODO_REENABLE_STACK_TRACING + DumpStackTrace(1, RawInfoStackDumper, NULL); +#endif + } +} + +//---------------------------------------------------------------------- +// Starting/stopping/dumping +//---------------------------------------------------------------------- + +extern "C" void HeapProfilerStart(const char* prefix) { + SpinLockHolder l(&heap_lock); + + if (is_on) return; + + is_on = true; + + RAW_VLOG(0, "Starting tracking the heap"); + + // This should be done before the hooks are set up, since it should + // call new, and we want that to be accounted for correctly. + MallocExtension::Initialize(); + + if (FLAGS_only_mmap_profile) { + FLAGS_mmap_profile = true; + } + + if (FLAGS_mmap_profile) { + // Ask MemoryRegionMap to record all mmap, mremap, and sbrk + // call stack traces of at least size kMaxStackDepth: + MemoryRegionMap::Init(HeapProfileTable::kMaxStackDepth, + /* use_buckets */ true); + } + + if (FLAGS_mmap_log) { + // Install our hooks to do the logging: + RAW_CHECK(MallocHook::AddMmapHook(&MmapHook), ""); + RAW_CHECK(MallocHook::AddMremapHook(&MremapHook), ""); + RAW_CHECK(MallocHook::AddMunmapHook(&MunmapHook), ""); + RAW_CHECK(MallocHook::AddSbrkHook(&SbrkHook), ""); + } + + heap_profiler_memory = + LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena()); + + // Reserve space now for the heap profiler, so we can still write a + // heap profile even if the application runs out of memory. 
+ global_profiler_buffer = + reinterpret_cast<char*>(ProfilerMalloc(kProfileBufferSize)); + + heap_profile = new(ProfilerMalloc(sizeof(HeapProfileTable))) + HeapProfileTable(ProfilerMalloc, ProfilerFree, FLAGS_mmap_profile); + + last_dump_alloc = 0; + last_dump_free = 0; + high_water_mark = 0; + last_dump_time = 0; + + // We do not reset dump_count so if the user does a sequence of + // HeapProfilerStart/HeapProfileStop, we will get a continuous + // sequence of profiles. + + if (FLAGS_only_mmap_profile == false) { + // Now set the hooks that capture new/delete and malloc/free. + RAW_CHECK(MallocHook::AddNewHook(&NewHook), ""); + RAW_CHECK(MallocHook::AddDeleteHook(&DeleteHook), ""); + } + + // Copy filename prefix + RAW_DCHECK(filename_prefix == NULL, ""); + const int prefix_length = strlen(prefix); + filename_prefix = reinterpret_cast<char*>(ProfilerMalloc(prefix_length + 1)); + memcpy(filename_prefix, prefix, prefix_length); + filename_prefix[prefix_length] = '\0'; +} + +extern "C" int IsHeapProfilerRunning() { + SpinLockHolder l(&heap_lock); + return is_on ? 
1 : 0; // return an int, because C code doesn't have bool +} + +extern "C" void HeapProfilerStop() { + SpinLockHolder l(&heap_lock); + + if (!is_on) return; + + if (FLAGS_only_mmap_profile == false) { + // Unset our new/delete hooks, checking they were set: + RAW_CHECK(MallocHook::RemoveNewHook(&NewHook), ""); + RAW_CHECK(MallocHook::RemoveDeleteHook(&DeleteHook), ""); + } + if (FLAGS_mmap_log) { + // Restore mmap/sbrk hooks, checking that our hooks were set: + RAW_CHECK(MallocHook::RemoveMmapHook(&MmapHook), ""); + RAW_CHECK(MallocHook::RemoveMremapHook(&MremapHook), ""); + RAW_CHECK(MallocHook::RemoveSbrkHook(&SbrkHook), ""); + RAW_CHECK(MallocHook::RemoveMunmapHook(&MunmapHook), ""); + } + + // free profile + heap_profile->~HeapProfileTable(); + ProfilerFree(heap_profile); + heap_profile = NULL; + + // free output-buffer memory + ProfilerFree(global_profiler_buffer); + + // free prefix + ProfilerFree(filename_prefix); + filename_prefix = NULL; + + if (!LowLevelAlloc::DeleteArena(heap_profiler_memory)) { + RAW_LOG(FATAL, "Memory leak in HeapProfiler:"); + } + + if (FLAGS_mmap_profile) { + MemoryRegionMap::Shutdown(); + } + + is_on = false; +} + +extern "C" void HeapProfilerDump(const char *reason) { + SpinLockHolder l(&heap_lock); + if (is_on && !dumping) { + DumpProfileLocked(reason); + } +} + +// Signal handler that is registered when a user selectable signal +// number is defined in the environment variable HEAPPROFILESIGNAL. 
+static void HeapProfilerDumpSignal(int signal_number) { + (void)signal_number; + if (!heap_lock.TryLock()) { + return; + } + if (is_on && !dumping) { + DumpProfileLocked("signal"); + } + heap_lock.Unlock(); +} + + +//---------------------------------------------------------------------- +// Initialization/finalization code +//---------------------------------------------------------------------- + +// Initialization code +static void HeapProfilerInit() { + // Everything after this point is for setting up the profiler based on envvar + char fname[PATH_MAX]; + if (!GetUniquePathFromEnv("HEAPPROFILE", fname)) { + return; + } + // We do a uid check so we don't write out files in a setuid executable. +#ifdef HAVE_GETEUID + if (getuid() != geteuid()) { + RAW_LOG(WARNING, ("HeapProfiler: ignoring HEAPPROFILE because " + "program seems to be setuid\n")); + return; + } +#endif + + char *signal_number_str = getenv("HEAPPROFILESIGNAL"); + if (signal_number_str != NULL) { + long int signal_number = strtol(signal_number_str, NULL, 10); + intptr_t old_signal_handler = reinterpret_cast<intptr_t>(signal(signal_number, HeapProfilerDumpSignal)); + if (old_signal_handler == reinterpret_cast<intptr_t>(SIG_ERR)) { + RAW_LOG(FATAL, "Failed to set signal. 
Perhaps signal number %s is invalid\n", signal_number_str); + } else if (old_signal_handler == 0) { + RAW_LOG(INFO,"Using signal %d as heap profiling switch", signal_number); + } else { + RAW_LOG(FATAL, "Signal %d already in use\n", signal_number); + } + } + + HeapProfileTable::CleanupOldProfiles(fname); + + HeapProfilerStart(fname); +} + +// class used for finalization -- dumps the heap-profile at program exit +struct HeapProfileEndWriter { + ~HeapProfileEndWriter() { + char buf[128]; + if (heap_profile) { + const HeapProfileTable::Stats& total = heap_profile->total(); + const int64 inuse_bytes = total.alloc_size - total.free_size; + + if ((inuse_bytes >> 20) > 0) { + snprintf(buf, sizeof(buf), ("Exiting, %" PRId64 " MB in use"), + inuse_bytes >> 20); + } else if ((inuse_bytes >> 10) > 0) { + snprintf(buf, sizeof(buf), ("Exiting, %" PRId64 " kB in use"), + inuse_bytes >> 10); + } else { + snprintf(buf, sizeof(buf), ("Exiting, %" PRId64 " bytes in use"), + inuse_bytes); + } + } else { + snprintf(buf, sizeof(buf), ("Exiting")); + } + HeapProfilerDump(buf); + } +}; + +// We want to make sure tcmalloc is up and running before starting the profiler +static const TCMallocGuard tcmalloc_initializer; +REGISTER_MODULE_INITIALIZER(heapprofiler, HeapProfilerInit()); +static HeapProfileEndWriter heap_profile_end_writer; diff --git a/src/third_party/gperftools-2.7/src/internal_logging.cc b/src/third_party/gperftools-2.7/src/internal_logging.cc new file mode 100644 index 00000000000..708fa650974 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/internal_logging.cc @@ -0,0 +1,192 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Sanjay Ghemawat <opensource@google.com> + +#include <config.h> +#include "internal_logging.h" +#include <stdarg.h> // for va_end, va_start +#include <stdio.h> // for vsnprintf, va_list, etc +#include <stdlib.h> // for abort +#include <string.h> // for strlen, memcpy +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for write() +#endif + +#include <gperftools/malloc_extension.h> +#include "base/logging.h" // for perftools_vsnprintf +#include "base/spinlock.h" // for SpinLockHolder, SpinLock + +// Variables for storing crash output. Allocated statically since we +// may not be able to heap-allocate while crashing. +static SpinLock crash_lock(base::LINKER_INITIALIZED); +static bool crashed = false; +static const int kStatsBufferSize = 16 << 10; +static char stats_buffer[kStatsBufferSize] = { 0 }; + +namespace tcmalloc { + +static void WriteMessage(const char* msg, int length) { + write(STDERR_FILENO, msg, length); +} + +void (*log_message_writer)(const char* msg, int length) = WriteMessage; + + +class Logger { + public: + bool Add(const LogItem& item); + bool AddStr(const char* str, int n); + bool AddNum(uint64_t num, int base); // base must be 10 or 16. 
+ + static const int kBufSize = 200; + char* p_; + char* end_; + char buf_[kBufSize]; +}; + +void Log(LogMode mode, const char* filename, int line, + LogItem a, LogItem b, LogItem c, LogItem d) { + Logger state; + state.p_ = state.buf_; + state.end_ = state.buf_ + sizeof(state.buf_); + state.AddStr(filename, strlen(filename)) + && state.AddStr(":", 1) + && state.AddNum(line, 10) + && state.AddStr("]", 1) + && state.Add(a) + && state.Add(b) + && state.Add(c) + && state.Add(d); + + // Teminate with newline + if (state.p_ >= state.end_) { + state.p_ = state.end_ - 1; + } + *state.p_ = '\n'; + state.p_++; + + int msglen = state.p_ - state.buf_; + if (mode == kLog) { + (*log_message_writer)(state.buf_, msglen); + return; + } + + bool first_crash = false; + { + SpinLockHolder l(&crash_lock); + if (!crashed) { + crashed = true; + first_crash = true; + } + } + + (*log_message_writer)(state.buf_, msglen); + if (first_crash && mode == kCrashWithStats) { + MallocExtension::instance()->GetStats(stats_buffer, kStatsBufferSize); + (*log_message_writer)(stats_buffer, strlen(stats_buffer)); + } + + abort(); +} + +bool Logger::Add(const LogItem& item) { + // Separate items with spaces + if (p_ < end_) { + *p_ = ' '; + p_++; + } + + switch (item.tag_) { + case LogItem::kStr: + return AddStr(item.u_.str, strlen(item.u_.str)); + case LogItem::kUnsigned: + return AddNum(item.u_.unum, 10); + case LogItem::kSigned: + if (item.u_.snum < 0) { + // The cast to uint64_t is intentionally before the negation + // so that we do not attempt to negate -2^63. 
+ return AddStr("-", 1) + && AddNum(- static_cast<uint64_t>(item.u_.snum), 10); + } else { + return AddNum(static_cast<uint64_t>(item.u_.snum), 10); + } + case LogItem::kPtr: + return AddStr("0x", 2) + && AddNum(reinterpret_cast<uintptr_t>(item.u_.ptr), 16); + default: + return false; + } +} + +bool Logger::AddStr(const char* str, int n) { + if (end_ - p_ < n) { + return false; + } else { + memcpy(p_, str, n); + p_ += n; + return true; + } +} + +bool Logger::AddNum(uint64_t num, int base) { + static const char kDigits[] = "0123456789abcdef"; + char space[22]; // more than enough for 2^64 in smallest supported base (10) + char* end = space + sizeof(space); + char* pos = end; + do { + pos--; + *pos = kDigits[num % base]; + num /= base; + } while (num > 0 && pos > space); + return AddStr(pos, end - pos); +} + +} // end tcmalloc namespace + +void TCMalloc_Printer::printf(const char* format, ...) { + if (left_ > 0) { + va_list ap; + va_start(ap, format); + const int r = perftools_vsnprintf(buf_, left_, format, ap); + va_end(ap); + if (r < 0) { + // Perhaps an old glibc that returns -1 on truncation? + left_ = 0; + } else if (r > left_) { + // Truncation + left_ = 0; + } else { + left_ -= r; + buf_ += r; + } + } +} diff --git a/src/third_party/gperftools-2.7/src/internal_logging.h b/src/third_party/gperftools-2.7/src/internal_logging.h new file mode 100644 index 00000000000..0c300c3e20d --- /dev/null +++ b/src/third_party/gperftools-2.7/src/internal_logging.h @@ -0,0 +1,144 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Internal logging and related utility routines. + +#ifndef TCMALLOC_INTERNAL_LOGGING_H_ +#define TCMALLOC_INTERNAL_LOGGING_H_ + +#include <config.h> +#include <stddef.h> // for size_t +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif + +//------------------------------------------------------------------- +// Utility routines +//------------------------------------------------------------------- + +// Safe logging helper: we write directly to the stderr file +// descriptor and avoid FILE buffering because that may invoke +// malloc(). 
+// +// Example: +// Log(kLog, __FILE__, __LINE__, "error", bytes); + +namespace tcmalloc { +enum LogMode { + kLog, // Just print the message + kCrash, // Print the message and crash + kCrashWithStats // Print the message, some stats, and crash +}; + +class Logger; + +// A LogItem holds any of the argument types that can be passed to Log() +class LogItem { + public: + LogItem() : tag_(kEnd) { } + LogItem(const char* v) : tag_(kStr) { u_.str = v; } + LogItem(int v) : tag_(kSigned) { u_.snum = v; } + LogItem(long v) : tag_(kSigned) { u_.snum = v; } + LogItem(long long v) : tag_(kSigned) { u_.snum = v; } + LogItem(unsigned int v) : tag_(kUnsigned) { u_.unum = v; } + LogItem(unsigned long v) : tag_(kUnsigned) { u_.unum = v; } + LogItem(unsigned long long v) : tag_(kUnsigned) { u_.unum = v; } + LogItem(const void* v) : tag_(kPtr) { u_.ptr = v; } + private: + friend class Logger; + enum Tag { + kStr, + kSigned, + kUnsigned, + kPtr, + kEnd + }; + Tag tag_; + union { + const char* str; + const void* ptr; + int64_t snum; + uint64_t unum; + } u_; +}; + +extern PERFTOOLS_DLL_DECL void Log(LogMode mode, const char* filename, int line, + LogItem a, LogItem b = LogItem(), + LogItem c = LogItem(), LogItem d = LogItem()); + +// Tests can override this function to collect logging messages. +extern PERFTOOLS_DLL_DECL void (*log_message_writer)(const char* msg, int length); + +} // end tcmalloc namespace + +// Like assert(), but executed even in NDEBUG mode +#undef CHECK_CONDITION +#define CHECK_CONDITION(cond) \ +do { \ + if (!(cond)) { \ + ::tcmalloc::Log(::tcmalloc::kCrash, __FILE__, __LINE__, #cond); \ + } \ +} while (0) + +// Our own version of assert() so we can avoid hanging by trying to do +// all kinds of goofy printing while holding the malloc lock. 
+#ifndef NDEBUG +#define ASSERT(cond) CHECK_CONDITION(cond) +#else +#define ASSERT(cond) ((void) 0) +#endif + +// Print into buffer +class TCMalloc_Printer { + private: + char* buf_; // Where should we write next + int left_; // Space left in buffer (including space for \0) + + public: + // REQUIRES: "length > 0" + TCMalloc_Printer(char* buf, int length) : buf_(buf), left_(length) { + buf[0] = '\0'; + } + + void printf(const char* format, ...) +#ifdef HAVE___ATTRIBUTE__ + __attribute__ ((__format__ (__printf__, 2, 3))) +#endif +; +}; + +#endif // TCMALLOC_INTERNAL_LOGGING_H_ diff --git a/src/third_party/gperftools-2.7/src/libc_override.h b/src/third_party/gperftools-2.7/src/libc_override.h new file mode 100644 index 00000000000..c981c3d593f --- /dev/null +++ b/src/third_party/gperftools-2.7/src/libc_override.h @@ -0,0 +1,99 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein <opensource@google.com> +// +// This .h file imports the code that causes tcmalloc to override libc +// versions of malloc/free/new/delete/etc. That is, it provides the +// logic that makes it so calls to malloc(10) go through tcmalloc, +// rather than the default (libc) malloc. +// +// This file also provides a method: ReplaceSystemAlloc(), that every +// libc_override_*.h file it #includes is required to provide. This +// is called when first setting up tcmalloc -- that is, when a global +// constructor in tcmalloc.cc is executed -- to do any initialization +// work that may be required for this OS. (Note we cannot entirely +// control when tcmalloc is initialized, and the system may do some +// mallocs and frees before this routine is called.) It may be a +// noop. +// +// Every libc has its own way of doing this, and sometimes the compiler +// matters too, so we have a different file for each libc, and often +// for different compilers and OS's. 
+ +#ifndef TCMALLOC_LIBC_OVERRIDE_INL_H_ +#define TCMALLOC_LIBC_OVERRIDE_INL_H_ + +#include <config.h> +#ifdef HAVE_FEATURES_H +#include <features.h> // for __GLIBC__ +#endif +#include <gperftools/tcmalloc.h> + +#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900) +#define CPP_NOTHROW noexcept +#define CPP_BADALLOC +#else +#define CPP_NOTHROW throw() +#define CPP_BADALLOC throw(std::bad_alloc) +#endif + +static void ReplaceSystemAlloc(); // defined in the .h files below + +// For windows, there are two ways to get tcmalloc. If we're +// patching, then src/windows/patch_function.cc will do the necessary +// overriding here. Otherwise, we doing the 'redefine' trick, where +// we remove malloc/new/etc from mscvcrt.dll, and just need to define +// them now. +#if defined(_WIN32) && defined(WIN32_DO_PATCHING) +void PatchWindowsFunctions(); // in src/windows/patch_function.cc +static void ReplaceSystemAlloc() { PatchWindowsFunctions(); } + +#elif defined(_WIN32) && !defined(WIN32_DO_PATCHING) +#include "libc_override_redefine.h" + +#elif defined(__APPLE__) +#include "libc_override_osx.h" + +#elif defined(__GLIBC__) +#include "libc_override_glibc.h" + +// Not all gcc systems necessarily support weak symbols, but all the +// ones I know of do, so for now just assume they all do. +#elif defined(__GNUC__) +#include "libc_override_gcc_and_weak.h" + +#else +#error Need to add support for your libc/OS here + +#endif + +#endif // TCMALLOC_LIBC_OVERRIDE_INL_H_ diff --git a/src/third_party/gperftools-2.7/src/libc_override_gcc_and_weak.h b/src/third_party/gperftools-2.7/src/libc_override_gcc_and_weak.h new file mode 100644 index 00000000000..687516467f0 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/libc_override_gcc_and_weak.h @@ -0,0 +1,244 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein <opensource@google.com> +// +// Used to override malloc routines on systems that define the +// memory allocation routines to be weak symbols in their libc +// (almost all unix-based systems are like this), on gcc, which +// suppports the 'alias' attribute. 
+ +#ifndef TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ +#define TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ + +#ifdef HAVE_SYS_CDEFS_H +#include <sys/cdefs.h> // for __THROW +#endif +#include <gperftools/tcmalloc.h> + +#include "getenv_safe.h" // TCMallocGetenvSafe +#include "base/commandlineflags.h" + +#ifndef __THROW // I guess we're not on a glibc-like system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +#ifndef __GNUC__ +# error libc_override_gcc_and_weak.h is for gcc distributions only. +#endif + +#define ALIAS(tc_fn) __attribute__ ((alias (#tc_fn), used)) + +void* operator new(size_t size) CPP_BADALLOC ALIAS(tc_new); +void operator delete(void* p) CPP_NOTHROW ALIAS(tc_delete); +void* operator new[](size_t size) CPP_BADALLOC ALIAS(tc_newarray); +void operator delete[](void* p) CPP_NOTHROW ALIAS(tc_deletearray); +void* operator new(size_t size, const std::nothrow_t& nt) CPP_NOTHROW + ALIAS(tc_new_nothrow); +void* operator new[](size_t size, const std::nothrow_t& nt) CPP_NOTHROW + ALIAS(tc_newarray_nothrow); +void operator delete(void* p, const std::nothrow_t& nt) CPP_NOTHROW + ALIAS(tc_delete_nothrow); +void operator delete[](void* p, const std::nothrow_t& nt) CPP_NOTHROW + ALIAS(tc_deletearray_nothrow); + +#if defined(ENABLE_SIZED_DELETE) + +void operator delete(void *p, size_t size) CPP_NOTHROW + ALIAS(tc_delete_sized); +void operator delete[](void *p, size_t size) CPP_NOTHROW + ALIAS(tc_deletearray_sized); + +#elif defined(ENABLE_DYNAMIC_SIZED_DELETE) && \ + (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 + +static void delegate_sized_delete(void *p, size_t s) { + (operator delete)(p); +} + +static void delegate_sized_deletearray(void *p, size_t s) { + (operator delete[])(p); +} + +extern "C" __attribute__((weak)) +int tcmalloc_sized_delete_enabled(void); + +static bool sized_delete_enabled(void) { + if (tcmalloc_sized_delete_enabled != 0) { + return !!tcmalloc_sized_delete_enabled(); + } + + const char *flag = 
TCMallocGetenvSafe("TCMALLOC_ENABLE_SIZED_DELETE"); + return tcmalloc::commandlineflags::StringToBool(flag, false); +} + +extern "C" { + +static void *resolve_delete_sized(void) { + if (sized_delete_enabled()) { + return reinterpret_cast<void *>(tc_delete_sized); + } + return reinterpret_cast<void *>(delegate_sized_delete); +} + +static void *resolve_deletearray_sized(void) { + if (sized_delete_enabled()) { + return reinterpret_cast<void *>(tc_deletearray_sized); + } + return reinterpret_cast<void *>(delegate_sized_deletearray); +} + +} + +void operator delete(void *p, size_t size) CPP_NOTHROW + __attribute__((ifunc("resolve_delete_sized"))); +void operator delete[](void *p, size_t size) CPP_NOTHROW + __attribute__((ifunc("resolve_deletearray_sized"))); + +#else /* !ENABLE_SIZED_DELETE && !ENABLE_DYN_SIZED_DELETE */ + +void operator delete(void *p, size_t size) CPP_NOTHROW + ALIAS(tc_delete); +void operator delete[](void *p, size_t size) CPP_NOTHROW + ALIAS(tc_deletearray); + +#endif /* !ENABLE_SIZED_DELETE && !ENABLE_DYN_SIZED_DELETE */ + +#if defined(ENABLE_ALIGNED_NEW_DELETE) + +void* operator new(size_t size, std::align_val_t al) + ALIAS(tc_new_aligned); +void operator delete(void* p, std::align_val_t al) CPP_NOTHROW + ALIAS(tc_delete_aligned); +void* operator new[](size_t size, std::align_val_t al) + ALIAS(tc_newarray_aligned); +void operator delete[](void* p, std::align_val_t al) CPP_NOTHROW + ALIAS(tc_deletearray_aligned); +void* operator new(size_t size, std::align_val_t al, const std::nothrow_t& nt) CPP_NOTHROW + ALIAS(tc_new_aligned_nothrow); +void* operator new[](size_t size, std::align_val_t al, const std::nothrow_t& nt) CPP_NOTHROW + ALIAS(tc_newarray_aligned_nothrow); +void operator delete(void* p, std::align_val_t al, const std::nothrow_t& nt) CPP_NOTHROW + ALIAS(tc_delete_aligned_nothrow); +void operator delete[](void* p, std::align_val_t al, const std::nothrow_t& nt) CPP_NOTHROW + ALIAS(tc_deletearray_aligned_nothrow); + +#if 
defined(ENABLE_SIZED_DELETE) + +void operator delete(void *p, size_t size, std::align_val_t al) CPP_NOTHROW + ALIAS(tc_delete_sized_aligned); +void operator delete[](void *p, size_t size, std::align_val_t al) CPP_NOTHROW + ALIAS(tc_deletearray_sized_aligned); + +#else /* defined(ENABLE_SIZED_DELETE) */ + +#if defined(ENABLE_DYNAMIC_SIZED_DELETE) && \ + (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 + +static void delegate_sized_aligned_delete(void *p, size_t s, std::align_val_t al) { + (operator delete)(p, al); +} + +static void delegate_sized_aligned_deletearray(void *p, size_t s, std::align_val_t al) { + (operator delete[])(p, al); +} + +extern "C" { + +static void *resolve_delete_sized_aligned(void) { + if (sized_delete_enabled()) { + return reinterpret_cast<void *>(tc_delete_sized_aligned); + } + return reinterpret_cast<void *>(delegate_sized_aligned_delete); +} + +static void *resolve_deletearray_sized_aligned(void) { + if (sized_delete_enabled()) { + return reinterpret_cast<void *>(tc_deletearray_sized_aligned); + } + return reinterpret_cast<void *>(delegate_sized_aligned_deletearray); +} + +} + +void operator delete(void *p, size_t size, std::align_val_t al) CPP_NOTHROW + __attribute__((ifunc("resolve_delete_sized_aligned"))); +void operator delete[](void *p, size_t size, std::align_val_t al) CPP_NOTHROW + __attribute__((ifunc("resolve_deletearray_sized_aligned"))); + +#else /* defined(ENABLE_DYN_SIZED_DELETE) */ + +void operator delete(void *p, size_t size, std::align_val_t al) CPP_NOTHROW + ALIAS(tc_delete); +void operator delete[](void *p, size_t size, std::align_val_t al) CPP_NOTHROW + ALIAS(tc_deletearray); + +#endif /* defined(ENABLE_DYN_SIZED_DELETE) */ + +#endif /* defined(ENABLE_SIZED_DELETE) */ + +#endif /* defined(ENABLE_ALIGNED_NEW_DELETE) */ + +extern "C" { + void* malloc(size_t size) __THROW ALIAS(tc_malloc); + void free(void* ptr) __THROW ALIAS(tc_free); + void* realloc(void* ptr, size_t size) __THROW ALIAS(tc_realloc); + void* calloc(size_t n, 
size_t size) __THROW ALIAS(tc_calloc); + void cfree(void* ptr) __THROW ALIAS(tc_cfree); + void* memalign(size_t align, size_t s) __THROW ALIAS(tc_memalign); + void* aligned_alloc(size_t align, size_t s) __THROW ALIAS(tc_memalign); + void* valloc(size_t size) __THROW ALIAS(tc_valloc); + void* pvalloc(size_t size) __THROW ALIAS(tc_pvalloc); + int posix_memalign(void** r, size_t a, size_t s) __THROW + ALIAS(tc_posix_memalign); +#ifndef __UCLIBC__ + void malloc_stats(void) __THROW ALIAS(tc_malloc_stats); +#endif + int mallopt(int cmd, int value) __THROW ALIAS(tc_mallopt); +#ifdef HAVE_STRUCT_MALLINFO + struct mallinfo mallinfo(void) __THROW ALIAS(tc_mallinfo); +#endif + size_t malloc_size(void* p) __THROW ALIAS(tc_malloc_size); +#if defined(__ANDROID__) + size_t malloc_usable_size(const void* p) __THROW + ALIAS(tc_malloc_size); +#else + size_t malloc_usable_size(void* p) __THROW ALIAS(tc_malloc_size); +#endif +} // extern "C" + +#undef ALIAS + +// No need to do anything at tcmalloc-registration time: we do it all +// via overriding weak symbols (at link time). +static void ReplaceSystemAlloc() { } + +#endif // TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ diff --git a/src/third_party/gperftools-2.7/src/libc_override_glibc.h b/src/third_party/gperftools-2.7/src/libc_override_glibc.h new file mode 100644 index 00000000000..32692132a38 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/libc_override_glibc.h @@ -0,0 +1,92 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein <opensource@google.com> +// +// Used to override malloc routines on systems that are using glibc. + +#ifndef TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ +#define TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ + +#include <config.h> +#include <features.h> // for __GLIBC__ +#include <gperftools/tcmalloc.h> + +#ifndef __GLIBC__ +# error libc_override_glibc.h is for glibc distributions only. +#endif + +// In glibc, the memory-allocation methods are weak symbols, so we can +// just override them with our own. If we're using gcc, we can use +// __attribute__((alias)) to do the overriding easily (exception: +// Mach-O, which doesn't support aliases). Otherwise we have to use a +// function call. 
+#if !defined(__GNUC__) || defined(__MACH__) + +// This also defines ReplaceSystemAlloc(). +# include "libc_override_redefine.h" // defines functions malloc()/etc + +#else // #if !defined(__GNUC__) || defined(__MACH__) + +// If we get here, we're a gcc system, so do all the overriding we do +// with gcc. This does the overriding of all the 'normal' memory +// allocation. This also defines ReplaceSystemAlloc(). +# include "libc_override_gcc_and_weak.h" + +// We also have to do some glibc-specific overriding. Some library +// routines on RedHat 9 allocate memory using malloc() and free it +// using __libc_free() (or vice-versa). Since we provide our own +// implementations of malloc/free, we need to make sure that the +// __libc_XXX variants (defined as part of glibc) also point to the +// same implementations. Since it only matters for redhat, we +// do it inside the gcc #ifdef, since redhat uses gcc. +// TODO(csilvers): only do this if we detect we're an old enough glibc? + +#define ALIAS(tc_fn) __attribute__ ((alias (#tc_fn))) +extern "C" { + void* __libc_malloc(size_t size) ALIAS(tc_malloc); + void __libc_free(void* ptr) ALIAS(tc_free); + void* __libc_realloc(void* ptr, size_t size) ALIAS(tc_realloc); + void* __libc_calloc(size_t n, size_t size) ALIAS(tc_calloc); + void __libc_cfree(void* ptr) ALIAS(tc_cfree); + void* __libc_memalign(size_t align, size_t s) ALIAS(tc_memalign); + void* __libc_valloc(size_t size) ALIAS(tc_valloc); + void* __libc_pvalloc(size_t size) ALIAS(tc_pvalloc); + int __posix_memalign(void** r, size_t a, size_t s) ALIAS(tc_posix_memalign); +} // extern "C" +#undef ALIAS + +#endif // #if defined(__GNUC__) && !defined(__MACH__) + +// No need to write ReplaceSystemAlloc(); one of the #includes above +// did it for us. 
+ +#endif // TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ diff --git a/src/third_party/gperftools-2.7/src/libc_override_osx.h b/src/third_party/gperftools-2.7/src/libc_override_osx.h new file mode 100644 index 00000000000..9d5d611504a --- /dev/null +++ b/src/third_party/gperftools-2.7/src/libc_override_osx.h @@ -0,0 +1,308 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Craig Silverstein <opensource@google.com> +// +// Used to override malloc routines on OS X systems. We use the +// malloc-zone functionality built into OS X to register our malloc +// routine. +// +// 1) We used to use the normal 'override weak libc malloc/etc' +// technique for OS X. This is not optimal because mach does not +// support the 'alias' attribute, so we had to have forwarding +// functions. It also does not work very well with OS X shared +// libraries (dylibs) -- in general, the shared libs don't use +// tcmalloc unless run with the DYLD_FORCE_FLAT_NAMESPACE envvar. +// +// 2) Another approach would be to use an interposition array: +// static const interpose_t interposers[] __attribute__((section("__DATA, __interpose"))) = { +// { (void *)tc_malloc, (void *)malloc }, +// { (void *)tc_free, (void *)free }, +// }; +// This requires the user to set the DYLD_INSERT_LIBRARIES envvar, so +// is not much better. +// +// 3) Registering a new malloc zone avoids all these issues: +// http://www.opensource.apple.com/source/Libc/Libc-583/include/malloc/malloc.h +// http://www.opensource.apple.com/source/Libc/Libc-583/gen/malloc.c +// If we make tcmalloc the default malloc zone (undocumented but +// possible) then all new allocs use it, even those in shared +// libraries. Allocs done before tcmalloc was installed, or in libs +// that aren't using tcmalloc for some reason, will correctly go +// through the malloc-zone interface when free-ing, and will pick up +// the libc free rather than tcmalloc free. So it should "never" +// cause a crash (famous last words). +// +// 4) The routines one must define for one's own malloc have changed +// between OS X versions. This requires some hoops on our part, but +// is only really annoying when it comes to posix_memalign. The right +// behavior there depends on what OS version tcmalloc was compiled on, +// but also what OS version the program is running on. 
For now, we +// punt and don't implement our own posix_memalign. Apps that really +// care can use tc_posix_memalign directly. + +#ifndef TCMALLOC_LIBC_OVERRIDE_OSX_INL_H_ +#define TCMALLOC_LIBC_OVERRIDE_OSX_INL_H_ + +#include <config.h> +#ifdef HAVE_FEATURES_H +#include <features.h> +#endif +#include <gperftools/tcmalloc.h> + +#if !defined(__APPLE__) +# error libc_override_glibc-osx.h is for OS X distributions only. +#endif + +#include <AvailabilityMacros.h> +#include <malloc/malloc.h> + +namespace tcmalloc { + void CentralCacheLockAll(); + void CentralCacheUnlockAll(); +} + +// from AvailabilityMacros.h +#if defined(MAC_OS_X_VERSION_10_6) && \ + MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 +extern "C" { + // This function is only available on 10.6 (and later) but the + // LibSystem headers do not use AvailabilityMacros.h to handle weak + // importing automatically. This prototype is a copy of the one in + // <malloc/malloc.h> with the WEAK_IMPORT_ATTRBIUTE added. + extern malloc_zone_t *malloc_default_purgeable_zone(void) + WEAK_IMPORT_ATTRIBUTE; +} +#endif + +// We need to provide wrappers around all the libc functions. +namespace { +size_t mz_size(malloc_zone_t* zone, const void* ptr) { + if (MallocExtension::instance()->GetOwnership(ptr) != MallocExtension::kOwned) + return 0; // malloc_zone semantics: return 0 if we don't own the memory + + // TODO(csilvers): change this method to take a const void*, one day. 
+ return MallocExtension::instance()->GetAllocatedSize(const_cast<void*>(ptr)); +} + +void* mz_malloc(malloc_zone_t* zone, size_t size) { + return tc_malloc(size); +} + +void* mz_calloc(malloc_zone_t* zone, size_t num_items, size_t size) { + return tc_calloc(num_items, size); +} + +void* mz_valloc(malloc_zone_t* zone, size_t size) { + return tc_valloc(size); +} + +void mz_free(malloc_zone_t* zone, void* ptr) { + return tc_free(ptr); +} + +void* mz_realloc(malloc_zone_t* zone, void* ptr, size_t size) { + return tc_realloc(ptr, size); +} + +void* mz_memalign(malloc_zone_t* zone, size_t align, size_t size) { + return tc_memalign(align, size); +} + +void mz_destroy(malloc_zone_t* zone) { + // A no-op -- we will not be destroyed! +} + +// malloc_introspection callbacks. I'm not clear on what all of these do. +kern_return_t mi_enumerator(task_t task, void *, + unsigned type_mask, vm_address_t zone_address, + memory_reader_t reader, + vm_range_recorder_t recorder) { + // Should enumerate all the pointers we have. Seems like a lot of work. + return KERN_FAILURE; +} + +size_t mi_good_size(malloc_zone_t *zone, size_t size) { + // I think it's always safe to return size, but we maybe could do better. 
+ return size; +} + +boolean_t mi_check(malloc_zone_t *zone) { + return MallocExtension::instance()->VerifyAllMemory(); +} + +void mi_print(malloc_zone_t *zone, boolean_t verbose) { + int bufsize = 8192; + if (verbose) + bufsize = 102400; // I picked this size arbitrarily + char* buffer = new char[bufsize]; + MallocExtension::instance()->GetStats(buffer, bufsize); + fprintf(stdout, "%s", buffer); + delete[] buffer; +} + +void mi_log(malloc_zone_t *zone, void *address) { + // I don't think we support anything like this +} + +void mi_force_lock(malloc_zone_t *zone) { + tcmalloc::CentralCacheLockAll(); +} + +void mi_force_unlock(malloc_zone_t *zone) { + tcmalloc::CentralCacheUnlockAll(); +} + +void mi_statistics(malloc_zone_t *zone, malloc_statistics_t *stats) { + // TODO(csilvers): figure out how to fill these out + stats->blocks_in_use = 0; + stats->size_in_use = 0; + stats->max_size_in_use = 0; + stats->size_allocated = 0; +} + +boolean_t mi_zone_locked(malloc_zone_t *zone) { + return false; // Hopefully unneeded by us! +} + +} // unnamed namespace + +// OS X doesn't have pvalloc, cfree, malloc_statc, etc, so we can just +// define our own. :-) OS X supplies posix_memalign in some versions +// but not others, either strongly or weakly linked, in a way that's +// difficult enough to code to correctly, that I just don't try to +// support either memalign() or posix_memalign(). If you need them +// and are willing to code to tcmalloc, you can use tc_posix_memalign(). +extern "C" { + void cfree(void* p) { tc_cfree(p); } + void* pvalloc(size_t s) { return tc_pvalloc(s); } + void malloc_stats(void) { tc_malloc_stats(); } + int mallopt(int cmd, int v) { return tc_mallopt(cmd, v); } + // No struct mallinfo on OS X, so don't define mallinfo(). + // An alias for malloc_size(), which OS X defines. 
+ size_t malloc_usable_size(void* p) { return tc_malloc_size(p); } +} // extern "C" + +static malloc_zone_t *get_default_zone() { + malloc_zone_t **zones = NULL; + unsigned int num_zones = 0; + + /* + * On OSX 10.12, malloc_default_zone returns a special zone that is not + * present in the list of registered zones. That zone uses a "lite zone" + * if one is present (apparently enabled when malloc stack logging is + * enabled), or the first registered zone otherwise. In practice this + * means unless malloc stack logging is enabled, the first registered + * zone is the default. + * So get the list of zones to get the first one, instead of relying on + * malloc_default_zone. + */ + if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, (vm_address_t**) &zones, + &num_zones)) { + /* Reset the value in case the failure happened after it was set. */ + num_zones = 0; + } + + if (num_zones) + return zones[0]; + + return malloc_default_zone(); +} + + +static void ReplaceSystemAlloc() { + static malloc_introspection_t tcmalloc_introspection; + memset(&tcmalloc_introspection, 0, sizeof(tcmalloc_introspection)); + + tcmalloc_introspection.enumerator = &mi_enumerator; + tcmalloc_introspection.good_size = &mi_good_size; + tcmalloc_introspection.check = &mi_check; + tcmalloc_introspection.print = &mi_print; + tcmalloc_introspection.log = &mi_log; + tcmalloc_introspection.force_lock = &mi_force_lock; + tcmalloc_introspection.force_unlock = &mi_force_unlock; + + static malloc_zone_t tcmalloc_zone; + memset(&tcmalloc_zone, 0, sizeof(malloc_zone_t)); + + // Start with a version 4 zone which is used for OS X 10.4 and 10.5. 
+ tcmalloc_zone.version = 4; + tcmalloc_zone.zone_name = "tcmalloc"; + tcmalloc_zone.size = &mz_size; + tcmalloc_zone.malloc = &mz_malloc; + tcmalloc_zone.calloc = &mz_calloc; + tcmalloc_zone.valloc = &mz_valloc; + tcmalloc_zone.free = &mz_free; + tcmalloc_zone.realloc = &mz_realloc; + tcmalloc_zone.destroy = &mz_destroy; + tcmalloc_zone.batch_malloc = NULL; + tcmalloc_zone.batch_free = NULL; + tcmalloc_zone.introspect = &tcmalloc_introspection; + + // from AvailabilityMacros.h +#if defined(MAC_OS_X_VERSION_10_6) && \ + MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 + // Switch to version 6 on OSX 10.6 to support memalign. + tcmalloc_zone.version = 6; + tcmalloc_zone.free_definite_size = NULL; + tcmalloc_zone.memalign = &mz_memalign; + tcmalloc_introspection.zone_locked = &mi_zone_locked; + + // Request the default purgable zone to force its creation. The + // current default zone is registered with the purgable zone for + // doing tiny and small allocs. Sadly, it assumes that the default + // zone is the szone implementation from OS X and will crash if it + // isn't. By creating the zone now, this will be true and changing + // the default zone won't cause a problem. This only needs to + // happen when actually running on OS X 10.6 and higher (note the + // ifdef above only checks if we were *compiled* with 10.6 or + // higher; at runtime we have to check if this symbol is defined.) + if (malloc_default_purgeable_zone) { + malloc_default_purgeable_zone(); + } +#endif + + // Register the tcmalloc zone. At this point, it will not be the + // default zone. + malloc_zone_register(&tcmalloc_zone); + + // Unregister and reregister the default zone. Unregistering swaps + // the specified zone with the last one registered which for the + // default zone makes the more recently registered zone the default + // zone. The default zone is then re-registered to ensure that + // allocations made from it earlier will be handled correctly. 
+ // Things are not guaranteed to work that way, but it's how they work now. + malloc_zone_t *default_zone = get_default_zone(); + malloc_zone_unregister(default_zone); + malloc_zone_register(default_zone); +} + +#endif // TCMALLOC_LIBC_OVERRIDE_OSX_INL_H_ diff --git a/src/third_party/gperftools-2.7/src/libc_override_redefine.h b/src/third_party/gperftools-2.7/src/libc_override_redefine.h new file mode 100644 index 00000000000..4d61b25a91a --- /dev/null +++ b/src/third_party/gperftools-2.7/src/libc_override_redefine.h @@ -0,0 +1,131 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein <opensource@google.com> +// +// Used on systems that don't have their own definition of +// malloc/new/etc. (Typically this will be a windows msvcrt.dll that +// has been edited to remove the definitions.) We can just define our +// own as normal functions. +// +// This should also work on systems were all the malloc routines are +// defined as weak symbols, and there's no support for aliasing. + +#ifndef TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ +#define TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ + +void* operator new(size_t size) { return tc_new(size); } +void operator delete(void* p) CPP_NOTHROW { tc_delete(p); } +void* operator new[](size_t size) { return tc_newarray(size); } +void operator delete[](void* p) CPP_NOTHROW { tc_deletearray(p); } +void* operator new(size_t size, const std::nothrow_t& nt) CPP_NOTHROW { + return tc_new_nothrow(size, nt); +} +void* operator new[](size_t size, const std::nothrow_t& nt) CPP_NOTHROW { + return tc_newarray_nothrow(size, nt); +} +void operator delete(void* ptr, const std::nothrow_t& nt) CPP_NOTHROW { + return tc_delete_nothrow(ptr, nt); +} +void operator delete[](void* ptr, const std::nothrow_t& nt) CPP_NOTHROW { + return tc_deletearray_nothrow(ptr, nt); +} + +#ifdef ENABLE_SIZED_DELETE +void operator delete(void* p, size_t s) CPP_NOTHROW { tc_delete_sized(p, s); } +void operator delete[](void* p, size_t s) CPP_NOTHROW{ tc_deletearray_sized(p, s);} +#endif + +#if 
defined(ENABLE_ALIGNED_NEW_DELETE) + +void* operator new(size_t size, std::align_val_t al) { + return tc_new_aligned(size, al); +} +void operator delete(void* p, std::align_val_t al) CPP_NOTHROW { + tc_delete_aligned(p, al); +} +void* operator new[](size_t size, std::align_val_t al) { + return tc_newarray_aligned(size, al); +} +void operator delete[](void* p, std::align_val_t al) CPP_NOTHROW { + tc_deletearray_aligned(p, al); +} +void* operator new(size_t size, std::align_val_t al, const std::nothrow_t& nt) CPP_NOTHROW { + return tc_new_aligned_nothrow(size, al, nt); +} +void* operator new[](size_t size, std::align_val_t al, const std::nothrow_t& nt) CPP_NOTHROW { + return tc_newarray_aligned_nothrow(size, al, nt); +} +void operator delete(void* ptr, std::align_val_t al, const std::nothrow_t& nt) CPP_NOTHROW { + return tc_delete_aligned_nothrow(ptr, al, nt); +} +void operator delete[](void* ptr, std::align_val_t al, const std::nothrow_t& nt) CPP_NOTHROW { + return tc_deletearray_aligned_nothrow(ptr, al, nt); +} + +#ifdef ENABLE_SIZED_DELETE +void operator delete(void* p, size_t s, std::align_val_t al) CPP_NOTHROW { + tc_delete_sized_aligned(p, s, al); +} +void operator delete[](void* p, size_t s, std::align_val_t al) CPP_NOTHROW { + tc_deletearray_sized_aligned(p, s, al); +} +#endif + +#endif // defined(ENABLE_ALIGNED_NEW_DELETE) + +extern "C" { + void* malloc(size_t s) { return tc_malloc(s); } + void free(void* p) { tc_free(p); } + void* realloc(void* p, size_t s) { return tc_realloc(p, s); } + void* calloc(size_t n, size_t s) { return tc_calloc(n, s); } + void cfree(void* p) { tc_cfree(p); } + void* memalign(size_t a, size_t s) { return tc_memalign(a, s); } + void* aligned_alloc(size_t a, size_t s) { return tc_memalign(a, s); } + void* valloc(size_t s) { return tc_valloc(s); } + void* pvalloc(size_t s) { return tc_pvalloc(s); } + int posix_memalign(void** r, size_t a, size_t s) { + return tc_posix_memalign(r, a, s); + } + void malloc_stats(void) { 
tc_malloc_stats(); } + int mallopt(int cmd, int v) { return tc_mallopt(cmd, v); } +#ifdef HAVE_STRUCT_MALLINFO + struct mallinfo mallinfo(void) { return tc_mallinfo(); } +#endif + size_t malloc_size(void* p) { return tc_malloc_size(p); } + size_t malloc_usable_size(void* p) { return tc_malloc_size(p); } +} // extern "C" + +// No need to do anything at tcmalloc-registration time: we do it all +// via overriding weak symbols (at link time). +static void ReplaceSystemAlloc() { } + +#endif // TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ diff --git a/src/third_party/gperftools-2.7/src/linked_list.h b/src/third_party/gperftools-2.7/src/linked_list.h new file mode 100644 index 00000000000..f25b6f89014 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/linked_list.h @@ -0,0 +1,115 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Some very basic linked list functions for dealing with using void * as +// storage. + +#ifndef TCMALLOC_LINKED_LIST_H_ +#define TCMALLOC_LINKED_LIST_H_ + +#include <stddef.h> + +namespace tcmalloc { + +inline void *SLL_Next(void *t) { + return *(reinterpret_cast<void**>(t)); +} + +inline void SLL_SetNext(void *t, void *n) { + *(reinterpret_cast<void**>(t)) = n; +} + +inline void SLL_Push(void **list, void *element) { + void *next = *list; + *list = element; + SLL_SetNext(element, next); +} + +inline void *SLL_Pop(void **list) { + void *result = *list; + *list = SLL_Next(*list); + return result; +} + +inline bool SLL_TryPop(void **list, void **rv) { + void *result = *list; + if (!result) { + return false; + } + void *next = SLL_Next(*list); + *list = next; + *rv = result; + return true; +} + +// Remove N elements from a linked list to which head points. head will be +// modified to point to the new head. start and end will point to the first +// and last nodes of the range. Note that end will point to NULL after this +// function is called. +inline void SLL_PopRange(void **head, int N, void **start, void **end) { + if (N == 0) { + *start = NULL; + *end = NULL; + return; + } + + void *tmp = *head; + for (int i = 1; i < N; ++i) { + tmp = SLL_Next(tmp); + } + + *start = *head; + *end = tmp; + *head = SLL_Next(tmp); + // Unlink range from list. 
+ SLL_SetNext(tmp, NULL); +} + +inline void SLL_PushRange(void **head, void *start, void *end) { + if (!start) return; + SLL_SetNext(end, *head); + *head = start; +} + +inline size_t SLL_Size(void *head) { + int count = 0; + while (head) { + count++; + head = SLL_Next(head); + } + return count; +} + +} // namespace tcmalloc + +#endif // TCMALLOC_LINKED_LIST_H_ diff --git a/src/third_party/gperftools-2.7/src/malloc_extension.cc b/src/third_party/gperftools-2.7/src/malloc_extension.cc new file mode 100644 index 00000000000..6e695523be1 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/malloc_extension.cc @@ -0,0 +1,388 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <config.h> +#include <assert.h> +#include <string.h> +#include <stdio.h> +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include <string> +#include "base/dynamic_annotations.h" +#include "base/sysinfo.h" // for FillProcSelfMaps +#ifndef NO_HEAP_CHECK +#include "gperftools/heap-checker.h" +#endif +#include "gperftools/malloc_extension.h" +#include "gperftools/malloc_extension_c.h" +#include "maybe_threads.h" +#include "base/googleinit.h" + +using STL_NAMESPACE::string; +using STL_NAMESPACE::vector; + +static void DumpAddressMap(string* result) { + *result += "\nMAPPED_LIBRARIES:\n"; + // We keep doubling until we get a fit + const size_t old_resultlen = result->size(); + for (int amap_size = 10240; amap_size < 10000000; amap_size *= 2) { + result->resize(old_resultlen + amap_size); + bool wrote_all = false; + const int bytes_written = + tcmalloc::FillProcSelfMaps(&((*result)[old_resultlen]), amap_size, + &wrote_all); + if (wrote_all) { // we fit! + (*result)[old_resultlen + bytes_written] = '\0'; + result->resize(old_resultlen + bytes_written); + return; + } + } + result->reserve(old_resultlen); // just don't print anything +} + +// Note: this routine is meant to be called before threads are spawned. 
+void MallocExtension::Initialize() { + static bool initialize_called = false; + + if (initialize_called) return; + initialize_called = true; + +#ifdef __GLIBC__ + // GNU libc++ versions 3.3 and 3.4 obey the environment variables + // GLIBCPP_FORCE_NEW and GLIBCXX_FORCE_NEW respectively. Setting + // one of these variables forces the STL default allocator to call + // new() or delete() for each allocation or deletion. Otherwise + // the STL allocator tries to avoid the high cost of doing + // allocations by pooling memory internally. However, tcmalloc + // does allocations really fast, especially for the types of small + // items one sees in STL, so it's better off just using us. + // TODO: control whether we do this via an environment variable? + setenv("GLIBCPP_FORCE_NEW", "1", false /* no overwrite*/); + setenv("GLIBCXX_FORCE_NEW", "1", false /* no overwrite*/); + + // Now we need to make the setenv 'stick', which it may not do since + // the env is flakey before main() is called. But luckily stl only + // looks at this env var the first time it tries to do an alloc, and + // caches what it finds. So we just cause an stl alloc here. 
+ string dummy("I need to be allocated"); + dummy += "!"; // so the definition of dummy isn't optimized out +#endif /* __GLIBC__ */ +} + +// SysAllocator implementation +SysAllocator::~SysAllocator() {} + +// Default implementation -- does nothing +MallocExtension::~MallocExtension() { } +bool MallocExtension::VerifyAllMemory() { return true; } +bool MallocExtension::VerifyNewMemory(const void* p) { return true; } +bool MallocExtension::VerifyArrayNewMemory(const void* p) { return true; } +bool MallocExtension::VerifyMallocMemory(const void* p) { return true; } + +bool MallocExtension::GetNumericProperty(const char* property, size_t* value) { + return false; +} + +bool MallocExtension::SetNumericProperty(const char* property, size_t value) { + return false; +} + +void MallocExtension::GetStats(char* buffer, int length) { + assert(length > 0); + buffer[0] = '\0'; +} + +bool MallocExtension::MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]) { + *blocks = 0; + *total = 0; + memset(histogram, 0, sizeof(*histogram) * kMallocHistogramSize); + return true; +} + +void** MallocExtension::ReadStackTraces(int* sample_period) { + return NULL; +} + +void** MallocExtension::ReadHeapGrowthStackTraces() { + return NULL; +} + +void MallocExtension::MarkThreadIdle() { + // Default implementation does nothing +} + +void MallocExtension::MarkThreadBusy() { + // Default implementation does nothing +} + +SysAllocator* MallocExtension::GetSystemAllocator() { + return NULL; +} + +void MallocExtension::SetSystemAllocator(SysAllocator *a) { + // Default implementation does nothing +} + +void MallocExtension::ReleaseToSystem(size_t num_bytes) { + // Default implementation does nothing +} + +void MallocExtension::ReleaseFreeMemory() { + ReleaseToSystem(static_cast<size_t>(-1)); // SIZE_T_MAX +} + +void MallocExtension::SetMemoryReleaseRate(double rate) { + // Default implementation does nothing +} + +double MallocExtension::GetMemoryReleaseRate() { + return 
-1.0; +} + +size_t MallocExtension::GetEstimatedAllocatedSize(size_t size) { + return size; +} + +size_t MallocExtension::GetAllocatedSize(const void* p) { + assert(GetOwnership(p) != kNotOwned); + return 0; +} + +MallocExtension::Ownership MallocExtension::GetOwnership(const void* p) { + return kUnknownOwnership; +} + +void MallocExtension::GetFreeListSizes( + vector<MallocExtension::FreeListInfo>* v) { + v->clear(); +} + +size_t MallocExtension::GetThreadCacheSize() { + return 0; +} + +void MallocExtension::MarkThreadTemporarilyIdle() { + // Default implementation does nothing +} + +// The current malloc extension object. + +static MallocExtension* current_instance; + +static void InitModule() { + if (current_instance != NULL) { + return; + } + current_instance = new MallocExtension; +#ifndef NO_HEAP_CHECK + HeapLeakChecker::IgnoreObject(current_instance); +#endif +} + +REGISTER_MODULE_INITIALIZER(malloc_extension_init, InitModule()) + +MallocExtension* MallocExtension::instance() { + InitModule(); + return current_instance; +} + +void MallocExtension::Register(MallocExtension* implementation) { + InitModule(); + // When running under valgrind, our custom malloc is replaced with + // valgrind's one and malloc extensions will not work. (Note: + // callers should be responsible for checking that they are the + // malloc that is really being run, before calling Register. This + // is just here as an extra sanity check.) 
+ if (!RunningOnValgrind()) { + current_instance = implementation; + } +} + +// ----------------------------------------------------------------------- +// Heap sampling support +// ----------------------------------------------------------------------- + +namespace { + +// Accessors +uintptr_t Count(void** entry) { + return reinterpret_cast<uintptr_t>(entry[0]); +} +uintptr_t Size(void** entry) { + return reinterpret_cast<uintptr_t>(entry[1]); +} +uintptr_t Depth(void** entry) { + return reinterpret_cast<uintptr_t>(entry[2]); +} +void* PC(void** entry, int i) { + return entry[3+i]; +} + +void PrintCountAndSize(MallocExtensionWriter* writer, + uintptr_t count, uintptr_t size) { + char buf[100]; + snprintf(buf, sizeof(buf), + "%6" PRIu64 ": %8" PRIu64 " [%6" PRIu64 ": %8" PRIu64 "] @", + static_cast<uint64>(count), + static_cast<uint64>(size), + static_cast<uint64>(count), + static_cast<uint64>(size)); + writer->append(buf, strlen(buf)); +} + +void PrintHeader(MallocExtensionWriter* writer, + const char* label, void** entries) { + // Compute the total count and total size + uintptr_t total_count = 0; + uintptr_t total_size = 0; + for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { + total_count += Count(entry); + total_size += Size(entry); + } + + const char* const kTitle = "heap profile: "; + writer->append(kTitle, strlen(kTitle)); + PrintCountAndSize(writer, total_count, total_size); + writer->append(" ", 1); + writer->append(label, strlen(label)); + writer->append("\n", 1); +} + +void PrintStackEntry(MallocExtensionWriter* writer, void** entry) { + PrintCountAndSize(writer, Count(entry), Size(entry)); + + for (int i = 0; i < Depth(entry); i++) { + char buf[32]; + snprintf(buf, sizeof(buf), " %p", PC(entry, i)); + writer->append(buf, strlen(buf)); + } + writer->append("\n", 1); +} + +} + +void MallocExtension::GetHeapSample(MallocExtensionWriter* writer) { + int sample_period = 0; + void** entries = ReadStackTraces(&sample_period); + if 
(entries == NULL) { + const char* const kErrorMsg = + "This malloc implementation does not support sampling.\n" + "As of 2005/01/26, only tcmalloc supports sampling, and\n" + "you are probably running a binary that does not use\n" + "tcmalloc.\n"; + writer->append(kErrorMsg, strlen(kErrorMsg)); + return; + } + + char label[32]; + sprintf(label, "heap_v2/%d", sample_period); + PrintHeader(writer, label, entries); + for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { + PrintStackEntry(writer, entry); + } + delete[] entries; + + DumpAddressMap(writer); +} + +void MallocExtension::GetHeapGrowthStacks(MallocExtensionWriter* writer) { + void** entries = ReadHeapGrowthStackTraces(); + if (entries == NULL) { + const char* const kErrorMsg = + "This malloc implementation does not support " + "ReadHeapGrowthStackTraces().\n" + "As of 2005/09/27, only tcmalloc supports this, and you\n" + "are probably running a binary that does not use tcmalloc.\n"; + writer->append(kErrorMsg, strlen(kErrorMsg)); + return; + } + + // Do not canonicalize the stack entries, so that we get a + // time-ordered list of stack traces, which may be useful if the + // client wants to focus on the latest stack traces. + PrintHeader(writer, "growth", entries); + for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { + PrintStackEntry(writer, entry); + } + delete[] entries; + + DumpAddressMap(writer); +} + +void MallocExtension::Ranges(void* arg, RangeFunction func) { + // No callbacks by default +} + +// These are C shims that work on the current instance. 
+ +#define C_SHIM(fn, retval, paramlist, arglist) \ + extern "C" PERFTOOLS_DLL_DECL retval MallocExtension_##fn paramlist { \ + return MallocExtension::instance()->fn arglist; \ + } + +C_SHIM(VerifyAllMemory, int, (void), ()); +C_SHIM(VerifyNewMemory, int, (const void* p), (p)); +C_SHIM(VerifyArrayNewMemory, int, (const void* p), (p)); +C_SHIM(VerifyMallocMemory, int, (const void* p), (p)); +C_SHIM(MallocMemoryStats, int, + (int* blocks, size_t* total, int histogram[kMallocHistogramSize]), + (blocks, total, histogram)); + +C_SHIM(GetStats, void, + (char* buffer, int buffer_length), (buffer, buffer_length)); +C_SHIM(GetNumericProperty, int, + (const char* property, size_t* value), (property, value)); +C_SHIM(SetNumericProperty, int, + (const char* property, size_t value), (property, value)); + +C_SHIM(MarkThreadIdle, void, (void), ()); +C_SHIM(MarkThreadBusy, void, (void), ()); +C_SHIM(ReleaseFreeMemory, void, (void), ()); +C_SHIM(ReleaseToSystem, void, (size_t num_bytes), (num_bytes)); +C_SHIM(GetEstimatedAllocatedSize, size_t, (size_t size), (size)); +C_SHIM(GetAllocatedSize, size_t, (const void* p), (p)); +C_SHIM(GetThreadCacheSize, size_t, (void), ()); +C_SHIM(MarkThreadTemporarilyIdle, void, (void), ()); + +// Can't use the shim here because of the need to translate the enums. +extern "C" +MallocExtension_Ownership MallocExtension_GetOwnership(const void* p) { + return static_cast<MallocExtension_Ownership>( + MallocExtension::instance()->GetOwnership(p)); +} diff --git a/src/third_party/gperftools-2.7/src/malloc_hook-inl.h b/src/third_party/gperftools-2.7/src/malloc_hook-inl.h new file mode 100644 index 00000000000..30375d6f167 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/malloc_hook-inl.h @@ -0,0 +1,249 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// This has the implementation details of malloc_hook that are needed +// to use malloc-hook inside the tcmalloc system. It does not hold +// any of the client-facing calls that are used to add new hooks. 
+ +#ifndef _MALLOC_HOOK_INL_H_ +#define _MALLOC_HOOK_INL_H_ + +#include <stddef.h> +#include <sys/types.h> +#include "base/atomicops.h" +#include "base/basictypes.h" +#include <gperftools/malloc_hook.h> + +#include "common.h" // for UNLIKELY + +namespace base { namespace internal { + +// Capacity of 8 means that HookList is 9 words. +static const int kHookListCapacity = 8; +// last entry is reserved for deprecated "singular" hooks. So we have +// 7 "normal" hooks per list +static const int kHookListMaxValues = 7; +static const int kHookListSingularIdx = 7; + +// HookList: a class that provides synchronized insertions and removals and +// lockless traversal. Most of the implementation is in malloc_hook.cc. +template <typename T> +struct PERFTOOLS_DLL_DECL HookList { + COMPILE_ASSERT(sizeof(T) <= sizeof(AtomicWord), T_should_fit_in_AtomicWord); + + // Adds value to the list. Note that duplicates are allowed. Thread-safe and + // blocking (acquires hooklist_spinlock). Returns true on success; false + // otherwise (failures include invalid value and no space left). + bool Add(T value); + + void FixupPrivEndLocked(); + + // Removes the first entry matching value from the list. Thread-safe and + // blocking (acquires hooklist_spinlock). Returns true on success; false + // otherwise (failures include invalid value and no value found). + bool Remove(T value); + + // Store up to n values of the list in output_array, and return the number of + // elements stored. Thread-safe and non-blocking. This is fast (one memory + // access) if the list is empty. + int Traverse(T* output_array, int n) const; + + // Fast inline implementation for fast path of Invoke*Hook. 
+ bool empty() const { + return base::subtle::NoBarrier_Load(&priv_end) == 0; + } + + // Used purely to handle deprecated singular hooks + T GetSingular() const { + const AtomicWord *place = &priv_data[kHookListSingularIdx]; + return bit_cast<T>(base::subtle::NoBarrier_Load(place)); + } + + T ExchangeSingular(T new_val); + + // This internal data is not private so that the class is an aggregate and can + // be initialized by the linker. Don't access this directly. Use the + // INIT_HOOK_LIST macro in malloc_hook.cc. + + // One more than the index of the last valid element in priv_data. During + // 'Remove' this may be past the last valid element in priv_data, but + // subsequent values will be 0. + // + // Index kHookListCapacity-1 is reserved as 'deprecated' single hook pointer + AtomicWord priv_end; + AtomicWord priv_data[kHookListCapacity]; +}; + +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::NewHook> new_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::DeleteHook> delete_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::PreMmapHook> premmap_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MmapHook> mmap_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MmapReplacement> mmap_replacement_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MunmapHook> munmap_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MunmapReplacement> munmap_replacement_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::MremapHook> mremap_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::PreSbrkHook> presbrk_hooks_; +ATTRIBUTE_VISIBILITY_HIDDEN extern HookList<MallocHook::SbrkHook> sbrk_hooks_; + +} } // namespace base::internal + +// The following method is DEPRECATED +inline MallocHook::NewHook MallocHook::GetNewHook() { + return base::internal::new_hooks_.GetSingular(); +} + +inline void MallocHook::InvokeNewHook(const void* p, size_t s) { + if 
(PREDICT_FALSE(!base::internal::new_hooks_.empty())) { + InvokeNewHookSlow(p, s); + } +} + +// The following method is DEPRECATED +inline MallocHook::DeleteHook MallocHook::GetDeleteHook() { + return base::internal::delete_hooks_.GetSingular(); +} + +inline void MallocHook::InvokeDeleteHook(const void* p) { + if (PREDICT_FALSE(!base::internal::delete_hooks_.empty())) { + InvokeDeleteHookSlow(p); + } +} + +// The following method is DEPRECATED +inline MallocHook::PreMmapHook MallocHook::GetPreMmapHook() { + return base::internal::premmap_hooks_.GetSingular(); +} + +inline void MallocHook::InvokePreMmapHook(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + if (!base::internal::premmap_hooks_.empty()) { + InvokePreMmapHookSlow(start, size, protection, flags, fd, offset); + } +} + +// The following method is DEPRECATED +inline MallocHook::MmapHook MallocHook::GetMmapHook() { + return base::internal::mmap_hooks_.GetSingular(); +} + +inline void MallocHook::InvokeMmapHook(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + if (!base::internal::mmap_hooks_.empty()) { + InvokeMmapHookSlow(result, start, size, protection, flags, fd, offset); + } +} + +inline bool MallocHook::InvokeMmapReplacement(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result) { + if (!base::internal::mmap_replacement_.empty()) { + return InvokeMmapReplacementSlow(start, size, + protection, flags, + fd, offset, + result); + } + return false; +} + +// The following method is DEPRECATED +inline MallocHook::MunmapHook MallocHook::GetMunmapHook() { + return base::internal::munmap_hooks_.GetSingular(); +} + +inline void MallocHook::InvokeMunmapHook(const void* p, size_t size) { + if (!base::internal::munmap_hooks_.empty()) { + InvokeMunmapHookSlow(p, size); + } +} + +inline bool MallocHook::InvokeMunmapReplacement( + const void* p, size_t size, 
int* result) { + if (!base::internal::mmap_replacement_.empty()) { + return InvokeMunmapReplacementSlow(p, size, result); + } + return false; +} + +// The following method is DEPRECATED +inline MallocHook::MremapHook MallocHook::GetMremapHook() { + return base::internal::mremap_hooks_.GetSingular(); +} + +inline void MallocHook::InvokeMremapHook(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr) { + if (!base::internal::mremap_hooks_.empty()) { + InvokeMremapHookSlow(result, old_addr, old_size, new_size, flags, new_addr); + } +} + +// The following method is DEPRECATED +inline MallocHook::PreSbrkHook MallocHook::GetPreSbrkHook() { + return base::internal::presbrk_hooks_.GetSingular(); +} + +inline void MallocHook::InvokePreSbrkHook(ptrdiff_t increment) { + if (!base::internal::presbrk_hooks_.empty() && increment != 0) { + InvokePreSbrkHookSlow(increment); + } +} + +// The following method is DEPRECATED +inline MallocHook::SbrkHook MallocHook::GetSbrkHook() { + return base::internal::sbrk_hooks_.GetSingular(); +} + +inline void MallocHook::InvokeSbrkHook(const void* result, + ptrdiff_t increment) { + if (!base::internal::sbrk_hooks_.empty() && increment != 0) { + InvokeSbrkHookSlow(result, increment); + } +} + +#endif /* _MALLOC_HOOK_INL_H_ */ diff --git a/src/third_party/gperftools-2.7/src/malloc_hook.cc b/src/third_party/gperftools-2.7/src/malloc_hook.cc new file mode 100644 index 00000000000..64c21658a02 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/malloc_hook.cc @@ -0,0 +1,711 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <config.h> + +// Disable the glibc prototype of mremap(), as older versions of the +// system headers define this function with only four arguments, +// whereas newer versions allow an optional fifth argument: +#ifdef HAVE_MMAP +# define mremap glibc_mremap +# include <sys/mman.h> +# undef mremap +#endif + +#include <stddef.h> +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif +#include <algorithm> +#include "base/logging.h" +#include "base/spinlock.h" +#include "maybe_emergency_malloc.h" +#include "maybe_threads.h" +#include "malloc_hook-inl.h" +#include <gperftools/malloc_hook.h> + +// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if +// you're porting to a system where you really can't get a stacktrace. +#ifdef NO_TCMALLOC_SAMPLES + // We use #define so code compiles even if you #include stacktrace.h somehow. +# define GetStackTrace(stack, depth, skip) (0) +#else +# include <gperftools/stacktrace.h> +#endif + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +using std::copy; + + +// Declaration of default weak initialization function, that can be overridden +// by linking-in a strong definition (as heap-checker.cc does). This is +// extern "C" so that it doesn't trigger gold's --detect-odr-violations warning, +// which only looks at C++ symbols. +// +// This function is declared here as weak, and defined later, rather than a more +// straightforward simple weak definition, as a workround for an icc compiler +// issue ((Intel reference 290819). This issue causes icc to resolve weak +// symbols too early, at compile rather than link time. 
By declaring it (weak) +// here, then defining it below after its use, we can avoid the problem. +extern "C" { +ATTRIBUTE_WEAK void MallocHook_InitAtFirstAllocation_HeapLeakChecker(); +} + +namespace { + +void RemoveInitialHooksAndCallInitializers(); // below. + +pthread_once_t once = PTHREAD_ONCE_INIT; + +// These hooks are installed in MallocHook as the only initial hooks. The first +// hook that is called will run RemoveInitialHooksAndCallInitializers (see the +// definition below) and then redispatch to any malloc hooks installed by +// RemoveInitialHooksAndCallInitializers. +// +// Note(llib): there is a possibility of a race in the event that there are +// multiple threads running before the first allocation. This is pretty +// difficult to achieve, but if it is then multiple threads may concurrently do +// allocations. The first caller will call +// RemoveInitialHooksAndCallInitializers via one of the initial hooks. A +// concurrent allocation may, depending on timing either: +// * still have its initial malloc hook installed, run that and block on waiting +// for the first caller to finish its call to +// RemoveInitialHooksAndCallInitializers, and proceed normally. +// * occur some time during the RemoveInitialHooksAndCallInitializers call, at +// which point there could be no initial hooks and the subsequent hooks that +// are about to be set up by RemoveInitialHooksAndCallInitializers haven't +// been installed yet. I think the worst we can get is that some allocations +// will not get reported to some hooks set by the initializers called from +// RemoveInitialHooksAndCallInitializers. 
+ +void InitialNewHook(const void* ptr, size_t size) { + perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers); + MallocHook::InvokeNewHook(ptr, size); +} + +void InitialPreMMapHook(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers); + MallocHook::InvokePreMmapHook(start, size, protection, flags, fd, offset); +} + +void InitialPreSbrkHook(ptrdiff_t increment) { + perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers); + MallocHook::InvokePreSbrkHook(increment); +} + +// This function is called at most once by one of the above initial malloc +// hooks. It removes all initial hooks and initializes all other clients that +// want to get control at the very first memory allocation. The initializers +// may assume that the initial malloc hooks have been removed. The initializers +// may set up malloc hooks and allocate memory. +void RemoveInitialHooksAndCallInitializers() { + RAW_CHECK(MallocHook::RemoveNewHook(&InitialNewHook), ""); + RAW_CHECK(MallocHook::RemovePreMmapHook(&InitialPreMMapHook), ""); + RAW_CHECK(MallocHook::RemovePreSbrkHook(&InitialPreSbrkHook), ""); + + // HeapLeakChecker is currently the only module that needs to get control on + // the first memory allocation, but one can add other modules by following the + // same weak/strong function pattern. + MallocHook_InitAtFirstAllocation_HeapLeakChecker(); +} + +} // namespace + +// Weak default initialization function that must go after its use. +extern "C" void MallocHook_InitAtFirstAllocation_HeapLeakChecker() { + // Do nothing. +} + +namespace base { namespace internal { + +// This lock is shared between all implementations of HookList::Add & Remove. +// The potential for contention is very small. 
This needs to be a SpinLock and +// not a Mutex since it's possible for Mutex locking to allocate memory (e.g., +// per-thread allocation in debug builds), which could cause infinite recursion. +static SpinLock hooklist_spinlock(base::LINKER_INITIALIZED); + +template <typename T> +bool HookList<T>::Add(T value_as_t) { + AtomicWord value = bit_cast<AtomicWord>(value_as_t); + if (value == 0) { + return false; + } + SpinLockHolder l(&hooklist_spinlock); + // Find the first slot in data that is 0. + int index = 0; + while ((index < kHookListMaxValues) && + (base::subtle::NoBarrier_Load(&priv_data[index]) != 0)) { + ++index; + } + if (index == kHookListMaxValues) { + return false; + } + AtomicWord prev_num_hooks = base::subtle::Acquire_Load(&priv_end); + base::subtle::NoBarrier_Store(&priv_data[index], value); + if (prev_num_hooks <= index) { + base::subtle::NoBarrier_Store(&priv_end, index + 1); + } + return true; +} + +template <typename T> +void HookList<T>::FixupPrivEndLocked() { + AtomicWord hooks_end = base::subtle::NoBarrier_Load(&priv_end); + while ((hooks_end > 0) && + (base::subtle::NoBarrier_Load(&priv_data[hooks_end - 1]) == 0)) { + --hooks_end; + } + base::subtle::NoBarrier_Store(&priv_end, hooks_end); +} + +template <typename T> +bool HookList<T>::Remove(T value_as_t) { + if (value_as_t == 0) { + return false; + } + SpinLockHolder l(&hooklist_spinlock); + AtomicWord hooks_end = base::subtle::NoBarrier_Load(&priv_end); + int index = 0; + while (index < hooks_end && value_as_t != bit_cast<T>( + base::subtle::NoBarrier_Load(&priv_data[index]))) { + ++index; + } + if (index == hooks_end) { + return false; + } + base::subtle::NoBarrier_Store(&priv_data[index], 0); + FixupPrivEndLocked(); + return true; +} + +template <typename T> +int HookList<T>::Traverse(T* output_array, int n) const { + AtomicWord hooks_end = base::subtle::Acquire_Load(&priv_end); + int actual_hooks_end = 0; + for (int i = 0; i < hooks_end && n > 0; ++i) { + AtomicWord data = 
base::subtle::Acquire_Load(&priv_data[i]); + if (data != 0) { + *output_array++ = bit_cast<T>(data); + ++actual_hooks_end; + --n; + } + } + return actual_hooks_end; +} + +template <typename T> +T HookList<T>::ExchangeSingular(T value_as_t) { + AtomicWord value = bit_cast<AtomicWord>(value_as_t); + AtomicWord old_value; + SpinLockHolder l(&hooklist_spinlock); + old_value = base::subtle::NoBarrier_Load(&priv_data[kHookListSingularIdx]); + base::subtle::NoBarrier_Store(&priv_data[kHookListSingularIdx], value); + if (value != 0) { + base::subtle::NoBarrier_Store(&priv_end, kHookListSingularIdx + 1); + } else { + FixupPrivEndLocked(); + } + return bit_cast<T>(old_value); +} + +// Initialize a HookList (optionally with the given initial_value in index 0). +#define INIT_HOOK_LIST { 0 } +#define INIT_HOOK_LIST_WITH_VALUE(initial_value) \ + { 1, { reinterpret_cast<AtomicWord>(initial_value) } } + +// Explicit instantiation for malloc_hook_test.cc. This ensures all the methods +// are instantiated. +template struct HookList<MallocHook::NewHook>; + +HookList<MallocHook::NewHook> new_hooks_ = + INIT_HOOK_LIST_WITH_VALUE(&InitialNewHook); +HookList<MallocHook::DeleteHook> delete_hooks_ = INIT_HOOK_LIST; +HookList<MallocHook::PreMmapHook> premmap_hooks_ = + INIT_HOOK_LIST_WITH_VALUE(&InitialPreMMapHook); +HookList<MallocHook::MmapHook> mmap_hooks_ = INIT_HOOK_LIST; +HookList<MallocHook::MunmapHook> munmap_hooks_ = INIT_HOOK_LIST; +HookList<MallocHook::MremapHook> mremap_hooks_ = INIT_HOOK_LIST; +HookList<MallocHook::PreSbrkHook> presbrk_hooks_ = + INIT_HOOK_LIST_WITH_VALUE(InitialPreSbrkHook); +HookList<MallocHook::SbrkHook> sbrk_hooks_ = INIT_HOOK_LIST; + +// These lists contain either 0 or 1 hooks. 
+HookList<MallocHook::MmapReplacement> mmap_replacement_ = { 0 }; +HookList<MallocHook::MunmapReplacement> munmap_replacement_ = { 0 }; + +#undef INIT_HOOK_LIST_WITH_VALUE +#undef INIT_HOOK_LIST + +} } // namespace base::internal + +using base::internal::kHookListMaxValues; +using base::internal::new_hooks_; +using base::internal::delete_hooks_; +using base::internal::premmap_hooks_; +using base::internal::mmap_hooks_; +using base::internal::mmap_replacement_; +using base::internal::munmap_hooks_; +using base::internal::munmap_replacement_; +using base::internal::mremap_hooks_; +using base::internal::presbrk_hooks_; +using base::internal::sbrk_hooks_; + +// These are available as C bindings as well as C++, hence their +// definition outside the MallocHook class. +extern "C" +int MallocHook_AddNewHook(MallocHook_NewHook hook) { + RAW_VLOG(10, "AddNewHook(%p)", hook); + return new_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveNewHook(MallocHook_NewHook hook) { + RAW_VLOG(10, "RemoveNewHook(%p)", hook); + return new_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddDeleteHook(MallocHook_DeleteHook hook) { + RAW_VLOG(10, "AddDeleteHook(%p)", hook); + return delete_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveDeleteHook(MallocHook_DeleteHook hook) { + RAW_VLOG(10, "RemoveDeleteHook(%p)", hook); + return delete_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddPreMmapHook(MallocHook_PreMmapHook hook) { + RAW_VLOG(10, "AddPreMmapHook(%p)", hook); + return premmap_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook) { + RAW_VLOG(10, "RemovePreMmapHook(%p)", hook); + return premmap_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook) { + RAW_VLOG(10, "SetMmapReplacement(%p)", hook); + // NOTE this is a best effort CHECK. Concurrent sets could succeed since + // this test is outside of the Add spin lock. 
+ RAW_CHECK(mmap_replacement_.empty(), "Only one MMapReplacement is allowed."); + return mmap_replacement_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook) { + RAW_VLOG(10, "RemoveMmapReplacement(%p)", hook); + return mmap_replacement_.Remove(hook); +} + +extern "C" +int MallocHook_AddMmapHook(MallocHook_MmapHook hook) { + RAW_VLOG(10, "AddMmapHook(%p)", hook); + return mmap_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook) { + RAW_VLOG(10, "RemoveMmapHook(%p)", hook); + return mmap_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook) { + RAW_VLOG(10, "AddMunmapHook(%p)", hook); + return munmap_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook) { + RAW_VLOG(10, "RemoveMunmapHook(%p)", hook); + return munmap_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook) { + RAW_VLOG(10, "SetMunmapReplacement(%p)", hook); + // NOTE this is a best effort CHECK. Concurrent sets could succeed since + // this test is outside of the Add spin lock. 
+ RAW_CHECK(munmap_replacement_.empty(), + "Only one MunmapReplacement is allowed."); + return munmap_replacement_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook) { + RAW_VLOG(10, "RemoveMunmapReplacement(%p)", hook); + return munmap_replacement_.Remove(hook); +} + +extern "C" +int MallocHook_AddMremapHook(MallocHook_MremapHook hook) { + RAW_VLOG(10, "AddMremapHook(%p)", hook); + return mremap_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMremapHook(MallocHook_MremapHook hook) { + RAW_VLOG(10, "RemoveMremapHook(%p)", hook); + return mremap_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddPreSbrkHook(MallocHook_PreSbrkHook hook) { + RAW_VLOG(10, "AddPreSbrkHook(%p)", hook); + return presbrk_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemovePreSbrkHook(MallocHook_PreSbrkHook hook) { + RAW_VLOG(10, "RemovePreSbrkHook(%p)", hook); + return presbrk_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddSbrkHook(MallocHook_SbrkHook hook) { + RAW_VLOG(10, "AddSbrkHook(%p)", hook); + return sbrk_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveSbrkHook(MallocHook_SbrkHook hook) { + RAW_VLOG(10, "RemoveSbrkHook(%p)", hook); + return sbrk_hooks_.Remove(hook); +} + +// The code below is DEPRECATED. 
+extern "C" +MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook) { + RAW_VLOG(10, "SetNewHook(%p)", hook); + return new_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook) { + RAW_VLOG(10, "SetDeleteHook(%p)", hook); + return delete_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook) { + RAW_VLOG(10, "SetPreMmapHook(%p)", hook); + return premmap_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook) { + RAW_VLOG(10, "SetMmapHook(%p)", hook); + return mmap_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook) { + RAW_VLOG(10, "SetMunmapHook(%p)", hook); + return munmap_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook) { + RAW_VLOG(10, "SetMremapHook(%p)", hook); + return mremap_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook) { + RAW_VLOG(10, "SetPreSbrkHook(%p)", hook); + return presbrk_hooks_.ExchangeSingular(hook); +} + +extern "C" +MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook) { + RAW_VLOG(10, "SetSbrkHook(%p)", hook); + return sbrk_hooks_.ExchangeSingular(hook); +} +// End of DEPRECATED code section. + +// Note: embedding the function calls inside the traversal of HookList would be +// very confusing, as it is legal for a hook to remove itself and add other +// hooks. Doing traversal first, and then calling the hooks ensures we only +// call the hooks registered at the start. 
+#define INVOKE_HOOKS(HookType, hook_list, args) do { \ + HookType hooks[kHookListMaxValues]; \ + int num_hooks = hook_list.Traverse(hooks, kHookListMaxValues); \ + for (int i = 0; i < num_hooks; ++i) { \ + (*hooks[i])args; \ + } \ + } while (0) + +// There should only be one replacement. Return the result of the first +// one, or false if there is none. +#define INVOKE_REPLACEMENT(HookType, hook_list, args) do { \ + HookType hooks[kHookListMaxValues]; \ + int num_hooks = hook_list.Traverse(hooks, kHookListMaxValues); \ + return (num_hooks > 0 && (*hooks[0])args); \ + } while (0) + + +void MallocHook::InvokeNewHookSlow(const void* p, size_t s) { + if (tcmalloc::IsEmergencyPtr(p)) { + return; + } + INVOKE_HOOKS(NewHook, new_hooks_, (p, s)); +} + +void MallocHook::InvokeDeleteHookSlow(const void* p) { + if (tcmalloc::IsEmergencyPtr(p)) { + return; + } + INVOKE_HOOKS(DeleteHook, delete_hooks_, (p)); +} + +void MallocHook::InvokePreMmapHookSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + INVOKE_HOOKS(PreMmapHook, premmap_hooks_, (start, size, protection, flags, fd, + offset)); +} + +void MallocHook::InvokeMmapHookSlow(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + INVOKE_HOOKS(MmapHook, mmap_hooks_, (result, start, size, protection, flags, + fd, offset)); +} + +bool MallocHook::InvokeMmapReplacementSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result) { + INVOKE_REPLACEMENT(MmapReplacement, mmap_replacement_, + (start, size, protection, flags, fd, offset, result)); +} + +void MallocHook::InvokeMunmapHookSlow(const void* p, size_t s) { + INVOKE_HOOKS(MunmapHook, munmap_hooks_, (p, s)); +} + +bool MallocHook::InvokeMunmapReplacementSlow(const void* p, + size_t s, + int* result) { + INVOKE_REPLACEMENT(MunmapReplacement, munmap_replacement_, (p, s, result)); +} + +void 
MallocHook::InvokeMremapHookSlow(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr) { + INVOKE_HOOKS(MremapHook, mremap_hooks_, (result, old_addr, old_size, new_size, + flags, new_addr)); +} + +void MallocHook::InvokePreSbrkHookSlow(ptrdiff_t increment) { + INVOKE_HOOKS(PreSbrkHook, presbrk_hooks_, (increment)); +} + +void MallocHook::InvokeSbrkHookSlow(const void* result, ptrdiff_t increment) { + INVOKE_HOOKS(SbrkHook, sbrk_hooks_, (result, increment)); +} + +#undef INVOKE_HOOKS + +#ifndef NO_TCMALLOC_SAMPLES + +DEFINE_ATTRIBUTE_SECTION_VARS(google_malloc); +DECLARE_ATTRIBUTE_SECTION_VARS(google_malloc); + // actual functions are in debugallocation.cc or tcmalloc.cc +DEFINE_ATTRIBUTE_SECTION_VARS(malloc_hook); +DECLARE_ATTRIBUTE_SECTION_VARS(malloc_hook); + // actual functions are in this file, malloc_hook.cc, and low_level_alloc.cc + +#define ADDR_IN_ATTRIBUTE_SECTION(addr, name) \ + (reinterpret_cast<uintptr_t>(ATTRIBUTE_SECTION_START(name)) <= \ + reinterpret_cast<uintptr_t>(addr) && \ + reinterpret_cast<uintptr_t>(addr) < \ + reinterpret_cast<uintptr_t>(ATTRIBUTE_SECTION_STOP(name))) + +// Return true iff 'caller' is a return address within a function +// that calls one of our hooks via MallocHook:Invoke*. +// A helper for GetCallerStackTrace. +static inline bool InHookCaller(const void* caller) { + return ADDR_IN_ATTRIBUTE_SECTION(caller, google_malloc) || + ADDR_IN_ATTRIBUTE_SECTION(caller, malloc_hook); + // We can use one section for everything except tcmalloc_or_debug + // due to its special linkage mode, which prevents merging of the sections. 
+} + +#undef ADDR_IN_ATTRIBUTE_SECTION + +static bool checked_sections = false; + +static inline void CheckInHookCaller() { + if (!checked_sections) { + INIT_ATTRIBUTE_SECTION_VARS(google_malloc); + if (ATTRIBUTE_SECTION_START(google_malloc) == + ATTRIBUTE_SECTION_STOP(google_malloc)) { + RAW_LOG(ERROR, "google_malloc section is missing, " + "thus InHookCaller is broken!"); + } + INIT_ATTRIBUTE_SECTION_VARS(malloc_hook); + if (ATTRIBUTE_SECTION_START(malloc_hook) == + ATTRIBUTE_SECTION_STOP(malloc_hook)) { + RAW_LOG(ERROR, "malloc_hook section is missing, " + "thus InHookCaller is broken!"); + } + checked_sections = true; + } +} + +#endif // !NO_TCMALLOC_SAMPLES + +// We can improve behavior/compactness of this function +// if we pass a generic test function (with a generic arg) +// into the implementations for GetStackTrace instead of the skip_count. +extern "C" int MallocHook_GetCallerStackTrace(void** result, int max_depth, + int skip_count) { +#if defined(NO_TCMALLOC_SAMPLES) + return 0; +#elif !defined(HAVE_ATTRIBUTE_SECTION_START) + // Fall back to GetStackTrace and good old but fragile frame skip counts. + // Note: this path is inaccurate when a hook is not called directly by an + // allocation function but is daisy-chained through another hook, + // search for MallocHook::(Get|Set|Invoke)* to find such cases. + return GetStackTrace(result, max_depth, skip_count + int(DEBUG_MODE)); + // due to -foptimize-sibling-calls in opt mode + // there's no need for extra frame skip here then +#else + CheckInHookCaller(); + // MallocHook caller determination via InHookCaller works, use it: + static const int kMaxSkip = 32 + 6 + 3; + // Constant tuned to do just one GetStackTrace call below in practice + // and not get many frames that we don't actually need: + // currently max passsed max_depth is 32, + // max passed/needed skip_count is 6 + // and 3 is to account for some hook daisy chaining. 
+ static const int kStackSize = kMaxSkip + 1; + void* stack[kStackSize]; + int depth = GetStackTrace(stack, kStackSize, 1); // skip this function frame + if (depth == 0) // silenty propagate cases when GetStackTrace does not work + return 0; + for (int i = 0; i < depth; ++i) { // stack[0] is our immediate caller + if (InHookCaller(stack[i])) { + // fast-path to slow-path calls may be implemented by compiler + // as non-tail calls. Causing two functions on stack trace to be + // inside google_malloc. In such case we're skipping to + // outermost such frame since this is where malloc stack frames + // really start. + while (i + 1 < depth && InHookCaller(stack[i+1])) { + i++; + } + RAW_VLOG(10, "Found hooked allocator at %d: %p <- %p", + i, stack[i], stack[i+1]); + i += 1; // skip hook caller frame + depth -= i; // correct depth + if (depth > max_depth) depth = max_depth; + copy(stack + i, stack + i + depth, result); + if (depth < max_depth && depth + i == kStackSize) { + // get frames for the missing depth + depth += + GetStackTrace(result + depth, max_depth - depth, 1 + kStackSize); + } + return depth; + } + } + RAW_LOG(WARNING, "Hooked allocator frame not found, returning empty trace"); + // If this happens try increasing kMaxSkip + // or else something must be wrong with InHookCaller, + // e.g. for every section used in InHookCaller + // all functions in that section must be inside the same library. + return 0; +#endif +} + +// On systems where we know how, we override mmap/munmap/mremap/sbrk +// to provide support for calling the related hooks (in addition, +// of course, to doing what these functions normally do). 
+ +#if defined(__linux) +# include "malloc_hook_mmap_linux.h" + +#elif defined(__FreeBSD__) +# include "malloc_hook_mmap_freebsd.h" + +#else + +/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, + int flags, int fd, off_t offset) { + void* result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = mmap(start, length, prot, flags, fd, offset); + } + return result; +} + +/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = munmap(start, length); + } + return result; +} + +#endif diff --git a/src/third_party/gperftools-2.7/src/malloc_hook_mmap_freebsd.h b/src/third_party/gperftools-2.7/src/malloc_hook_mmap_freebsd.h new file mode 100644 index 00000000000..8575dcc7c08 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/malloc_hook_mmap_freebsd.h @@ -0,0 +1,135 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Override mmap/munmap/mremap/sbrk to provide support for calling the +// related hooks (in addition, of course, to doing what these +// functions normally do). + +#ifndef __FreeBSD__ +# error Should only be including malloc_hook_mmap_freebsd.h on FreeBSD systems. +#endif + +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> +#include <errno.h> +#include <dlfcn.h> + +// Make sure mmap doesn't get #define'd away by <sys/mman.h> +#undef mmap + +// According to the FreeBSD documentation, use syscall if you do not +// need 64-bit alignment otherwise use __syscall. Indeed, syscall +// doesn't work correctly in most situations on 64-bit. It's return +// type is 'int' so for things like SYS_mmap, it actually truncates +// the returned address to 32-bits. 
+#if defined(__amd64__) || defined(__x86_64__) +# define MALLOC_HOOK_SYSCALL __syscall +#else +# define MALLOC_HOOK_SYSCALL syscall +#endif + + +extern "C" { + void* mmap(void *start, size_t length,int prot, int flags, + int fd, off_t offset) __THROW + ATTRIBUTE_SECTION(malloc_hook); + int munmap(void* start, size_t length) __THROW + ATTRIBUTE_SECTION(malloc_hook); + void* sbrk(intptr_t increment) __THROW + ATTRIBUTE_SECTION(malloc_hook); +} + +static inline void* do_mmap(void *start, size_t length, + int prot, int flags, + int fd, off_t offset) __THROW { + return (void *)MALLOC_HOOK_SYSCALL(SYS_mmap, + start, length, prot, flags, fd, offset); +} + +static inline void* do_sbrk(intptr_t increment) { + static void *(*libc_sbrk)(intptr_t); + if (libc_sbrk == NULL) + libc_sbrk = (void *(*)(intptr_t))dlsym(RTLD_NEXT, "sbrk"); + + return libc_sbrk(increment); +} + + +extern "C" void* mmap(void *start, size_t length, int prot, int flags, + int fd, off_t offset) __THROW { + MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); + void *result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap(start, length, prot, flags, fd, + static_cast<size_t>(offset)); // avoid sign extension + } + MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); + return result; +} + +extern "C" int munmap(void* start, size_t length) __THROW { + MallocHook::InvokeMunmapHook(start, length); + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = MALLOC_HOOK_SYSCALL(SYS_munmap, start, length); + } + + return result; +} + +extern "C" void* sbrk(intptr_t increment) __THROW { + MallocHook::InvokePreSbrkHook(increment); + void *result = do_sbrk(increment); + MallocHook::InvokeSbrkHook(result, increment); + return result; +} + +/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, + int flags, int fd, off_t offset) { + void* result; + if 
(!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap(start, length, prot, flags, fd, offset); + } + + return result; +} + +/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = MALLOC_HOOK_SYSCALL(SYS_munmap, start, length); + } + return result; +} + +#undef MALLOC_HOOK_SYSCALL diff --git a/src/third_party/gperftools-2.7/src/malloc_hook_mmap_linux.h b/src/third_party/gperftools-2.7/src/malloc_hook_mmap_linux.h new file mode 100755 index 00000000000..2f6116fa45c --- /dev/null +++ b/src/third_party/gperftools-2.7/src/malloc_hook_mmap_linux.h @@ -0,0 +1,242 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +// We define mmap() and mmap64(), which somewhat reimplements libc's mmap +// syscall stubs. Unfortunately libc only exports the stubs via weak symbols +// (which we're overriding with our mmap64() and mmap() wrappers) so we can't +// just call through to them. + +#ifndef __linux +# error Should only be including malloc_hook_mmap_linux.h on linux systems. +#endif + +#include <unistd.h> +#include <syscall.h> +#include <sys/mman.h> +#include <errno.h> +#include "base/linux_syscall_support.h" + +// The x86-32 case and the x86-64 case differ: +// 32b has a mmap2() syscall, 64b does not. +// 64b and 32b have different calling conventions for mmap(). + +// I test for 64-bit first so I don't have to do things like +// '#if (defined(__mips__) && !defined(__MIPS64__))' as a mips32 check. 
+#if defined(__x86_64__) \ + || defined(__PPC64__) \ + || defined(__aarch64__) \ + || (defined(_MIPS_SIM) && (_MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32)) \ + || defined(__s390__) + +static inline void* do_mmap64(void *start, size_t length, + int prot, int flags, + int fd, __off64_t offset) __THROW { + return sys_mmap(start, length, prot, flags, fd, offset); +} + +#define MALLOC_HOOK_HAVE_DO_MMAP64 1 + +#elif defined(__i386__) || defined(__PPC__) || defined(__mips__) || \ + defined(__arm__) + +static inline void* do_mmap64(void *start, size_t length, + int prot, int flags, + int fd, __off64_t offset) __THROW { + void *result; + + // Try mmap2() unless it's not supported + static bool have_mmap2 = true; + if (have_mmap2) { + static int pagesize = 0; + if (!pagesize) pagesize = getpagesize(); + + // Check that the offset is page aligned + if (offset & (pagesize - 1)) { + result = MAP_FAILED; + errno = EINVAL; + goto out; + } + + result = (void *)syscall(SYS_mmap2, + start, length, prot, flags, fd, + (off_t) (offset / pagesize)); + if (result != MAP_FAILED || errno != ENOSYS) goto out; + + // We don't have mmap2() after all - don't bother trying it in future + have_mmap2 = false; + } + + if (((off_t)offset) != offset) { + // If we're trying to map a 64-bit offset, fail now since we don't + // have 64-bit mmap() support. + result = MAP_FAILED; + errno = EINVAL; + goto out; + } + +#ifdef __NR_mmap + { + // Fall back to old 32-bit offset mmap() call + // Old syscall interface cannot handle six args, so pass in an array + int32 args[6] = { (int32) start, (int32) length, prot, flags, fd, + (int32)(off_t) offset }; + result = (void *)syscall(SYS_mmap, args); + } +#else + // Some Linux ports like ARM EABI Linux has no mmap, just mmap2. 
+ result = MAP_FAILED; +#endif + + out: + return result; +} + +#define MALLOC_HOOK_HAVE_DO_MMAP64 1 + +#endif // #if defined(__x86_64__) + + +#ifdef MALLOC_HOOK_HAVE_DO_MMAP64 + +// We use do_mmap64 abstraction to put MallocHook::InvokeMmapHook +// calls right into mmap and mmap64, so that the stack frames in the caller's +// stack are at the same offsets for all the calls of memory allocating +// functions. + +// Put all callers of MallocHook::Invoke* in this module into +// malloc_hook section, +// so that MallocHook::GetCallerStackTrace can function accurately: + +// Make sure mmap doesn't get #define'd away by <sys/mman.h> +# undef mmap + +extern "C" { + void* mmap64(void *start, size_t length, int prot, int flags, + int fd, __off64_t offset ) __THROW + ATTRIBUTE_SECTION(malloc_hook); + void* mmap(void *start, size_t length,int prot, int flags, + int fd, off_t offset) __THROW + ATTRIBUTE_SECTION(malloc_hook); + int munmap(void* start, size_t length) __THROW + ATTRIBUTE_SECTION(malloc_hook); + void* mremap(void* old_addr, size_t old_size, size_t new_size, + int flags, ...) 
__THROW + ATTRIBUTE_SECTION(malloc_hook); + void* sbrk(intptr_t increment) __THROW + ATTRIBUTE_SECTION(malloc_hook); +} + +extern "C" void* mmap64(void *start, size_t length, int prot, int flags, + int fd, __off64_t offset) __THROW { + MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); + void *result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap64(start, length, prot, flags, fd, offset); + } + MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); + return result; +} + +# if !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH) + +extern "C" void* mmap(void *start, size_t length, int prot, int flags, + int fd, off_t offset) __THROW { + MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); + void *result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap64(start, length, prot, flags, fd, + static_cast<size_t>(offset)); // avoid sign extension + } + MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); + return result; +} + +# endif // !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH) + +extern "C" int munmap(void* start, size_t length) __THROW { + MallocHook::InvokeMunmapHook(start, length); + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = sys_munmap(start, length); + } + return result; +} + +extern "C" void* mremap(void* old_addr, size_t old_size, size_t new_size, + int flags, ...) 
__THROW { + va_list ap; + va_start(ap, flags); + void *new_address = va_arg(ap, void *); + va_end(ap); + void* result = sys_mremap(old_addr, old_size, new_size, flags, new_address); + MallocHook::InvokeMremapHook(result, old_addr, old_size, new_size, flags, + new_address); + return result; +} + +#ifndef __UCLIBC__ +// libc's version: +extern "C" void* __sbrk(intptr_t increment); + +extern "C" void* sbrk(intptr_t increment) __THROW { + MallocHook::InvokePreSbrkHook(increment); + void *result = __sbrk(increment); + MallocHook::InvokeSbrkHook(result, increment); + return result; +} + +#endif + +/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, + int flags, int fd, off_t offset) { + void* result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap64(start, length, prot, flags, fd, offset); + } + return result; +} + +/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = syscall(SYS_munmap, start, length); + } + return result; +} + +#undef MALLOC_HOOK_HAVE_DO_MMAP64 + +#endif // #ifdef MALLOC_HOOK_HAVE_DO_MMAP64 diff --git a/src/third_party/gperftools-2.7/src/maybe_emergency_malloc.h b/src/third_party/gperftools-2.7/src/maybe_emergency_malloc.h new file mode 100644 index 00000000000..250ecf01a3f --- /dev/null +++ b/src/third_party/gperftools-2.7/src/maybe_emergency_malloc.h @@ -0,0 +1,55 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2014, gperftools Contributors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#ifndef MAYBE_EMERGENCY_MALLOC_H +#define MAYBE_EMERGENCY_MALLOC_H + +#include "config.h" + +#ifdef ENABLE_EMERGENCY_MALLOC + +#include "emergency_malloc.h" + +#else + +namespace tcmalloc { + static inline void *EmergencyMalloc(size_t size) {return NULL;} + static inline void EmergencyFree(void *p) {} + static inline void *EmergencyCalloc(size_t n, size_t elem_size) {return NULL;} + static inline void *EmergencyRealloc(void *old_ptr, size_t new_size) {return NULL;} + + static inline bool IsEmergencyPtr(const void *_ptr) { + return false; + } +} + +#endif // ENABLE_EMERGENCY_MALLOC + +#endif diff --git a/src/third_party/gperftools-2.7/src/maybe_threads.cc b/src/third_party/gperftools-2.7/src/maybe_threads.cc new file mode 100644 index 00000000000..ef7e582c9de --- /dev/null +++ b/src/third_party/gperftools-2.7/src/maybe_threads.cc @@ -0,0 +1,177 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Menage <opensource@google.com> +// +// Some wrappers for pthread functions so that we can be LD_PRELOADed +// against non-pthreads apps. +// +// This module will behave very strangely if some pthreads functions +// exist and others don't. + +#include "config.h" +#include <assert.h> +#include <string.h> // for memcmp +#include <stdio.h> // for __isthreaded on FreeBSD +// We don't actually need strings. But including this header seems to +// stop the compiler trying to short-circuit our pthreads existence +// tests and claiming that the address of a function is always +// non-zero. I have no idea why ... +#include <string> +#include "maybe_threads.h" +#include "base/basictypes.h" +#include "base/logging.h" + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +// These are the methods we're going to conditionally include. 
+extern "C" { + int pthread_key_create (pthread_key_t*, void (*)(void*)) + __THROW ATTRIBUTE_WEAK; + int pthread_key_delete (pthread_key_t) + __THROW ATTRIBUTE_WEAK; + void *pthread_getspecific(pthread_key_t) + __THROW ATTRIBUTE_WEAK; + int pthread_setspecific(pthread_key_t, const void*) + __THROW ATTRIBUTE_WEAK; + int pthread_once(pthread_once_t *, void (*)(void)) + ATTRIBUTE_WEAK; +#ifdef HAVE_FORK + int pthread_atfork(void (*__prepare) (void), + void (*__parent) (void), + void (*__child) (void)) + __THROW ATTRIBUTE_WEAK; +#endif +} + +#define MAX_PERTHREAD_VALS 16 +static void *perftools_pthread_specific_vals[MAX_PERTHREAD_VALS]; +static int next_key; + +// NOTE: it's similar to bitcast defined in basic_types.h with +// exception of ignoring sizes mismatch +template <typename T1, typename T2> +static T2 memcpy_cast(const T1 &input) { + T2 output; + size_t s = sizeof(input); + if (sizeof(output) < s) { + s = sizeof(output); + } + memcpy(&output, &input, s); + return output; +} + +int perftools_pthread_key_create(pthread_key_t *key, + void (*destr_function) (void *)) { + if (pthread_key_create) { + return pthread_key_create(key, destr_function); + } else { + assert(next_key < MAX_PERTHREAD_VALS); + *key = memcpy_cast<int, pthread_key_t>(next_key++); + return 0; + } +} + +int perftools_pthread_key_delete(pthread_key_t key) { + if (pthread_key_delete) { + return pthread_key_delete(key); + } else { + return 0; + } +} + +void *perftools_pthread_getspecific(pthread_key_t key) { + if (pthread_getspecific) { + return pthread_getspecific(key); + } else { + return perftools_pthread_specific_vals[memcpy_cast<pthread_key_t, int>(key)]; + } +} + +int perftools_pthread_setspecific(pthread_key_t key, void *val) { + if (pthread_setspecific) { + return pthread_setspecific(key, val); + } else { + perftools_pthread_specific_vals[memcpy_cast<pthread_key_t, int>(key)] = val; + return 0; + } +} + + +static pthread_once_t pthread_once_init = PTHREAD_ONCE_INIT; +int 
perftools_pthread_once(pthread_once_t *ctl, + void (*init_routine) (void)) { +#ifdef __FreeBSD__ + // On __FreeBSD__, calling pthread_once on a system that is not + // linked with -pthread is silently a noop. :-( Luckily, we have a + // workaround: FreeBSD exposes __isthreaded in <stdio.h>, which is + // set to 1 when the first thread is spawned. So on those systems, + // we can use our own separate pthreads-once mechanism, which is + // used until __isthreaded is 1 (which will never be true if the app + // is not linked with -pthread). + static bool pthread_once_ran_before_threads = false; + if (pthread_once_ran_before_threads) { + return 0; + } + if (!__isthreaded) { + init_routine(); + pthread_once_ran_before_threads = true; + return 0; + } +#endif + if (pthread_once) { + return pthread_once(ctl, init_routine); + } else { + if (memcmp(ctl, &pthread_once_init, sizeof(*ctl)) == 0) { + init_routine(); + ++*(char*)(ctl); // make it so it's no longer equal to init + } + return 0; + } +} + +#ifdef HAVE_FORK + +void perftools_pthread_atfork(void (*before)(), + void (*parent_after)(), + void (*child_after)()) { + if (pthread_atfork) { + int rv = pthread_atfork(before, parent_after, child_after); + CHECK(rv == 0); + } +} + +#endif diff --git a/src/third_party/gperftools-2.7/src/maybe_threads.h b/src/third_party/gperftools-2.7/src/maybe_threads.h new file mode 100644 index 00000000000..c6cfdf7d158 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/maybe_threads.h @@ -0,0 +1,61 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Menage <opensource@google.com> + +//------------------------------------------------------------------- +// Some wrappers for pthread functions so that we can be LD_PRELOADed +// against non-pthreads apps. 
+//------------------------------------------------------------------- + +#ifndef GOOGLE_MAYBE_THREADS_H_ +#define GOOGLE_MAYBE_THREADS_H_ + +#ifdef HAVE_PTHREAD +#include <pthread.h> +#endif + +int perftools_pthread_key_create(pthread_key_t *key, + void (*destr_function) (void *)); +int perftools_pthread_key_delete(pthread_key_t key); +void *perftools_pthread_getspecific(pthread_key_t key); +int perftools_pthread_setspecific(pthread_key_t key, void *val); +int perftools_pthread_once(pthread_once_t *ctl, + void (*init_routine) (void)); + +// Our wrapper for pthread_atfork. Does _nothing_ when there are no +// threads. See static_vars.cc:SetupAtForkLocksHandler for only user +// of this. +void perftools_pthread_atfork(void (*before)(), + void (*parent_after)(), + void (*child_after)()); + +#endif /* GOOGLE_MAYBE_THREADS_H_ */ diff --git a/src/third_party/gperftools-2.7/src/memfs_malloc.cc b/src/third_party/gperftools-2.7/src/memfs_malloc.cc new file mode 100644 index 00000000000..fd26daff6b2 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/memfs_malloc.cc @@ -0,0 +1,272 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Arun Sharma +// +// A tcmalloc system allocator that uses a memory based filesystem such as +// tmpfs or hugetlbfs +// +// Since these only exist on linux, we only register this allocator there. 
+ +#ifdef __linux + +#include <config.h> +#include <errno.h> // for errno, EINVAL +#include <inttypes.h> // for PRId64 +#include <limits.h> // for PATH_MAX +#include <stddef.h> // for size_t, NULL +#ifdef HAVE_STDINT_H +#include <stdint.h> // for int64_t, uintptr_t +#endif +#include <stdio.h> // for snprintf +#include <stdlib.h> // for mkstemp +#include <string.h> // for strerror +#include <sys/mman.h> // for mmap, MAP_FAILED, etc +#include <sys/statfs.h> // for fstatfs, statfs +#include <unistd.h> // for ftruncate, off_t, unlink +#include <new> // for operator new +#include <string> + +#include <gperftools/malloc_extension.h> +#include "base/basictypes.h" +#include "base/googleinit.h" +#include "base/sysinfo.h" +#include "internal_logging.h" + +// TODO(sanjay): Move the code below into the tcmalloc namespace +using tcmalloc::kLog; +using tcmalloc::kCrash; +using tcmalloc::Log; +using std::string; + +DEFINE_string(memfs_malloc_path, EnvToString("TCMALLOC_MEMFS_MALLOC_PATH", ""), + "Path where hugetlbfs or tmpfs is mounted. The caller is " + "responsible for ensuring that the path is unique and does " + "not conflict with another process"); +DEFINE_int64(memfs_malloc_limit_mb, + EnvToInt("TCMALLOC_MEMFS_LIMIT_MB", 0), + "Limit total allocation size to the " + "specified number of MiB. 0 == no limit."); +DEFINE_bool(memfs_malloc_abort_on_fail, + EnvToBool("TCMALLOC_MEMFS_ABORT_ON_FAIL", false), + "abort() whenever memfs_malloc fails to satisfy an allocation " + "for any reason."); +DEFINE_bool(memfs_malloc_ignore_mmap_fail, + EnvToBool("TCMALLOC_MEMFS_IGNORE_MMAP_FAIL", false), + "Ignore failures from mmap"); +DEFINE_bool(memfs_malloc_map_private, + EnvToBool("TCMALLOC_MEMFS_MAP_PRIVATE", false), + "Use MAP_PRIVATE with mmap"); + +// Hugetlbfs based allocator for tcmalloc +class HugetlbSysAllocator: public SysAllocator { +public: + explicit HugetlbSysAllocator(SysAllocator* fallback) + : failed_(true), // To disable allocator until Initialize() is called. 
+ big_page_size_(0), + hugetlb_fd_(-1), + hugetlb_base_(0), + fallback_(fallback) { + } + + void* Alloc(size_t size, size_t *actual_size, size_t alignment); + bool Initialize(); + + bool failed_; // Whether failed to allocate memory. + +private: + void* AllocInternal(size_t size, size_t *actual_size, size_t alignment); + + int64 big_page_size_; + int hugetlb_fd_; // file descriptor for hugetlb + off_t hugetlb_base_; + + SysAllocator* fallback_; // Default system allocator to fall back to. +}; +static union { + char buf[sizeof(HugetlbSysAllocator)]; + void *ptr; +} hugetlb_space; + +// No locking needed here since we assume that tcmalloc calls +// us with an internal lock held (see tcmalloc/system-alloc.cc). +void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { + if (failed_) { + return fallback_->Alloc(size, actual_size, alignment); + } + + // We don't respond to allocation requests smaller than big_page_size_ unless + // the caller is ok to take more than they asked for. Used by MetaDataAlloc. + if (actual_size == NULL && size < big_page_size_) { + return fallback_->Alloc(size, actual_size, alignment); + } + + // Enforce huge page alignment. Be careful to deal with overflow. 
+ size_t new_alignment = alignment; + if (new_alignment < big_page_size_) new_alignment = big_page_size_; + size_t aligned_size = ((size + new_alignment - 1) / + new_alignment) * new_alignment; + if (aligned_size < size) { + return fallback_->Alloc(size, actual_size, alignment); + } + + void* result = AllocInternal(aligned_size, actual_size, new_alignment); + if (result != NULL) { + return result; + } + Log(kLog, __FILE__, __LINE__, + "HugetlbSysAllocator: (failed, allocated)", failed_, hugetlb_base_); + if (FLAGS_memfs_malloc_abort_on_fail) { + Log(kCrash, __FILE__, __LINE__, + "memfs_malloc_abort_on_fail is set"); + } + return fallback_->Alloc(size, actual_size, alignment); +} + +void* HugetlbSysAllocator::AllocInternal(size_t size, size_t* actual_size, + size_t alignment) { + // Ask for extra memory if alignment > pagesize + size_t extra = 0; + if (alignment > big_page_size_) { + extra = alignment - big_page_size_; + } + + // Test if this allocation would put us over the limit. + off_t limit = FLAGS_memfs_malloc_limit_mb*1024*1024; + if (limit > 0 && hugetlb_base_ + size + extra > limit) { + // Disable the allocator when there's less than one page left. + if (limit - hugetlb_base_ < big_page_size_) { + Log(kLog, __FILE__, __LINE__, "reached memfs_malloc_limit_mb"); + failed_ = true; + } + else { + Log(kLog, __FILE__, __LINE__, + "alloc too large (size, bytes left)", size, limit-hugetlb_base_); + } + return NULL; + } + + // This is not needed for hugetlbfs, but needed for tmpfs. Annoyingly + // hugetlbfs returns EINVAL for ftruncate. + int ret = ftruncate(hugetlb_fd_, hugetlb_base_ + size + extra); + if (ret != 0 && errno != EINVAL) { + Log(kLog, __FILE__, __LINE__, + "ftruncate failed", strerror(errno)); + failed_ = true; + return NULL; + } + + // Note: size + extra does not overflow since: + // size + alignment < (1<<NBITS). 
+ // and extra <= alignment + // therefore size + extra < (1<<NBITS) + void *result; + result = mmap(0, size + extra, PROT_WRITE|PROT_READ, + FLAGS_memfs_malloc_map_private ? MAP_PRIVATE : MAP_SHARED, + hugetlb_fd_, hugetlb_base_); + if (result == reinterpret_cast<void*>(MAP_FAILED)) { + if (!FLAGS_memfs_malloc_ignore_mmap_fail) { + Log(kLog, __FILE__, __LINE__, + "mmap failed (size, error)", size + extra, strerror(errno)); + failed_ = true; + } + return NULL; + } + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + + // Adjust the return memory so it is aligned + size_t adjust = 0; + if ((ptr & (alignment - 1)) != 0) { + adjust = alignment - (ptr & (alignment - 1)); + } + ptr += adjust; + hugetlb_base_ += (size + extra); + + if (actual_size) { + *actual_size = size + extra - adjust; + } + + return reinterpret_cast<void*>(ptr); +} + +bool HugetlbSysAllocator::Initialize() { + char path[PATH_MAX]; + const int pathlen = FLAGS_memfs_malloc_path.size(); + if (pathlen + 8 > sizeof(path)) { + Log(kCrash, __FILE__, __LINE__, "XX fatal: memfs_malloc_path too long"); + return false; + } + memcpy(path, FLAGS_memfs_malloc_path.data(), pathlen); + memcpy(path + pathlen, ".XXXXXX", 8); // Also copies terminating \0 + + int hugetlb_fd = mkstemp(path); + if (hugetlb_fd == -1) { + Log(kLog, __FILE__, __LINE__, + "warning: unable to create memfs_malloc_path", + path, strerror(errno)); + return false; + } + + // Cleanup memory on process exit + if (unlink(path) == -1) { + Log(kCrash, __FILE__, __LINE__, + "fatal: error unlinking memfs_malloc_path", path, strerror(errno)); + return false; + } + + // Use fstatfs to figure out the default page size for memfs + struct statfs sfs; + if (fstatfs(hugetlb_fd, &sfs) == -1) { + Log(kCrash, __FILE__, __LINE__, + "fatal: error fstatfs of memfs_malloc_path", strerror(errno)); + return false; + } + int64 page_size = sfs.f_bsize; + + hugetlb_fd_ = hugetlb_fd; + big_page_size_ = page_size; + failed_ = false; + return true; +} + 
+REGISTER_MODULE_INITIALIZER(memfs_malloc, { + if (FLAGS_memfs_malloc_path.length()) { + SysAllocator* alloc = MallocExtension::instance()->GetSystemAllocator(); + HugetlbSysAllocator* hp = + new (hugetlb_space.buf) HugetlbSysAllocator(alloc); + if (hp->Initialize()) { + MallocExtension::instance()->SetSystemAllocator(hp); + } + } +}); + +#endif /* ifdef __linux */ diff --git a/src/third_party/gperftools-2.7/src/memory_region_map.cc b/src/third_party/gperftools-2.7/src/memory_region_map.cc new file mode 100755 index 00000000000..841d6f3cf85 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/memory_region_map.cc @@ -0,0 +1,831 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Maxim Lifantsev + */ + +// +// Background and key design points of MemoryRegionMap. +// +// MemoryRegionMap is a low-level module with quite atypical requirements that +// result in some degree of non-triviality of the implementation and design. +// +// MemoryRegionMap collects info about *all* memory regions created with +// mmap, munmap, mremap, sbrk. +// They key word above is 'all': all that are happening in a process +// during its lifetime frequently starting even before global object +// constructor execution. +// +// This is needed by the primary client of MemoryRegionMap: +// HeapLeakChecker uses the regions and the associated stack traces +// to figure out what part of the memory is the heap: +// if MemoryRegionMap were to miss some (early) regions, leak checking would +// stop working correctly. +// +// To accomplish the goal of functioning before/during global object +// constructor execution MemoryRegionMap is done as a singleton service +// that relies on own on-demand initialized static constructor-less data, +// and only relies on other low-level modules that can also function properly +// even before global object constructors run. 
+// +// Accomplishing the goal of collecting data about all mmap, munmap, mremap, +// sbrk occurrences is a more involved: conceptually to do this one needs to +// record some bits of data in particular about any mmap or sbrk call, +// but to do that one needs to allocate memory for that data at some point, +// but all memory allocations in the end themselves come from an mmap +// or sbrk call (that's how the address space of the process grows). +// +// Also note that we need to do all the above recording from +// within an mmap/sbrk hook which is sometimes/frequently is made by a memory +// allocator, including the allocator MemoryRegionMap itself must rely on. +// In the case of heap-checker usage this includes even the very first +// mmap/sbrk call happening in the program: heap-checker gets activated due to +// a link-time installed mmap/sbrk hook and it initializes MemoryRegionMap +// and asks it to record info about this very first call right from that +// very first hook invocation. +// +// MemoryRegionMap is doing its memory allocations via LowLevelAlloc: +// unlike more complex standard memory allocator, LowLevelAlloc cooperates with +// MemoryRegionMap by not holding any of its own locks while it calls mmap +// to get memory, thus we are able to call LowLevelAlloc from +// our mmap/sbrk hooks without causing a deadlock in it. +// For the same reason of deadlock prevention the locking in MemoryRegionMap +// itself is write-recursive which is an exception to Google's mutex usage. +// +// We still need to break the infinite cycle of mmap calling our hook, +// which asks LowLevelAlloc for memory to record this mmap, +// which (sometimes) causes mmap, which calls our hook, and so on. 
+// We do this as follows: on a recursive call of MemoryRegionMap's +// mmap/sbrk/mremap hook we record the data about the allocation in a +// static fixed-sized stack (saved_regions and saved_buckets), when the +// recursion unwinds but before returning from the outer hook call we unwind +// this stack and move the data from saved_regions and saved_buckets to its +// permanent place in the RegionSet and "bucket_table" respectively, +// which can cause more allocations and mmap-s and recursion and unwinding, +// but the whole process ends eventually due to the fact that for the small +// allocations we are doing LowLevelAlloc reuses one mmap call and parcels out +// the memory it created to satisfy several of our allocation requests. +// + +// ========================================================================= // + +#include <config.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> +#elif !defined(MAP_FAILED) +#define MAP_FAILED -1 // the only thing we need from mman.h +#endif +#ifdef HAVE_PTHREAD +#include <pthread.h> // for pthread_t, pthread_self() +#endif +#include <stddef.h> + +#include <algorithm> +#include <set> + +#include "memory_region_map.h" + +#include "base/googleinit.h" +#include "base/logging.h" +#include "base/low_level_alloc.h" +#include "malloc_hook-inl.h" + +#include <gperftools/stacktrace.h> +#include <gperftools/malloc_hook.h> + +// MREMAP_FIXED is a linux extension. How it's used in this file, +// setting it to 0 is equivalent to saying, "This feature isn't +// supported", which is right. 
+#ifndef MREMAP_FIXED +# define MREMAP_FIXED 0 +#endif + +using std::max; + +// ========================================================================= // + +int MemoryRegionMap::client_count_ = 0; +int MemoryRegionMap::max_stack_depth_ = 0; +MemoryRegionMap::RegionSet* MemoryRegionMap::regions_ = NULL; +LowLevelAlloc::Arena* MemoryRegionMap::arena_ = NULL; +SpinLock MemoryRegionMap::lock_(SpinLock::LINKER_INITIALIZED); +SpinLock MemoryRegionMap::owner_lock_( // ACQUIRED_AFTER(lock_) + SpinLock::LINKER_INITIALIZED); +int MemoryRegionMap::recursion_count_ = 0; // GUARDED_BY(owner_lock_) +pthread_t MemoryRegionMap::lock_owner_tid_; // GUARDED_BY(owner_lock_) +int64 MemoryRegionMap::map_size_ = 0; +int64 MemoryRegionMap::unmap_size_ = 0; +HeapProfileBucket** MemoryRegionMap::bucket_table_ = NULL; // GUARDED_BY(lock_) +int MemoryRegionMap::num_buckets_ = 0; // GUARDED_BY(lock_) +int MemoryRegionMap::saved_buckets_count_ = 0; // GUARDED_BY(lock_) +HeapProfileBucket MemoryRegionMap::saved_buckets_[20]; // GUARDED_BY(lock_) + +// GUARDED_BY(lock_) +const void* MemoryRegionMap::saved_buckets_keys_[20][kMaxStackDepth]; + +// ========================================================================= // + +// Simple hook into execution of global object constructors, +// so that we do not call pthread_self() when it does not yet work. +static bool libpthread_initialized = false; +REGISTER_MODULE_INITIALIZER(libpthread_initialized_setter, + libpthread_initialized = true); + +static inline bool current_thread_is(pthread_t should_be) { + // Before main() runs, there's only one thread, so we're always that thread + if (!libpthread_initialized) return true; + // this starts working only sometime well into global constructor execution: + return pthread_equal(pthread_self(), should_be); +} + +// ========================================================================= // + +// Constructor-less place-holder to store a RegionSet in. 
+union MemoryRegionMap::RegionSetRep { + char rep[sizeof(RegionSet)]; + void* align_it; // do not need a better alignment for 'rep' than this + RegionSet* region_set() { return reinterpret_cast<RegionSet*>(rep); } +}; + +// The bytes where MemoryRegionMap::regions_ will point to. +// We use RegionSetRep with noop c-tor so that global construction +// does not interfere. +static MemoryRegionMap::RegionSetRep regions_rep; + +// ========================================================================= // + +// Has InsertRegionLocked been called recursively +// (or rather should we *not* use regions_ to record a hooked mmap). +static bool recursive_insert = false; + +void MemoryRegionMap::Init(int max_stack_depth, bool use_buckets) { + RAW_VLOG(10, "MemoryRegionMap Init"); + RAW_CHECK(max_stack_depth >= 0, ""); + // Make sure we don't overflow the memory in region stacks: + RAW_CHECK(max_stack_depth <= kMaxStackDepth, + "need to increase kMaxStackDepth?"); + Lock(); + client_count_ += 1; + max_stack_depth_ = max(max_stack_depth_, max_stack_depth); + if (client_count_ > 1) { + // not first client: already did initialization-proper + Unlock(); + RAW_VLOG(10, "MemoryRegionMap Init increment done"); + return; + } + // Set our hooks and make sure they were installed: + RAW_CHECK(MallocHook::AddMmapHook(&MmapHook), ""); + RAW_CHECK(MallocHook::AddMremapHook(&MremapHook), ""); + RAW_CHECK(MallocHook::AddSbrkHook(&SbrkHook), ""); + RAW_CHECK(MallocHook::AddMunmapHook(&MunmapHook), ""); + // We need to set recursive_insert since the NewArena call itself + // will already do some allocations with mmap which our hooks will catch + // recursive_insert allows us to buffer info about these mmap calls. + // Note that Init() can be (and is) sometimes called + // already from within an mmap/sbrk hook. 
+ recursive_insert = true; + arena_ = LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena()); + recursive_insert = false; + HandleSavedRegionsLocked(&InsertRegionLocked); // flush the buffered ones + // Can't instead use HandleSavedRegionsLocked(&DoInsertRegionLocked) before + // recursive_insert = false; as InsertRegionLocked will also construct + // regions_ on demand for us. + if (use_buckets) { + const int table_bytes = kHashTableSize * sizeof(*bucket_table_); + recursive_insert = true; + bucket_table_ = static_cast<HeapProfileBucket**>( + MyAllocator::Allocate(table_bytes)); + recursive_insert = false; + memset(bucket_table_, 0, table_bytes); + num_buckets_ = 0; + } + Unlock(); + RAW_VLOG(10, "MemoryRegionMap Init done"); +} + +bool MemoryRegionMap::Shutdown() { + RAW_VLOG(10, "MemoryRegionMap Shutdown"); + Lock(); + RAW_CHECK(client_count_ > 0, ""); + client_count_ -= 1; + if (client_count_ != 0) { // not last client; need not really shutdown + Unlock(); + RAW_VLOG(10, "MemoryRegionMap Shutdown decrement done"); + return true; + } + if (bucket_table_ != NULL) { + for (int i = 0; i < kHashTableSize; i++) { + for (HeapProfileBucket* curr = bucket_table_[i]; curr != 0; /**/) { + HeapProfileBucket* bucket = curr; + curr = curr->next; + MyAllocator::Free(bucket->stack, 0); + MyAllocator::Free(bucket, 0); + } + } + MyAllocator::Free(bucket_table_, 0); + num_buckets_ = 0; + bucket_table_ = NULL; + } + RAW_CHECK(MallocHook::RemoveMmapHook(&MmapHook), ""); + RAW_CHECK(MallocHook::RemoveMremapHook(&MremapHook), ""); + RAW_CHECK(MallocHook::RemoveSbrkHook(&SbrkHook), ""); + RAW_CHECK(MallocHook::RemoveMunmapHook(&MunmapHook), ""); + if (regions_) regions_->~RegionSet(); + regions_ = NULL; + bool deleted_arena = LowLevelAlloc::DeleteArena(arena_); + if (deleted_arena) { + arena_ = 0; + } else { + RAW_LOG(WARNING, "Can't delete LowLevelAlloc arena: it's being used"); + } + Unlock(); + RAW_VLOG(10, "MemoryRegionMap Shutdown done"); + return deleted_arena; +} + +bool 
MemoryRegionMap::IsRecordingLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + return client_count_ > 0; +} + +// Invariants (once libpthread_initialized is true): +// * While lock_ is not held, recursion_count_ is 0 (and +// lock_owner_tid_ is the previous owner, but we don't rely on +// that). +// * recursion_count_ and lock_owner_tid_ are only written while +// both lock_ and owner_lock_ are held. They may be read under +// just owner_lock_. +// * At entry and exit of Lock() and Unlock(), the current thread +// owns lock_ iff pthread_equal(lock_owner_tid_, pthread_self()) +// && recursion_count_ > 0. +void MemoryRegionMap::Lock() { + { + SpinLockHolder l(&owner_lock_); + if (recursion_count_ > 0 && current_thread_is(lock_owner_tid_)) { + RAW_CHECK(lock_.IsHeld(), "Invariants violated"); + recursion_count_++; + RAW_CHECK(recursion_count_ <= 5, + "recursive lock nesting unexpectedly deep"); + return; + } + } + lock_.Lock(); + { + SpinLockHolder l(&owner_lock_); + RAW_CHECK(recursion_count_ == 0, + "Last Unlock didn't reset recursion_count_"); + if (libpthread_initialized) + lock_owner_tid_ = pthread_self(); + recursion_count_ = 1; + } +} + +void MemoryRegionMap::Unlock() { + SpinLockHolder l(&owner_lock_); + RAW_CHECK(recursion_count_ > 0, "unlock when not held"); + RAW_CHECK(lock_.IsHeld(), + "unlock when not held, and recursion_count_ is wrong"); + RAW_CHECK(current_thread_is(lock_owner_tid_), "unlock by non-holder"); + recursion_count_--; + if (recursion_count_ == 0) { + lock_.Unlock(); + } +} + +bool MemoryRegionMap::LockIsHeld() { + SpinLockHolder l(&owner_lock_); + return lock_.IsHeld() && current_thread_is(lock_owner_tid_); +} + +const MemoryRegionMap::Region* +MemoryRegionMap::DoFindRegionLocked(uintptr_t addr) { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + if (regions_ != NULL) { + Region sample; + sample.SetRegionSetKey(addr); + RegionSet::iterator region = regions_->lower_bound(sample); + if (region != 
regions_->end()) { + RAW_CHECK(addr <= region->end_addr, ""); + if (region->start_addr <= addr && addr < region->end_addr) { + return &(*region); + } + } + } + return NULL; +} + +bool MemoryRegionMap::FindRegion(uintptr_t addr, Region* result) { + Lock(); + const Region* region = DoFindRegionLocked(addr); + if (region != NULL) *result = *region; // create it as an independent copy + Unlock(); + return region != NULL; +} + +bool MemoryRegionMap::FindAndMarkStackRegion(uintptr_t stack_top, + Region* result) { + Lock(); + const Region* region = DoFindRegionLocked(stack_top); + if (region != NULL) { + RAW_VLOG(10, "Stack at %p is inside region %p..%p", + reinterpret_cast<void*>(stack_top), + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + const_cast<Region*>(region)->set_is_stack(); // now we know + // cast is safe (set_is_stack does not change the set ordering key) + *result = *region; // create *result as an independent copy + } + Unlock(); + return region != NULL; +} + +HeapProfileBucket* MemoryRegionMap::GetBucket(int depth, + const void* const key[]) { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + // Make hash-value + uintptr_t hash = 0; + for (int i = 0; i < depth; i++) { + hash += reinterpret_cast<uintptr_t>(key[i]); + hash += hash << 10; + hash ^= hash >> 6; + } + hash += hash << 3; + hash ^= hash >> 11; + + // Lookup stack trace in table + unsigned int hash_index = (static_cast<unsigned int>(hash)) % kHashTableSize; + for (HeapProfileBucket* bucket = bucket_table_[hash_index]; + bucket != 0; + bucket = bucket->next) { + if ((bucket->hash == hash) && (bucket->depth == depth) && + std::equal(key, key + depth, bucket->stack)) { + return bucket; + } + } + + // Create new bucket + const size_t key_size = sizeof(key[0]) * depth; + HeapProfileBucket* bucket; + if (recursive_insert) { // recursion: save in saved_buckets_ + const void** key_copy = saved_buckets_keys_[saved_buckets_count_]; + std::copy(key, 
key + depth, key_copy); + bucket = &saved_buckets_[saved_buckets_count_]; + memset(bucket, 0, sizeof(*bucket)); + ++saved_buckets_count_; + bucket->stack = key_copy; + bucket->next = NULL; + } else { + recursive_insert = true; + const void** key_copy = static_cast<const void**>( + MyAllocator::Allocate(key_size)); + recursive_insert = false; + std::copy(key, key + depth, key_copy); + recursive_insert = true; + bucket = static_cast<HeapProfileBucket*>( + MyAllocator::Allocate(sizeof(HeapProfileBucket))); + recursive_insert = false; + memset(bucket, 0, sizeof(*bucket)); + bucket->stack = key_copy; + bucket->next = bucket_table_[hash_index]; + } + bucket->hash = hash; + bucket->depth = depth; + bucket_table_[hash_index] = bucket; + ++num_buckets_; + return bucket; +} + +MemoryRegionMap::RegionIterator MemoryRegionMap::BeginRegionLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + RAW_CHECK(regions_ != NULL, ""); + return regions_->begin(); +} + +MemoryRegionMap::RegionIterator MemoryRegionMap::EndRegionLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + RAW_CHECK(regions_ != NULL, ""); + return regions_->end(); +} + +inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { + RAW_VLOG(12, "Inserting region %p..%p from %p", + reinterpret_cast<void*>(region.start_addr), + reinterpret_cast<void*>(region.end_addr), + reinterpret_cast<void*>(region.caller())); + RegionSet::const_iterator i = regions_->lower_bound(region); + if (i != regions_->end() && i->start_addr <= region.start_addr) { + RAW_DCHECK(region.end_addr <= i->end_addr, ""); // lower_bound ensures this + return; // 'region' is a subset of an already recorded region; do nothing + // We can be stricter and allow this only when *i has been created via + // an mmap with MAP_NORESERVE flag set. 
+ } + if (DEBUG_MODE) { + RAW_CHECK(i == regions_->end() || !region.Overlaps(*i), + "Wow, overlapping memory regions"); + Region sample; + sample.SetRegionSetKey(region.start_addr); + i = regions_->lower_bound(sample); + RAW_CHECK(i == regions_->end() || !region.Overlaps(*i), + "Wow, overlapping memory regions"); + } + region.AssertIsConsistent(); // just making sure + // This inserts and allocates permanent storage for region + // and its call stack data: it's safe to do it now: + regions_->insert(region); + RAW_VLOG(12, "Inserted region %p..%p :", + reinterpret_cast<void*>(region.start_addr), + reinterpret_cast<void*>(region.end_addr)); + if (VLOG_IS_ON(12)) LogAllLocked(); +} + +// These variables are local to MemoryRegionMap::InsertRegionLocked() +// and MemoryRegionMap::HandleSavedRegionsLocked() +// and are file-level to ensure that they are initialized at load time. + +// Number of unprocessed region inserts. +static int saved_regions_count = 0; + +// Unprocessed inserts (must be big enough to hold all allocations that can +// be caused by a InsertRegionLocked call). +// Region has no constructor, so that c-tor execution does not interfere +// with the any-time use of the static memory behind saved_regions. +static MemoryRegionMap::Region saved_regions[20]; + +inline void MemoryRegionMap::HandleSavedRegionsLocked( + void (*insert_func)(const Region& region)) { + while (saved_regions_count > 0) { + // Making a local-var copy of the region argument to insert_func + // including its stack (w/o doing any memory allocations) is important: + // in many cases the memory in saved_regions + // will get written-to during the (*insert_func)(r) call below. 
+ Region r = saved_regions[--saved_regions_count]; + (*insert_func)(r); + } +} + +void MemoryRegionMap::RestoreSavedBucketsLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + while (saved_buckets_count_ > 0) { + HeapProfileBucket bucket = saved_buckets_[--saved_buckets_count_]; + unsigned int hash_index = + static_cast<unsigned int>(bucket.hash) % kHashTableSize; + bool is_found = false; + for (HeapProfileBucket* curr = bucket_table_[hash_index]; + curr != 0; + curr = curr->next) { + if ((curr->hash == bucket.hash) && (curr->depth == bucket.depth) && + std::equal(bucket.stack, bucket.stack + bucket.depth, curr->stack)) { + curr->allocs += bucket.allocs; + curr->alloc_size += bucket.alloc_size; + curr->frees += bucket.frees; + curr->free_size += bucket.free_size; + is_found = true; + break; + } + } + if (is_found) continue; + + const size_t key_size = sizeof(bucket.stack[0]) * bucket.depth; + const void** key_copy = static_cast<const void**>( + MyAllocator::Allocate(key_size)); + std::copy(bucket.stack, bucket.stack + bucket.depth, key_copy); + HeapProfileBucket* new_bucket = static_cast<HeapProfileBucket*>( + MyAllocator::Allocate(sizeof(HeapProfileBucket))); + memset(new_bucket, 0, sizeof(*new_bucket)); + new_bucket->hash = bucket.hash; + new_bucket->depth = bucket.depth; + new_bucket->stack = key_copy; + new_bucket->next = bucket_table_[hash_index]; + bucket_table_[hash_index] = new_bucket; + ++num_buckets_; + } +} + +inline void MemoryRegionMap::InsertRegionLocked(const Region& region) { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + // We can be called recursively, because RegionSet constructor + // and DoInsertRegionLocked() (called below) can call the allocator. + // recursive_insert tells us if that's the case. When this happens, + // region insertion information is recorded in saved_regions[], + // and taken into account when the recursion unwinds. 
+ // Do the insert: + if (recursive_insert) { // recursion: save in saved_regions + RAW_VLOG(12, "Saving recursive insert of region %p..%p from %p", + reinterpret_cast<void*>(region.start_addr), + reinterpret_cast<void*>(region.end_addr), + reinterpret_cast<void*>(region.caller())); + RAW_CHECK(saved_regions_count < arraysize(saved_regions), ""); + // Copy 'region' to saved_regions[saved_regions_count] + // together with the contents of its call_stack, + // then increment saved_regions_count. + saved_regions[saved_regions_count++] = region; + } else { // not a recusrive call + if (regions_ == NULL) { // init regions_ + RAW_VLOG(12, "Initializing region set"); + regions_ = regions_rep.region_set(); + recursive_insert = true; + new(regions_) RegionSet(); + HandleSavedRegionsLocked(&DoInsertRegionLocked); + recursive_insert = false; + } + recursive_insert = true; + // Do the actual insertion work to put new regions into regions_: + DoInsertRegionLocked(region); + HandleSavedRegionsLocked(&DoInsertRegionLocked); + recursive_insert = false; + } +} + +// We strip out different number of stack frames in debug mode +// because less inlining happens in that case +#ifdef NDEBUG +static const int kStripFrames = 1; +#else +static const int kStripFrames = 3; +#endif + +void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) { + // Record start/end info about this memory acquisition call in a new region: + Region region; + region.Create(start, size); + // First get the call stack info into the local varible 'region': + int depth = 0; + // NOTE: libunwind also does mmap and very much likely while holding + // it's own lock(s). So some threads may first take libunwind lock, + // and then take region map lock (necessary to record mmap done from + // inside libunwind). On the other hand other thread(s) may do + // normal mmap. Which would call this method to record it. 
Which
+ // would then proceed with installing that record to region map
+ // while holding region map lock. That may cause mmap from our own
+ // internal allocators, so attempt to unwind in this case may cause
+ // reverse order of taking libunwind and region map locks. Which is
+ // obvious deadlock.
+ //
+ // Thankfully, we can easily detect if we're holding region map lock
+ // and avoid recording backtrace in this (rare and largely
+ // irrelevant) case. By doing this we "declare" that thread needing
+ // both locks must take region map lock last. In other words we do
+ // not allow taking libunwind lock when we already have region map
+ // lock. Note, this is generally impossible when somebody tries to
+ // mix cpu profiling and heap checking/profiling, because cpu
+ // profiler grabs backtraces at arbitrary places. But at least such
+ // combination is rarer and less relevant.
+ if (max_stack_depth_ > 0 && !LockIsHeld()) {
+ depth = MallocHook::GetCallerStackTrace(const_cast<void**>(region.call_stack),
+ max_stack_depth_, kStripFrames + 1);
+ }
+ region.set_call_stack_depth(depth); // record stack info fully
+ RAW_VLOG(10, "New global region %p..%p from %p",
+ reinterpret_cast<void*>(region.start_addr),
+ reinterpret_cast<void*>(region.end_addr),
+ reinterpret_cast<void*>(region.caller()));
+ // Note: none of the above allocates memory.
+ Lock(); // recursively lock
+ map_size_ += size;
+ InsertRegionLocked(region);
+ // This will (eventually) allocate storage for and copy over the stack data
+ // from region.call_stack_data_ that is pointed by region.call_stack().
+ if (bucket_table_ != NULL) { + HeapProfileBucket* b = GetBucket(depth, region.call_stack); + ++b->allocs; + b->alloc_size += size; + if (!recursive_insert) { + recursive_insert = true; + RestoreSavedBucketsLocked(); + recursive_insert = false; + } + } + Unlock(); +} + +void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { + Lock(); + if (recursive_insert) { + // First remove the removed region from saved_regions, if it's + // there, to prevent overrunning saved_regions in recursive + // map/unmap call sequences, and also from later inserting regions + // which have already been unmapped. + uintptr_t start_addr = reinterpret_cast<uintptr_t>(start); + uintptr_t end_addr = start_addr + size; + int put_pos = 0; + int old_count = saved_regions_count; + for (int i = 0; i < old_count; ++i, ++put_pos) { + Region& r = saved_regions[i]; + if (r.start_addr == start_addr && r.end_addr == end_addr) { + // An exact match, so it's safe to remove. + RecordRegionRemovalInBucket(r.call_stack_depth, r.call_stack, size); + --saved_regions_count; + --put_pos; + RAW_VLOG(10, ("Insta-Removing saved region %p..%p; " + "now have %d saved regions"), + reinterpret_cast<void*>(start_addr), + reinterpret_cast<void*>(end_addr), + saved_regions_count); + } else { + if (put_pos < i) { + saved_regions[put_pos] = saved_regions[i]; + } + } + } + } + if (regions_ == NULL) { // We must have just unset the hooks, + // but this thread was already inside the hook. 
+ Unlock(); + return; + } + if (!recursive_insert) { + HandleSavedRegionsLocked(&InsertRegionLocked); + } + // first handle adding saved regions if any + uintptr_t start_addr = reinterpret_cast<uintptr_t>(start); + uintptr_t end_addr = start_addr + size; + // subtract start_addr, end_addr from all the regions + RAW_VLOG(10, "Removing global region %p..%p; have %" PRIuS " regions", + reinterpret_cast<void*>(start_addr), + reinterpret_cast<void*>(end_addr), + regions_->size()); + Region sample; + sample.SetRegionSetKey(start_addr); + // Only iterate over the regions that might overlap start_addr..end_addr: + for (RegionSet::iterator region = regions_->lower_bound(sample); + region != regions_->end() && region->start_addr < end_addr; + /*noop*/) { + RAW_VLOG(13, "Looking at region %p..%p", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + if (start_addr <= region->start_addr && + region->end_addr <= end_addr) { // full deletion + RAW_VLOG(12, "Deleting region %p..%p", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack, + region->end_addr - region->start_addr); + RegionSet::iterator d = region; + ++region; + regions_->erase(d); + continue; + } else if (region->start_addr < start_addr && + end_addr < region->end_addr) { // cutting-out split + RAW_VLOG(12, "Splitting region %p..%p in two", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack, + end_addr - start_addr); + // Make another region for the start portion: + // The new region has to be the start portion because we can't + // just modify region->end_addr as it's the sorting key. 
+ Region r = *region; + r.set_end_addr(start_addr); + InsertRegionLocked(r); + // cut *region from start: + const_cast<Region&>(*region).set_start_addr(end_addr); + } else if (end_addr > region->start_addr && + start_addr <= region->start_addr) { // cut from start + RAW_VLOG(12, "Start-chopping region %p..%p", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack, + end_addr - region->start_addr); + const_cast<Region&>(*region).set_start_addr(end_addr); + } else if (start_addr > region->start_addr && + start_addr < region->end_addr) { // cut from end + RAW_VLOG(12, "End-chopping region %p..%p", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + RecordRegionRemovalInBucket(region->call_stack_depth, region->call_stack, + region->end_addr - start_addr); + // Can't just modify region->end_addr (it's the sorting key): + Region r = *region; + r.set_end_addr(start_addr); + RegionSet::iterator d = region; + ++region; + // It's safe to erase before inserting since r is independent of *d: + // r contains an own copy of the call stack: + regions_->erase(d); + InsertRegionLocked(r); + continue; + } + ++region; + } + RAW_VLOG(12, "Removed region %p..%p; have %" PRIuS " regions", + reinterpret_cast<void*>(start_addr), + reinterpret_cast<void*>(end_addr), + regions_->size()); + if (VLOG_IS_ON(12)) LogAllLocked(); + unmap_size_ += size; + Unlock(); +} + +void MemoryRegionMap::RecordRegionRemovalInBucket(int depth, + const void* const stack[], + size_t size) { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + if (bucket_table_ == NULL) return; + HeapProfileBucket* b = GetBucket(depth, stack); + ++b->frees; + b->free_size += size; +} + +void MemoryRegionMap::MmapHook(const void* result, + const void* start, size_t size, + int prot, int flags, + int fd, off_t offset) { + // TODO(maxim): replace all 0x%" PRIxS " by 
%p when RAW_VLOG uses a safe + // snprintf reimplementation that does not malloc to pretty-print NULL + RAW_VLOG(10, "MMap = 0x%" PRIxPTR " of %" PRIuS " at %" PRIu64 " " + "prot %d flags %d fd %d offs %" PRId64, + reinterpret_cast<uintptr_t>(result), size, + reinterpret_cast<uint64>(start), prot, flags, fd, + static_cast<int64>(offset)); + if (result != reinterpret_cast<void*>(MAP_FAILED) && size != 0) { + RecordRegionAddition(result, size); + } +} + +void MemoryRegionMap::MunmapHook(const void* ptr, size_t size) { + RAW_VLOG(10, "MUnmap of %p %" PRIuS "", ptr, size); + if (size != 0) { + RecordRegionRemoval(ptr, size); + } +} + +void MemoryRegionMap::MremapHook(const void* result, + const void* old_addr, size_t old_size, + size_t new_size, int flags, + const void* new_addr) { + RAW_VLOG(10, "MRemap = 0x%" PRIxPTR " of 0x%" PRIxPTR " %" PRIuS " " + "to %" PRIuS " flags %d new_addr=0x%" PRIxPTR, + (uintptr_t)result, (uintptr_t)old_addr, + old_size, new_size, flags, + flags & MREMAP_FIXED ? 
(uintptr_t)new_addr : 0); + if (result != reinterpret_cast<void*>(-1)) { + RecordRegionRemoval(old_addr, old_size); + RecordRegionAddition(result, new_size); + } +} + +void MemoryRegionMap::SbrkHook(const void* result, ptrdiff_t increment) { + RAW_VLOG(10, "Sbrk = 0x%" PRIxPTR " of %" PRIdS "", (uintptr_t)result, increment); + if (result != reinterpret_cast<void*>(-1)) { + if (increment > 0) { + void* new_end = sbrk(0); + RecordRegionAddition(result, reinterpret_cast<uintptr_t>(new_end) - + reinterpret_cast<uintptr_t>(result)); + } else if (increment < 0) { + void* new_end = sbrk(0); + RecordRegionRemoval(new_end, reinterpret_cast<uintptr_t>(result) - + reinterpret_cast<uintptr_t>(new_end)); + } + } +} + +void MemoryRegionMap::LogAllLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + RAW_LOG(INFO, "List of regions:"); + uintptr_t previous = 0; + for (RegionSet::const_iterator r = regions_->begin(); + r != regions_->end(); ++r) { + RAW_LOG(INFO, "Memory region 0x%" PRIxPTR "..0x%" PRIxPTR " " + "from 0x%" PRIxPTR " stack=%d", + r->start_addr, r->end_addr, r->caller(), r->is_stack); + RAW_CHECK(previous < r->end_addr, "wow, we messed up the set order"); + // this must be caused by uncontrolled recursive operations on regions_ + previous = r->end_addr; + } + RAW_LOG(INFO, "End of regions list"); +} diff --git a/src/third_party/gperftools-2.7/src/memory_region_map.h b/src/third_party/gperftools-2.7/src/memory_region_map.h new file mode 100644 index 00000000000..ec388e1cc54 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/memory_region_map.h @@ -0,0 +1,413 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2006, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * ---
+ * Author: Maxim Lifantsev
+ */
+
+#ifndef BASE_MEMORY_REGION_MAP_H_
+#define BASE_MEMORY_REGION_MAP_H_
+
+#include <config.h>
+
+#ifdef HAVE_PTHREAD
+#include <pthread.h>
+#endif
+#include <stddef.h>
+#include <set>
+#include "base/stl_allocator.h"
+#include "base/spinlock.h"
+#include "base/thread_annotations.h"
+#include "base/low_level_alloc.h"
+#include "heap-profile-stats.h"
+
+// TODO(maxim): add a unittest:
+// execute a bunch of mmaps and compare memory map with what strace logs
+// execute a bunch of mmap/munmap and compare memory map with
+// own accounting of what those mmaps generated
+
+// Thread-safe class to collect and query the map of all memory regions
+// in a process that have been created with mmap, munmap, mremap, sbrk.
+// For each memory region, we keep track of (and provide to users)
+// the stack trace that allocated that memory region.
+// The recorded stack trace depth is bounded by
+// a user-supplied max_stack_depth parameter of Init().
+// After initialization with Init()
+// (which can happen even before global object constructor execution)
+// we collect the map by installing and monitoring MallocHook-s
+// to mmap, munmap, mremap, sbrk.
+// At any time one can query this map via provided interface.
+// For more details on the design of MemoryRegionMap
+// see the comment at the top of our .cc file.
+class MemoryRegionMap {
+ private:
+ // Max call stack recording depth supported by Init(). Set it to be
+ // high enough for all our clients. Note: we do not define storage
+ // for this (doing that requires special handling in windows), so
+ // don't take the address of it!
+ static const int kMaxStackDepth = 32;
+
+ // Size of the hash table of buckets. A structure of the bucket table is
+ // described in heap-profile-stats.h.
+ static const int kHashTableSize = 179999;
+
+ public:
+ // interface ================================================================
+
+ // Every client of MemoryRegionMap must call Init() before first use,
+ // and Shutdown() after last use. This allows us to reference count
+ // this (singleton) class properly. MemoryRegionMap assumes it's the
+ // only client of MallocHooks, so a client can only register other
+ // MallocHooks after calling Init() and must unregister them before
+ // calling Shutdown().
+
+ // Initialize this module to record memory allocation stack traces.
+ // Stack traces that have more than "max_stack_depth" frames
+ // are automatically shrunk to "max_stack_depth" when they are recorded.
+ // Init() can be called more than once w/o harm, largest max_stack_depth
+ // will be the effective one.
+ // When "use_buckets" is true, then counts of mmap and munmap sizes will be
+ // recorded with each stack trace. If Init() is called more than once, then
+ // counting will be effective after any call contained "use_buckets" of true.
+ // It will install mmap, munmap, mremap, sbrk hooks
+ // and initialize arena_ and our hook and locks, hence one can use
+ // MemoryRegionMap::Lock()/Unlock() to manage the locks.
+ // Uses Lock/Unlock inside.
+ static void Init(int max_stack_depth, bool use_buckets);
+
+ // Try to shutdown this module undoing what Init() did.
+ // Returns true iff could do full shutdown (or it was not attempted).
+ // Full shutdown is attempted when the number of Shutdown() calls equals
+ // the number of Init() calls.
+ static bool Shutdown();
+
+ // Return true if MemoryRegionMap is initialized and recording, i.e. when
+ // the number of Init() calls is more than the number of Shutdown() calls.
+ static bool IsRecordingLocked();
+
+ // Locks to protect our internal data structures.
+ // These also protect use of arena_ if our Init() has been done.
+ // The lock is recursive.
+ static void Lock() EXCLUSIVE_LOCK_FUNCTION(lock_);
+ static void Unlock() UNLOCK_FUNCTION(lock_);
+
+ // Returns true when the lock is held by this thread (for use in RAW_CHECK-s).
+ static bool LockIsHeld();
+
+ // Locker object that acquires the MemoryRegionMap::Lock
+ // for the duration of its lifetime (a C++ scope).
+ class LockHolder {
+ public:
+ LockHolder() { Lock(); }
+ ~LockHolder() { Unlock(); }
+ private:
+ DISALLOW_COPY_AND_ASSIGN(LockHolder);
+ };
+
+ // A memory region that we know about through malloc_hook-s.
+ // This is essentially an interface through which MemoryRegionMap
+ // exports the collected data to its clients. Thread-compatible.
+ struct Region {
+ uintptr_t start_addr; // region start address
+ uintptr_t end_addr; // region end address
+ int call_stack_depth; // number of caller stack frames that we saved
+ const void* call_stack[kMaxStackDepth]; // caller address stack array
+ // filled to call_stack_depth size
+ bool is_stack; // does this region contain a thread's stack:
+ // a user of MemoryRegionMap supplies this info
+
+ // Convenience accessor for call_stack[0],
+ // i.e. (the program counter of) the immediate caller
+ // of this region's allocation function,
+ // but it also returns NULL when call_stack_depth is 0,
+ // i.e. when we weren't able to get the call stack.
+ // This usually happens in recursive calls, when the stack-unwinder
+ // calls mmap() which in turn calls the stack-unwinder.
+ uintptr_t caller() const {
+ return reinterpret_cast<uintptr_t>(call_stack_depth >= 1
+ ? call_stack[0] : NULL);
+ }
+
+ // Return true iff this region overlaps region x.
+ bool Overlaps(const Region& x) const { + return start_addr < x.end_addr && end_addr > x.start_addr; + } + + private: // helpers for MemoryRegionMap + friend class MemoryRegionMap; + + // The ways we create Region-s: + void Create(const void* start, size_t size) { + start_addr = reinterpret_cast<uintptr_t>(start); + end_addr = start_addr + size; + is_stack = false; // not a stack till marked such + call_stack_depth = 0; + AssertIsConsistent(); + } + void set_call_stack_depth(int depth) { + RAW_DCHECK(call_stack_depth == 0, ""); // only one such set is allowed + call_stack_depth = depth; + AssertIsConsistent(); + } + + // The ways we modify Region-s: + void set_is_stack() { is_stack = true; } + void set_start_addr(uintptr_t addr) { + start_addr = addr; + AssertIsConsistent(); + } + void set_end_addr(uintptr_t addr) { + end_addr = addr; + AssertIsConsistent(); + } + + // Verifies that *this contains consistent data, crashes if not the case. + void AssertIsConsistent() const { + RAW_DCHECK(start_addr < end_addr, ""); + RAW_DCHECK(call_stack_depth >= 0 && + call_stack_depth <= kMaxStackDepth, ""); + } + + // Post-default construction helper to make a Region suitable + // for searching in RegionSet regions_. + void SetRegionSetKey(uintptr_t addr) { + // make sure *this has no usable data: + if (DEBUG_MODE) memset(this, 0xFF, sizeof(*this)); + end_addr = addr; + } + + // Note: call_stack[kMaxStackDepth] as a member lets us make Region + // a simple self-contained struct with correctly behaving bit-vise copying. + // This simplifies the code of this module but wastes some memory: + // in most-often use case of this module (leak checking) + // only one call_stack element out of kMaxStackDepth is actually needed. + // Making the storage for call_stack variable-sized, + // substantially complicates memory management for the Region-s: + // as they need to be created and manipulated for some time + // w/o any memory allocations, yet are also given out to the users. 
+ }; + + // Find the region that covers addr and write its data into *result if found, + // in which case *result gets filled so that it stays fully functional + // even when the underlying region gets removed from MemoryRegionMap. + // Returns success. Uses Lock/Unlock inside. + static bool FindRegion(uintptr_t addr, Region* result); + + // Find the region that contains stack_top, mark that region as + // a stack region, and write its data into *result if found, + // in which case *result gets filled so that it stays fully functional + // even when the underlying region gets removed from MemoryRegionMap. + // Returns success. Uses Lock/Unlock inside. + static bool FindAndMarkStackRegion(uintptr_t stack_top, Region* result); + + // Iterate over the buckets which store mmap and munmap counts per stack + // trace. It calls "callback" for each bucket, and passes "arg" to it. + template<class Type> + static void IterateBuckets(void (*callback)(const HeapProfileBucket*, Type), + Type arg); + + // Get the bucket whose caller stack trace is "key". The stack trace is + // used to a depth of "depth" at most. The requested bucket is created if + // needed. + // The bucket table is described in heap-profile-stats.h. + static HeapProfileBucket* GetBucket(int depth, const void* const key[]); + + private: // our internal types ============================================== + + // Region comparator for sorting with STL + struct RegionCmp { + bool operator()(const Region& x, const Region& y) const { + return x.end_addr < y.end_addr; + } + }; + + // We allocate STL objects in our own arena. 
+ struct MyAllocator { + static void *Allocate(size_t n) { + return LowLevelAlloc::AllocWithArena(n, arena_); + } + static void Free(const void *p, size_t /* n */) { + LowLevelAlloc::Free(const_cast<void*>(p)); + } + }; + + // Set of the memory regions + typedef std::set<Region, RegionCmp, + STL_Allocator<Region, MyAllocator> > RegionSet; + + public: // more in-depth interface ========================================== + + // STL iterator with values of Region + typedef RegionSet::const_iterator RegionIterator; + + // Return the begin/end iterators to all the regions. + // These need Lock/Unlock protection around their whole usage (loop). + // Even when the same thread causes modifications during such a loop + // (which are permitted due to recursive locking) + // the loop iterator will still be valid as long as its region + // has not been deleted, but EndRegionLocked should be + // re-evaluated whenever the set of regions has changed. + static RegionIterator BeginRegionLocked(); + static RegionIterator EndRegionLocked(); + + // Return the accumulated sizes of mapped and unmapped regions. + static int64 MapSize() { return map_size_; } + static int64 UnmapSize() { return unmap_size_; } + + // Effectively private type from our .cc ================================= + // public to let us declare global objects: + union RegionSetRep; + + private: + // representation =========================================================== + + // Counter of clients of this module that have called Init(). + static int client_count_; + + // Maximal number of caller stack frames to save (>= 0). + static int max_stack_depth_; + + // Arena used for our allocations in regions_. + static LowLevelAlloc::Arena* arena_; + + // Set of the mmap/sbrk/mremap-ed memory regions + // To be accessed *only* when Lock() is held. + // Hence we protect the non-recursive lock used inside of arena_ + // with our recursive Lock(). 
This lets a user prevent deadlocks + // when threads are stopped by TCMalloc_ListAllProcessThreads at random spots + // simply by acquiring our recursive Lock() before that. + static RegionSet* regions_; + + // Lock to protect regions_ and buckets_ variables and the data behind. + static SpinLock lock_; + // Lock to protect the recursive lock itself. + static SpinLock owner_lock_; + + // Recursion count for the recursive lock. + static int recursion_count_; + // The thread id of the thread that's inside the recursive lock. + static pthread_t lock_owner_tid_; + + // Total size of all mapped pages so far + static int64 map_size_; + // Total size of all unmapped pages so far + static int64 unmap_size_; + + // Bucket hash table which is described in heap-profile-stats.h. + static HeapProfileBucket** bucket_table_ GUARDED_BY(lock_); + static int num_buckets_ GUARDED_BY(lock_); + + // The following members are local to MemoryRegionMap::GetBucket() + // and MemoryRegionMap::HandleSavedBucketsLocked() + // and are file-level to ensure that they are initialized at load time. + // + // These are used as temporary storage to break the infinite cycle of mmap + // calling our hook which (sometimes) causes mmap. It must be a static + // fixed-size array. The size 20 is just an expected value for safety. + // The details are described in memory_region_map.cc. + + // Number of unprocessed bucket inserts. + static int saved_buckets_count_ GUARDED_BY(lock_); + + // Unprocessed inserts (must be big enough to hold all mmaps that can be + // caused by a GetBucket call). + // Bucket has no constructor, so that c-tor execution does not interfere + // with the any-time use of the static memory behind saved_buckets. 
+ static HeapProfileBucket saved_buckets_[20] GUARDED_BY(lock_); + + static const void* saved_buckets_keys_[20][kMaxStackDepth] GUARDED_BY(lock_); + + // helpers ================================================================== + + // Helper for FindRegion and FindAndMarkStackRegion: + // returns the region covering 'addr' or NULL; assumes our lock_ is held. + static const Region* DoFindRegionLocked(uintptr_t addr); + + // Verifying wrapper around regions_->insert(region) + // To be called to do InsertRegionLocked's work only! + inline static void DoInsertRegionLocked(const Region& region); + // Handle regions saved by InsertRegionLocked into a tmp static array + // by calling insert_func on them. + inline static void HandleSavedRegionsLocked( + void (*insert_func)(const Region& region)); + + // Restore buckets saved in a tmp static array by GetBucket to the bucket + // table where all buckets eventually should be. + static void RestoreSavedBucketsLocked(); + + // Wrapper around DoInsertRegionLocked + // that handles the case of recursive allocator calls. + inline static void InsertRegionLocked(const Region& region); + + // Record addition of a memory region at address "start" of size "size" + // (called from our mmap/mremap/sbrk hooks). + static void RecordRegionAddition(const void* start, size_t size); + // Record deletion of a memory region at address "start" of size "size" + // (called from our munmap/mremap/sbrk hooks). + static void RecordRegionRemoval(const void* start, size_t size); + + // Record deletion of a memory region of size "size" in a bucket whose + // caller stack trace is "key". The stack trace is used to a depth of + // "depth" at most. 
+ static void RecordRegionRemovalInBucket(int depth, + const void* const key[], + size_t size); + + // Hooks for MallocHook + static void MmapHook(const void* result, + const void* start, size_t size, + int prot, int flags, + int fd, off_t offset); + static void MunmapHook(const void* ptr, size_t size); + static void MremapHook(const void* result, const void* old_addr, + size_t old_size, size_t new_size, int flags, + const void* new_addr); + static void SbrkHook(const void* result, ptrdiff_t increment); + + // Log all memory regions; Useful for debugging only. + // Assumes Lock() is held + static void LogAllLocked(); + + DISALLOW_COPY_AND_ASSIGN(MemoryRegionMap); +}; + +template <class Type> +void MemoryRegionMap::IterateBuckets( + void (*callback)(const HeapProfileBucket*, Type), Type callback_arg) { + for (int index = 0; index < kHashTableSize; index++) { + for (HeapProfileBucket* bucket = bucket_table_[index]; + bucket != NULL; + bucket = bucket->next) { + callback(bucket, callback_arg); + } + } +} + +#endif // BASE_MEMORY_REGION_MAP_H_ diff --git a/src/third_party/gperftools-2.7/src/packed-cache-inl.h b/src/third_party/gperftools-2.7/src/packed-cache-inl.h new file mode 100644 index 00000000000..7c216e5ae39 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/packed-cache-inl.h @@ -0,0 +1,216 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Geoff Pike +// +// This file provides a minimal cache that can hold a <key, value> pair +// with little if any wasted space. The types of the key and value +// must be unsigned integral types or at least have unsigned semantics +// for >>, casting, and similar operations. +// +// Synchronization is not provided. However, the cache is implemented +// as an array of cache entries whose type is chosen at compile time. +// If a[i] is atomic on your hardware for the chosen array type then +// raciness will not necessarily lead to bugginess. The cache entries +// must be large enough to hold a partial key and a value packed +// together. The partial keys are bit strings of length +// kKeybits - kHashbits, and the values are bit strings of length kValuebits. +// +// In an effort to use minimal space, every cache entry represents +// some <key, value> pair; the class provides no way to mark a cache +// entry as empty or uninitialized. 
In practice, you may want to have +// reserved keys or values to get around this limitation. For example, in +// tcmalloc's PageID-to-sizeclass cache, a value of 0 is used as +// "unknown sizeclass." +// +// Usage Considerations +// -------------------- +// +// kHashbits controls the size of the cache. The best value for +// kHashbits will of course depend on the application. Perhaps try +// tuning the value of kHashbits by measuring different values on your +// favorite benchmark. Also remember not to be a pig; other +// programs that need resources may suffer if you are. +// +// The main uses for this class will be when performance is +// critical and there's a convenient type to hold the cache's +// entries. As described above, the number of bits required +// for a cache entry is (kKeybits - kHashbits) + kValuebits. Suppose +// kKeybits + kValuebits is 43. Then it probably makes sense to +// chose kHashbits >= 11 so that cache entries fit in a uint32. +// +// On the other hand, suppose kKeybits = kValuebits = 64. Then +// using this class may be less worthwhile. You'll probably +// be using 128 bits for each entry anyway, so maybe just pick +// a hash function, H, and use an array indexed by H(key): +// void Put(K key, V value) { a_[H(key)] = pair<K, V>(key, value); } +// V GetOrDefault(K key, V default) { const pair<K, V> &p = a_[H(key)]; ... } +// etc. +// +// Further Details +// --------------- +// +// For caches used only by one thread, the following is true: +// 1. For a cache c, +// (c.Put(key, value), c.GetOrDefault(key, 0)) == value +// and +// (c.Put(key, value), <...>, c.GetOrDefault(key, 0)) == value +// if the elided code contains no c.Put calls. +// +// 2. Has(key) will return false if no <key, value> pair with that key +// has ever been Put. However, a newly initialized cache will have +// some <key, value> pairs already present. When you create a new +// cache, you must specify an "initial value." 
The initialization +// procedure is equivalent to Clear(initial_value), which is +// equivalent to Put(k, initial_value) for all keys k from 0 to +// 2^kHashbits - 1. +// +// 3. If key and key' differ then the only way Put(key, value) may +// cause Has(key') to change is that Has(key') may change from true to +// false. Furthermore, a Put() call that doesn't change Has(key') +// doesn't change GetOrDefault(key', ...) either. +// +// Implementation details: +// +// This is a direct-mapped cache with 2^kHashbits entries; the hash +// function simply takes the low bits of the key. We store whole keys +// if a whole key plus a whole value fits in an entry. Otherwise, an +// entry is the high bits of a key and a value, packed together. +// E.g., a 20 bit key and a 7 bit value only require a uint16 for each +// entry if kHashbits >= 11. +// +// Alternatives to this scheme will be added as needed. + +#ifndef TCMALLOC_PACKED_CACHE_INL_H_ +#define TCMALLOC_PACKED_CACHE_INL_H_ + +#include "config.h" +#include <stddef.h> // for size_t +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif +#include "base/basictypes.h" +#include "common.h" +#include "internal_logging.h" + +// A safe way of doing "(1 << n) - 1" -- without worrying about overflow +// Note this will all be resolved to a constant expression at compile-time +#define N_ONES_(IntType, N) \ + ( (N) == 0 ? 0 : ((static_cast<IntType>(1) << ((N)-1))-1 + \ + (static_cast<IntType>(1) << ((N)-1))) ) + +// The types K and V provide upper bounds on the number of valid keys +// and values, but we explicitly require the keys to be less than +// 2^kKeybits and the values to be less than 2^kValuebits. The size +// of the table is controlled by kHashbits, and the type of each entry +// in the cache is uintptr_t (native machine word). See also the big +// comment at the top of the file. 
+template <int kKeybits> +class PackedCache { + public: + typedef uintptr_t T; + typedef uintptr_t K; + typedef uint32 V; +#ifdef TCMALLOC_SMALL_BUT_SLOW + // Decrease the size map cache if running in the small memory mode. + static const int kHashbits = 12; +#else + static const int kHashbits = 16; +#endif + static const int kValuebits = 7; + // one bit after value bits + static const int kInvalidMask = 0x80; + + explicit PackedCache() { + COMPILE_ASSERT(kKeybits + kValuebits + 1 <= 8 * sizeof(T), use_whole_keys); + COMPILE_ASSERT(kHashbits <= kKeybits, hash_function); + COMPILE_ASSERT(kHashbits >= kValuebits + 1, small_values_space); + Clear(); + } + + bool TryGet(K key, V* out) const { + // As with other code in this class, we touch array_ as few times + // as we can. Assuming entries are read atomically then certain + // races are harmless. + ASSERT(key == (key & kKeyMask)); + T hash = Hash(key); + T expected_entry = key; + expected_entry &= ~N_ONES_(T, kHashbits); + T entry = array_[hash]; + entry ^= expected_entry; + if (PREDICT_FALSE(entry >= (1 << kValuebits))) { + return false; + } + *out = static_cast<V>(entry); + return true; + } + + void Clear() { + // sets 'invalid' bit in every byte, include value byte + memset(const_cast<T* >(array_), kInvalidMask, sizeof(array_)); + } + + void Put(K key, V value) { + ASSERT(key == (key & kKeyMask)); + ASSERT(value == (value & kValueMask)); + array_[Hash(key)] = KeyToUpper(key) | value; + } + + void Invalidate(K key) { + ASSERT(key == (key & kKeyMask)); + array_[Hash(key)] = KeyToUpper(key) | kInvalidMask; + } + + private: + // we just wipe all hash bits out of key. I.e. clear lower + // kHashbits. We rely on compiler knowing value of Hash(k). + static T KeyToUpper(K k) { + return static_cast<T>(k) ^ Hash(k); + } + + static T Hash(K key) { + return static_cast<T>(key) & N_ONES_(size_t, kHashbits); + } + + // For masking a K. + static const K kKeyMask = N_ONES_(K, kKeybits); + + // For masking a V or a T. 
+ static const V kValueMask = N_ONES_(V, kValuebits); + + // array_ is the cache. Its elements are volatile because any + // thread can write any array element at any time. + volatile T array_[1 << kHashbits]; +}; + +#undef N_ONES_ + +#endif // TCMALLOC_PACKED_CACHE_INL_H_ diff --git a/src/third_party/gperftools-2.7/src/page_heap.cc b/src/third_party/gperftools-2.7/src/page_heap.cc new file mode 100644 index 00000000000..7dd56460627 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/page_heap.cc @@ -0,0 +1,726 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <config.h> +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> // for PRIuPTR +#endif +#include <errno.h> // for ENOMEM, errno +#include <gperftools/malloc_extension.h> // for MallocRange, etc +#include "base/basictypes.h" +#include "base/commandlineflags.h" +#include "internal_logging.h" // for ASSERT, TCMalloc_Printer, etc +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "static_vars.h" // for Static +#include "system-alloc.h" // for TCMalloc_SystemAlloc, etc + +DEFINE_double(tcmalloc_release_rate, + EnvToDouble("TCMALLOC_RELEASE_RATE", 1.0), + "Rate at which we release unused memory to the system. " + "Zero means we never release memory back to the system. " + "Increase this flag to return memory faster; decrease it " + "to return memory slower. Reasonable rates are in the " + "range [0,10]"); + +DEFINE_int64(tcmalloc_heap_limit_mb, + EnvToInt("TCMALLOC_HEAP_LIMIT_MB", 0), + "Limit total size of the process heap to the " + "specified number of MiB. " + "When we approach the limit the memory is released " + "to the system more aggressively (more minor page faults). 
" + "Zero means to allocate as long as system allows."); + +namespace tcmalloc { + +PageHeap::PageHeap() + : pagemap_(MetaDataAlloc), + scavenge_counter_(0), + // Start scavenging at kMaxPages list + release_index_(kMaxPages), + aggressive_decommit_(false) { + COMPILE_ASSERT(kClassSizesMax <= (1 << PageMapCache::kValuebits), valuebits); + for (int i = 0; i < kMaxPages; i++) { + DLL_Init(&free_[i].normal); + DLL_Init(&free_[i].returned); + } +} + +Span* PageHeap::SearchFreeAndLargeLists(Length n) { + ASSERT(Check()); + ASSERT(n > 0); + + // Find first size >= n that has a non-empty list + for (Length s = n; s <= kMaxPages; s++) { + Span* ll = &free_[s - 1].normal; + // If we're lucky, ll is non-empty, meaning it has a suitable span. + if (!DLL_IsEmpty(ll)) { + ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST); + return Carve(ll->next, n); + } + // Alternatively, maybe there's a usable returned span. + ll = &free_[s - 1].returned; + if (!DLL_IsEmpty(ll)) { + // We did not call EnsureLimit before, to avoid releasing the span + // that will be taken immediately back. + // Calling EnsureLimit here is not very expensive, as it fails only if + // there is no more normal spans (and it fails efficiently) + // or SystemRelease does not work (there is probably no returned spans). + if (EnsureLimit(n)) { + // ll may have became empty due to coalescing + if (!DLL_IsEmpty(ll)) { + ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST); + return Carve(ll->next, n); + } + } + } + } + // No luck in free lists, our last chance is in a larger class. 
+ return AllocLarge(n); // May be NULL +} + +static const size_t kForcedCoalesceInterval = 128*1024*1024; + +Span* PageHeap::New(Length n) { + ASSERT(Check()); + ASSERT(n > 0); + + Span* result = SearchFreeAndLargeLists(n); + if (result != NULL) + return result; + + if (stats_.free_bytes != 0 && stats_.unmapped_bytes != 0 + && stats_.free_bytes + stats_.unmapped_bytes >= stats_.system_bytes / 4 + && (stats_.system_bytes / kForcedCoalesceInterval + != (stats_.system_bytes + (n << kPageShift)) / kForcedCoalesceInterval)) { + // We're about to grow heap, but there are lots of free pages. + // tcmalloc's design decision to keep unmapped and free spans + // separately and never coalesce them means that sometimes there + // can be free pages span of sufficient size, but it consists of + // "segments" of different type so page heap search cannot find + // it. In order to prevent growing heap and wasting memory in such + // case we're going to unmap all free pages. So that all free + // spans are maximally coalesced. + // + // We're also limiting 'rate' of going into this path to be at + // most once per 128 megs of heap growth. Otherwise programs that + // grow heap frequently (and that means by small amount) could be + // penalized with higher count of minor page faults. + // + // See also large_heap_fragmentation_unittest.cc and + // https://code.google.com/p/gperftools/issues/detail?id=368 + ReleaseAtLeastNPages(static_cast<Length>(0x7fffffff)); + + // then try again. If we are forced to grow heap because of large + // spans fragmentation and not because of problem described above, + // then at the very least we've just unmapped free but + // insufficiently big large spans back to OS. So in case of really + // unlucky memory fragmentation we'll be consuming virtual address + // space, but not real memory + result = SearchFreeAndLargeLists(n); + if (result != NULL) return result; + } + + // Grow the heap and try again. 
+ if (!GrowHeap(n)) { + ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); + ASSERT(Check()); + // underlying SysAllocator likely set ENOMEM but we can get here + // due to EnsureLimit so we set it here too. + // + // Setting errno to ENOMEM here allows us to avoid dealing with it + // in fast-path. + errno = ENOMEM; + return NULL; + } + return SearchFreeAndLargeLists(n); +} + +Span* PageHeap::AllocLarge(Length n) { + Span *best = NULL; + Span *best_normal = NULL; + + // Create a Span to use as an upper bound. + Span bound; + bound.start = 0; + bound.length = n; + + // First search the NORMAL spans.. + SpanSet::iterator place = large_normal_.upper_bound(SpanPtrWithLength(&bound)); + if (place != large_normal_.end()) { + best = place->span; + best_normal = best; + ASSERT(best->location == Span::ON_NORMAL_FREELIST); + } + + // Try to find better fit from RETURNED spans. + place = large_returned_.upper_bound(SpanPtrWithLength(&bound)); + if (place != large_returned_.end()) { + Span *c = place->span; + ASSERT(c->location == Span::ON_RETURNED_FREELIST); + if (best_normal == NULL + || c->length < best->length + || (c->length == best->length && c->start < best->start)) + best = place->span; + } + + if (best == best_normal) { + return best == NULL ? NULL : Carve(best, n); + } + + // best comes from RETURNED set. + + if (EnsureLimit(n, false)) { + return Carve(best, n); + } + + if (EnsureLimit(n, true)) { + // best could have been destroyed by coalescing. + // best_normal is not a best-fit, and it could be destroyed as well. + // We retry, the limit is already ensured: + return AllocLarge(n); + } + + // If best_normal existed, EnsureLimit would succeeded: + ASSERT(best_normal == NULL); + // We are not allowed to take best from returned list. 
+ return NULL; +} + +Span* PageHeap::Split(Span* span, Length n) { + ASSERT(0 < n); + ASSERT(n < span->length); + ASSERT(span->location == Span::IN_USE); + ASSERT(span->sizeclass == 0); + Event(span, 'T', n); + + const int extra = span->length - n; + Span* leftover = NewSpan(span->start + n, extra); + ASSERT(leftover->location == Span::IN_USE); + Event(leftover, 'U', extra); + RecordSpan(leftover); + pagemap_.set(span->start + n - 1, span); // Update map from pageid to span + span->length = n; + + return leftover; +} + +void PageHeap::CommitSpan(Span* span) { + ++stats_.commit_count; + + TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift), + static_cast<size_t>(span->length << kPageShift)); + stats_.committed_bytes += span->length << kPageShift; + stats_.total_commit_bytes += (span->length << kPageShift); +} + +bool PageHeap::DecommitSpan(Span* span) { + ++stats_.decommit_count; + + bool rv = TCMalloc_SystemRelease(reinterpret_cast<void*>(span->start << kPageShift), + static_cast<size_t>(span->length << kPageShift)); + if (rv) { + stats_.committed_bytes -= span->length << kPageShift; + stats_.total_decommit_bytes += (span->length << kPageShift); + } + + return rv; +} + +Span* PageHeap::Carve(Span* span, Length n) { + ASSERT(n > 0); + ASSERT(span->location != Span::IN_USE); + const int old_location = span->location; + RemoveFromFreeList(span); + span->location = Span::IN_USE; + Event(span, 'A', n); + + const int extra = span->length - n; + ASSERT(extra >= 0); + if (extra > 0) { + Span* leftover = NewSpan(span->start + n, extra); + leftover->location = old_location; + Event(leftover, 'S', extra); + RecordSpan(leftover); + + // The previous span of |leftover| was just splitted -- no need to + // coalesce them. The next span of |leftover| was not previously coalesced + // with |span|, i.e. is NULL or has got location other than |old_location|. 
+#ifndef NDEBUG + const PageID p = leftover->start; + const Length len = leftover->length; + Span* next = GetDescriptor(p+len); + ASSERT (next == NULL || + next->location == Span::IN_USE || + next->location != leftover->location); +#endif + + PrependToFreeList(leftover); // Skip coalescing - no candidates possible + span->length = n; + pagemap_.set(span->start + n - 1, span); + } + ASSERT(Check()); + if (old_location == Span::ON_RETURNED_FREELIST) { + // We need to recommit this address space. + CommitSpan(span); + } + ASSERT(span->location == Span::IN_USE); + ASSERT(span->length == n); + ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); + return span; +} + +void PageHeap::Delete(Span* span) { + ASSERT(Check()); + ASSERT(span->location == Span::IN_USE); + ASSERT(span->length > 0); + ASSERT(GetDescriptor(span->start) == span); + ASSERT(GetDescriptor(span->start + span->length - 1) == span); + const Length n = span->length; + span->sizeclass = 0; + span->sample = 0; + span->location = Span::ON_NORMAL_FREELIST; + Event(span, 'D', span->length); + MergeIntoFreeList(span); // Coalesces if possible + IncrementalScavenge(n); + ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); + ASSERT(Check()); +} + +// Given span we're about to free and other span (still on free list), +// checks if 'other' span is mergable with 'span'. If it is, removes +// other span from free list, performs aggressive decommit if +// necessary and returns 'other' span. Otherwise 'other' span cannot +// be merged and is left untouched. In that case NULL is returned. +Span* PageHeap::CheckAndHandlePreMerge(Span* span, Span* other) { + if (other == NULL) { + return other; + } + // if we're in aggressive decommit mode and span is decommitted, + // then we try to decommit adjacent span. 
+ if (aggressive_decommit_ && other->location == Span::ON_NORMAL_FREELIST + && span->location == Span::ON_RETURNED_FREELIST) { + bool worked = DecommitSpan(other); + if (!worked) { + return NULL; + } + } else if (other->location != span->location) { + return NULL; + } + + RemoveFromFreeList(other); + return other; +} + +void PageHeap::MergeIntoFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); + + // Coalesce -- we guarantee that "p" != 0, so no bounds checking + // necessary. We do not bother resetting the stale pagemap + // entries for the pieces we are merging together because we only + // care about the pagemap entries for the boundaries. + // + // Note: depending on aggressive_decommit_ mode we allow only + // similar spans to be coalesced. + // + // The following applies if aggressive_decommit_ is enabled: + // + // TODO(jar): "Always decommit" causes some extra calls to commit when we are + // called in GrowHeap() during an allocation :-/. We need to eval the cost of + // that oscillation, and possibly do something to reduce it. + + // TODO(jar): We need a better strategy for deciding to commit, or decommit, + // based on memory usage and free heap sizes. 
+ + const PageID p = span->start; + const Length n = span->length; + + if (aggressive_decommit_ && span->location == Span::ON_NORMAL_FREELIST) { + if (DecommitSpan(span)) { + span->location = Span::ON_RETURNED_FREELIST; + } + } + + Span* prev = CheckAndHandlePreMerge(span, GetDescriptor(p-1)); + if (prev != NULL) { + // Merge preceding span into this span + ASSERT(prev->start + prev->length == p); + const Length len = prev->length; + DeleteSpan(prev); + span->start -= len; + span->length += len; + pagemap_.set(span->start, span); + Event(span, 'L', len); + } + Span* next = CheckAndHandlePreMerge(span, GetDescriptor(p+n)); + if (next != NULL) { + // Merge next span into this span + ASSERT(next->start == p+n); + const Length len = next->length; + DeleteSpan(next); + span->length += len; + pagemap_.set(span->start + span->length - 1, span); + Event(span, 'R', len); + } + + PrependToFreeList(span); +} + +void PageHeap::PrependToFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); + if (span->location == Span::ON_NORMAL_FREELIST) + stats_.free_bytes += (span->length << kPageShift); + else + stats_.unmapped_bytes += (span->length << kPageShift); + + if (span->length > kMaxPages) { + SpanSet *set = &large_normal_; + if (span->location == Span::ON_RETURNED_FREELIST) + set = &large_returned_; + std::pair<SpanSet::iterator, bool> p = + set->insert(SpanPtrWithLength(span)); + ASSERT(p.second); // We never have duplicates since span->start is unique. 
+ span->SetSpanSetIterator(p.first); + return; + } + + SpanList* list = &free_[span->length - 1]; + if (span->location == Span::ON_NORMAL_FREELIST) { + DLL_Prepend(&list->normal, span); + } else { + DLL_Prepend(&list->returned, span); + } +} + +void PageHeap::RemoveFromFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); + if (span->location == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes -= (span->length << kPageShift); + } else { + stats_.unmapped_bytes -= (span->length << kPageShift); + } + if (span->length > kMaxPages) { + SpanSet *set = &large_normal_; + if (span->location == Span::ON_RETURNED_FREELIST) + set = &large_returned_; + SpanSet::iterator iter = span->ExtractSpanSetIterator(); + ASSERT(iter->span == span); + ASSERT(set->find(SpanPtrWithLength(span)) == iter); + set->erase(iter); + } else { + DLL_Remove(span); + } +} + +void PageHeap::IncrementalScavenge(Length n) { + // Fast path; not yet time to release memory + scavenge_counter_ -= n; + if (scavenge_counter_ >= 0) return; // Not yet time to scavenge + + const double rate = FLAGS_tcmalloc_release_rate; + if (rate <= 1e-6) { + // Tiny release rate means that releasing is disabled. + scavenge_counter_ = kDefaultReleaseDelay; + return; + } + + ++stats_.scavenge_count; + + Length released_pages = ReleaseAtLeastNPages(1); + + if (released_pages == 0) { + // Nothing to scavenge, delay for a while. + scavenge_counter_ = kDefaultReleaseDelay; + } else { + // Compute how long to wait until we return memory. + // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages + // after releasing one page. + const double mult = 1000.0 / rate; + double wait = mult * static_cast<double>(released_pages); + if (wait > kMaxReleaseDelay) { + // Avoid overflow and bound to reasonable range. 
+ wait = kMaxReleaseDelay; + } + scavenge_counter_ = static_cast<int64_t>(wait); + } +} + +Length PageHeap::ReleaseSpan(Span* s) { + ASSERT(s->location == Span::ON_NORMAL_FREELIST); + + if (DecommitSpan(s)) { + RemoveFromFreeList(s); + const Length n = s->length; + s->location = Span::ON_RETURNED_FREELIST; + MergeIntoFreeList(s); // Coalesces if possible. + return n; + } + + return 0; +} + +Length PageHeap::ReleaseAtLeastNPages(Length num_pages) { + Length released_pages = 0; + + // Round robin through the lists of free spans, releasing a + // span from each list. Stop after releasing at least num_pages + // or when there is nothing more to release. + while (released_pages < num_pages && stats_.free_bytes > 0) { + for (int i = 0; i < kMaxPages+1 && released_pages < num_pages; + i++, release_index_++) { + Span *s; + if (release_index_ > kMaxPages) release_index_ = 0; + + if (release_index_ == kMaxPages) { + if (large_normal_.empty()) { + continue; + } + s = (large_normal_.begin())->span; + } else { + SpanList* slist = &free_[release_index_]; + if (DLL_IsEmpty(&slist->normal)) { + continue; + } + s = slist->normal.prev; + } + // TODO(todd) if the remaining number of pages to release + // is significantly smaller than s->length, and s is on the + // large freelist, should we carve s instead of releasing? + // the whole thing? + Length released_len = ReleaseSpan(s); + // Some systems do not support release + if (released_len == 0) return released_pages; + released_pages += released_len; + } + } + return released_pages; +} + +bool PageHeap::EnsureLimit(Length n, bool withRelease) +{ + Length limit = (FLAGS_tcmalloc_heap_limit_mb*1024*1024) >> kPageShift; + if (limit == 0) return true; //there is no limit + + // We do not use stats_.system_bytes because it does not take + // MetaDataAllocs into account. 
+ Length takenPages = TCMalloc_SystemTaken >> kPageShift; + //XXX takenPages may be slightly bigger than limit for two reasons: + //* MetaDataAllocs ignore the limit (it is not easy to handle + // out of memory there) + //* sys_alloc may round allocation up to huge page size, + // although smaller limit was ensured + + ASSERT(takenPages >= stats_.unmapped_bytes >> kPageShift); + takenPages -= stats_.unmapped_bytes >> kPageShift; + + if (takenPages + n > limit && withRelease) { + takenPages -= ReleaseAtLeastNPages(takenPages + n - limit); + } + + return takenPages + n <= limit; +} + +void PageHeap::RegisterSizeClass(Span* span, uint32 sc) { + // Associate span object with all interior pages as well + ASSERT(span->location == Span::IN_USE); + ASSERT(GetDescriptor(span->start) == span); + ASSERT(GetDescriptor(span->start+span->length-1) == span); + Event(span, 'C', sc); + span->sizeclass = sc; + for (Length i = 1; i < span->length-1; i++) { + pagemap_.set(span->start+i, span); + } +} + +void PageHeap::GetSmallSpanStats(SmallSpanStats* result) { + for (int i = 0; i < kMaxPages; i++) { + result->normal_length[i] = DLL_Length(&free_[i].normal); + result->returned_length[i] = DLL_Length(&free_[i].returned); + } +} + +void PageHeap::GetLargeSpanStats(LargeSpanStats* result) { + result->spans = 0; + result->normal_pages = 0; + result->returned_pages = 0; + for (SpanSet::iterator it = large_normal_.begin(); it != large_normal_.end(); ++it) { + result->normal_pages += it->length; + result->spans++; + } + for (SpanSet::iterator it = large_returned_.begin(); it != large_returned_.end(); ++it) { + result->returned_pages += it->length; + result->spans++; + } +} + +bool PageHeap::GetNextRange(PageID start, base::MallocRange* r) { + Span* span = reinterpret_cast<Span*>(pagemap_.Next(start)); + if (span == NULL) { + return false; + } + r->address = span->start << kPageShift; + r->length = span->length << kPageShift; + r->fraction = 0; + switch (span->location) { + case Span::IN_USE: 
+ r->type = base::MallocRange::INUSE; + r->fraction = 1; + if (span->sizeclass > 0) { + // Only some of the objects in this span may be in use. + const size_t osize = Static::sizemap()->class_to_size(span->sizeclass); + r->fraction = (1.0 * osize * span->refcount) / r->length; + } + break; + case Span::ON_NORMAL_FREELIST: + r->type = base::MallocRange::FREE; + break; + case Span::ON_RETURNED_FREELIST: + r->type = base::MallocRange::UNMAPPED; + break; + default: + r->type = base::MallocRange::UNKNOWN; + break; + } + return true; +} + +static void RecordGrowth(size_t growth) { + StackTrace* t = Static::stacktrace_allocator()->New(); + t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 3); + t->size = growth; + t->stack[kMaxStackDepth-1] = reinterpret_cast<void*>(Static::growth_stacks()); + Static::set_growth_stacks(t); +} + +bool PageHeap::GrowHeap(Length n) { + ASSERT(kMaxPages >= kMinSystemAlloc); + if (n > kMaxValidPages) return false; + Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc); + size_t actual_size; + void* ptr = NULL; + if (EnsureLimit(ask)) { + ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); + } + if (ptr == NULL) { + if (n < ask) { + // Try growing just "n" pages + ask = n; + if (EnsureLimit(ask)) { + ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); + } + } + if (ptr == NULL) return false; + } + ask = actual_size >> kPageShift; + RecordGrowth(ask << kPageShift); + + ++stats_.reserve_count; + ++stats_.commit_count; + + uint64_t old_system_bytes = stats_.system_bytes; + stats_.system_bytes += (ask << kPageShift); + stats_.committed_bytes += (ask << kPageShift); + + stats_.total_commit_bytes += (ask << kPageShift); + stats_.total_reserve_bytes += (ask << kPageShift); + + const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + ASSERT(p > 0); + + // If we have already a lot of pages allocated, just pre allocate a bunch of + // memory for the page map. 
This prevents fragmentation by pagemap metadata + // when a program keeps allocating and freeing large blocks. + + if (old_system_bytes < kPageMapBigAllocationThreshold + && stats_.system_bytes >= kPageMapBigAllocationThreshold) { + pagemap_.PreallocateMoreMemory(); + } + + // Make sure pagemap_ has entries for all of the new pages. + // Plus ensure one before and one after so coalescing code + // does not need bounds-checking. + if (pagemap_.Ensure(p-1, ask+2)) { + // Pretend the new area is allocated and then Delete() it to cause + // any necessary coalescing to occur. + Span* span = NewSpan(p, ask); + RecordSpan(span); + Delete(span); + ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); + ASSERT(Check()); + return true; + } else { + // We could not allocate memory within "pagemap_" + // TODO: Once we can return memory to the system, return the new span + return false; + } +} + +bool PageHeap::Check() { + return true; +} + +bool PageHeap::CheckExpensive() { + bool result = Check(); + CheckSet(&large_normal_, kMaxPages + 1, Span::ON_NORMAL_FREELIST); + CheckSet(&large_returned_, kMaxPages + 1, Span::ON_RETURNED_FREELIST); + for (int s = 1; s <= kMaxPages; s++) { + CheckList(&free_[s - 1].normal, s, s, Span::ON_NORMAL_FREELIST); + CheckList(&free_[s - 1].returned, s, s, Span::ON_RETURNED_FREELIST); + } + return result; +} + +bool PageHeap::CheckList(Span* list, Length min_pages, Length max_pages, + int freelist) { + for (Span* s = list->next; s != list; s = s->next) { + CHECK_CONDITION(s->location == freelist); // NORMAL or RETURNED + CHECK_CONDITION(s->length >= min_pages); + CHECK_CONDITION(s->length <= max_pages); + CHECK_CONDITION(GetDescriptor(s->start) == s); + CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); + } + return true; +} + +bool PageHeap::CheckSet(SpanSet* spanset, Length min_pages,int freelist) { + for (SpanSet::iterator it = spanset->begin(); it != spanset->end(); ++it) { + Span* s = it->span; + 
CHECK_CONDITION(s->length == it->length); + CHECK_CONDITION(s->location == freelist); // NORMAL or RETURNED + CHECK_CONDITION(s->length >= min_pages); + CHECK_CONDITION(GetDescriptor(s->start) == s); + CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); + } + return true; +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.7/src/page_heap.h b/src/third_party/gperftools-2.7/src/page_heap.h new file mode 100644 index 00000000000..bf50394faa7 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/page_heap.h @@ -0,0 +1,358 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#ifndef TCMALLOC_PAGE_HEAP_H_ +#define TCMALLOC_PAGE_HEAP_H_ + +#include <config.h> +#include <stddef.h> // for size_t +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uint64_t, int64_t, uint16_t +#endif +#include <gperftools/malloc_extension.h> +#include "base/basictypes.h" +#include "common.h" +#include "packed-cache-inl.h" +#include "pagemap.h" +#include "span.h" + +// We need to dllexport PageHeap just for the unittest. MSVC complains +// that we don't dllexport the PageHeap members, but we don't need to +// test those, so I just suppress this warning. +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4251) +#endif + +// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if +// you're porting to a system where you really can't get a stacktrace. +// Because we control the definition of GetStackTrace, all clients of +// GetStackTrace should #include us rather than stacktrace.h. +#ifdef NO_TCMALLOC_SAMPLES + // We use #define so code compiles even if you #include stacktrace.h somehow. 
+# define GetStackTrace(stack, depth, skip) (0) +#else +# include <gperftools/stacktrace.h> +#endif + +namespace base { +struct MallocRange; +} + +namespace tcmalloc { + +// ------------------------------------------------------------------------- +// Map from page-id to per-page data +// ------------------------------------------------------------------------- + +// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines. +// We also use a simple one-level cache for hot PageID-to-sizeclass mappings, +// because sometimes the sizeclass is all the information we need. + +// Selector class -- general selector uses 3-level map +template <int BITS> class MapSelector { + public: + typedef TCMalloc_PageMap3<BITS-kPageShift> Type; +}; + +#ifndef TCMALLOC_SMALL_BUT_SLOW +// x86-64 and arm64 are using 48 bits of address space. So we can use +// just two level map, but since initial ram consumption of this mode +// is a bit on the higher side, we opt-out of it in +// TCMALLOC_SMALL_BUT_SLOW mode. +template <> class MapSelector<48> { + public: + typedef TCMalloc_PageMap2<48-kPageShift> Type; +}; + +#endif // TCMALLOC_SMALL_BUT_SLOW + +// A two-level map for 32-bit machines +template <> class MapSelector<32> { + public: + typedef TCMalloc_PageMap2<32-kPageShift> Type; +}; + +// ------------------------------------------------------------------------- +// Page-level allocator +// * Eager coalescing +// +// Heap for page-level allocation. We allow allocating and freeing a +// contiguous runs of pages (called a "span"). +// ------------------------------------------------------------------------- + +class PERFTOOLS_DLL_DECL PageHeap { + public: + PageHeap(); + + // Allocate a run of "n" pages. Returns zero if out of memory. + // Caller should not pass "n == 0" -- instead, n should have + // been rounded up already. + Span* New(Length n); + + // Delete the span "[p, p+n-1]". + // REQUIRES: span was returned by earlier call to New() and + // has not yet been deleted. 
+ void Delete(Span* span); + + // Mark an allocated span as being used for small objects of the + // specified size-class. + // REQUIRES: span was returned by an earlier call to New() + // and has not yet been deleted. + void RegisterSizeClass(Span* span, uint32 sc); + + // Split an allocated span into two spans: one of length "n" pages + // followed by another span of length "span->length - n" pages. + // Modifies "*span" to point to the first span of length "n" pages. + // Returns a pointer to the second span. + // + // REQUIRES: "0 < n < span->length" + // REQUIRES: span->location == IN_USE + // REQUIRES: span->sizeclass == 0 + Span* Split(Span* span, Length n); + + // Return the descriptor for the specified page. Returns NULL if + // this PageID was not allocated previously. + inline ATTRIBUTE_ALWAYS_INLINE + Span* GetDescriptor(PageID p) const { + return reinterpret_cast<Span*>(pagemap_.get(p)); + } + + // If this page heap is managing a range with starting page # >= start, + // store info about the range in *r and return true. Else return false. + bool GetNextRange(PageID start, base::MallocRange* r); + + // Page heap statistics + struct Stats { + Stats() : system_bytes(0), free_bytes(0), unmapped_bytes(0), committed_bytes(0), + scavenge_count(0), commit_count(0), total_commit_bytes(0), + decommit_count(0), total_decommit_bytes(0), + reserve_count(0), total_reserve_bytes(0) {} + uint64_t system_bytes; // Total bytes allocated from system + uint64_t free_bytes; // Total bytes on normal freelists + uint64_t unmapped_bytes; // Total bytes on returned freelists + uint64_t committed_bytes; // Bytes committed, always <= system_bytes_. 
+ + uint64_t scavenge_count; // Number of times scavagened flush pages + + uint64_t commit_count; // Number of virtual memory commits + uint64_t total_commit_bytes; // Bytes committed in lifetime of process + uint64_t decommit_count; // Number of virtual memory decommits + uint64_t total_decommit_bytes; // Bytes decommitted in lifetime of process + + uint64_t reserve_count; // Number of virtual memory reserves + uint64_t total_reserve_bytes; // Bytes reserved in lifetime of process + }; + inline Stats stats() const { return stats_; } + + struct SmallSpanStats { + // For each free list of small spans, the length (in spans) of the + // normal and returned free lists for that size. + // + // NOTE: index 'i' accounts the number of spans of length 'i + 1'. + int64 normal_length[kMaxPages]; + int64 returned_length[kMaxPages]; + }; + void GetSmallSpanStats(SmallSpanStats* result); + + // Stats for free large spans (i.e., spans with more than kMaxPages pages). + struct LargeSpanStats { + int64 spans; // Number of such spans + int64 normal_pages; // Combined page length of normal large spans + int64 returned_pages; // Combined page length of unmapped spans + }; + void GetLargeSpanStats(LargeSpanStats* result); + + bool Check(); + // Like Check() but does some more comprehensive checking. + bool CheckExpensive(); + bool CheckList(Span* list, Length min_pages, Length max_pages, + int freelist); // ON_NORMAL_FREELIST or ON_RETURNED_FREELIST + bool CheckSet(SpanSet *s, Length min_pages, int freelist); + + // Try to release at least num_pages for reuse by the OS. Returns + // the actual number of pages released, which may be less than + // num_pages if there weren't enough pages to release. The result + // may also be larger than num_pages since page_heap might decide to + // release one large range instead of fragmenting it into two + // smaller released and unreleased ranges. 
+ Length ReleaseAtLeastNPages(Length num_pages); + + // Reads and writes to pagemap_cache_ do not require locking. + bool TryGetSizeClass(PageID p, uint32* out) const { + return pagemap_cache_.TryGet(p, out); + } + void SetCachedSizeClass(PageID p, uint32 cl) { + ASSERT(cl != 0); + pagemap_cache_.Put(p, cl); + } + void InvalidateCachedSizeClass(PageID p) { pagemap_cache_.Invalidate(p); } + uint32 GetSizeClassOrZero(PageID p) const { + uint32 cached_value; + if (!TryGetSizeClass(p, &cached_value)) { + cached_value = 0; + } + return cached_value; + } + + bool GetAggressiveDecommit(void) {return aggressive_decommit_;} + void SetAggressiveDecommit(bool aggressive_decommit) { + aggressive_decommit_ = aggressive_decommit; + } + + private: + // Allocates a big block of memory for the pagemap once we reach more than + // 128MB + static const size_t kPageMapBigAllocationThreshold = 128 << 20; + + // Minimum number of pages to fetch from system at a time. Must be + // significantly bigger than kBlockSize to amortize system-call + // overhead, and also to reduce external fragementation. Also, we + // should keep this value big because various incarnations of Linux + // have small limits on the number of mmap() regions per + // address-space. + // REQUIRED: kMinSystemAlloc <= kMaxPages; + static const int kMinSystemAlloc = kMaxPages; + + // Never delay scavenging for more than the following number of + // deallocated pages. With 4K pages, this comes to 4GB of + // deallocation. + static const int kMaxReleaseDelay = 1 << 20; + + // If there is nothing to release, wait for so many pages before + // scavenging again. With 4K pages, this comes to 1GB of memory. 
+ static const int kDefaultReleaseDelay = 1 << 18; + + // Pick the appropriate map and cache types based on pointer size + typedef MapSelector<kAddressBits>::Type PageMap; + typedef PackedCache<kAddressBits - kPageShift> PageMapCache; + mutable PageMapCache pagemap_cache_; + PageMap pagemap_; + + // We segregate spans of a given size into two circular linked + // lists: one for normal spans, and one for spans whose memory + // has been returned to the system. + struct SpanList { + Span normal; + Span returned; + }; + + // Sets of spans with length > kMaxPages. + // + // Rather than using a linked list, we use sets here for efficient + // best-fit search. + SpanSet large_normal_; + SpanSet large_returned_; + + // Array mapping from span length to a doubly linked list of free spans + // + // NOTE: index 'i' stores spans of length 'i + 1'. + SpanList free_[kMaxPages]; + + // Statistics on system, free, and unmapped bytes + Stats stats_; + + Span* SearchFreeAndLargeLists(Length n); + + bool GrowHeap(Length n); + + // REQUIRES: span->length >= n + // REQUIRES: span->location != IN_USE + // Remove span from its free list, and move any leftover part of + // span into appropriate free lists. Also update "span" to have + // length exactly "n" and mark it as non-free so it can be returned + // to the client. After all that, decrease free_pages_ by n and + // return span. + Span* Carve(Span* span, Length n); + + void RecordSpan(Span* span) { + pagemap_.set(span->start, span); + if (span->length > 1) { + pagemap_.set(span->start + span->length - 1, span); + } + } + + // Allocate a large span of length == n. If successful, returns a + // span of exactly the specified length. Else, returns NULL. + Span* AllocLarge(Length n); + + // Coalesce span with neighboring spans if possible, prepend to + // appropriate free list, and adjust stats. + void MergeIntoFreeList(Span* span); + + // Commit the span. + void CommitSpan(Span* span); + + // Decommit the span. 
+ bool DecommitSpan(Span* span); + + // Prepends span to appropriate free list, and adjusts stats. + void PrependToFreeList(Span* span); + + // Removes span from its free list, and adjust stats. + void RemoveFromFreeList(Span* span); + + // Incrementally release some memory to the system. + // IncrementalScavenge(n) is called whenever n pages are freed. + void IncrementalScavenge(Length n); + + // Attempts to decommit 's' and move it to the returned freelist. + // + // Returns the length of the Span or zero if release failed. + // + // REQUIRES: 's' must be on the NORMAL freelist. + Length ReleaseSpan(Span *s); + + // Checks if we are allowed to take more memory from the system. + // If limit is reached and allowRelease is true, tries to release + // some unused spans. + bool EnsureLimit(Length n, bool allowRelease = true); + + Span* CheckAndHandlePreMerge(Span *span, Span *other); + + // Number of pages to deallocate before doing more scavenging + int64_t scavenge_counter_; + + // Index of last free list where we released memory to the OS. + int release_index_; + + bool aggressive_decommit_; +}; + +} // namespace tcmalloc + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif // TCMALLOC_PAGE_HEAP_H_ diff --git a/src/third_party/gperftools-2.7/src/page_heap_allocator.h b/src/third_party/gperftools-2.7/src/page_heap_allocator.h new file mode 100644 index 00000000000..3fecabdeb44 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/page_heap_allocator.h @@ -0,0 +1,179 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#ifndef TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ +#define TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ + +#include <stddef.h> // for NULL, size_t + +#include "common.h" // for MetaDataAlloc +#include "internal_logging.h" // for ASSERT + +namespace tcmalloc { + +// Simple allocator for objects of a specified type. External locking +// is required before accessing one of these objects. +template <class T> +class PageHeapAllocator { + public: + // We use an explicit Init function because these variables are statically + // allocated and their constructors might not have run by the time some + // other static variable tries to allocate memory. 
+ void Init() { + ASSERT(sizeof(T) <= kAllocIncrement); + inuse_ = 0; + free_area_ = NULL; + free_avail_ = 0; + free_list_ = NULL; + // Reserve some space at the beginning to avoid fragmentation. + Delete(New()); + } + + T* New() { + // Consult free list + void* result; + if (free_list_ != NULL) { + result = free_list_; + free_list_ = *(reinterpret_cast<void**>(result)); + } else { + if (free_avail_ < sizeof(T)) { + // Need more room. We assume that MetaDataAlloc returns + // suitably aligned memory. + free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); + if (free_area_ == NULL) { + Log(kCrash, __FILE__, __LINE__, + "FATAL ERROR: Out of memory trying to allocate internal " + "tcmalloc data (bytes, object-size)", + kAllocIncrement, sizeof(T)); + } + free_avail_ = kAllocIncrement; + } + result = free_area_; + free_area_ += sizeof(T); + free_avail_ -= sizeof(T); + } + inuse_++; + return reinterpret_cast<T*>(result); + } + + void Delete(T* p) { + *(reinterpret_cast<void**>(p)) = free_list_; + free_list_ = p; + inuse_--; + } + + int inuse() const { return inuse_; } + + private: + // How much to allocate from system at a time + static const int kAllocIncrement = 128 << 10; + + // Free area from which to carve new objects + char* free_area_; + size_t free_avail_; + + // Free list of already carved objects + void* free_list_; + + // Number of allocated but unfreed objects + int inuse_; +}; + +// STL-compatible allocator which forwards allocations to a PageHeapAllocator. +// +// Like PageHeapAllocator, this requires external synchronization. To avoid multiple +// separate STLPageHeapAllocator<T> from sharing the same underlying PageHeapAllocator<T>, +// the |LockingTag| template argument should be used. Template instantiations with +// different locking tags can safely be used concurrently. 
+template <typename T, class LockingTag> +class STLPageHeapAllocator { + public: + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef T value_type; + + template <class T1> struct rebind { + typedef STLPageHeapAllocator<T1, LockingTag> other; + }; + + STLPageHeapAllocator() { } + STLPageHeapAllocator(const STLPageHeapAllocator&) { } + template <class T1> STLPageHeapAllocator(const STLPageHeapAllocator<T1, LockingTag>&) { } + ~STLPageHeapAllocator() { } + + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + + size_type max_size() const { return size_t(-1) / sizeof(T); } + + void construct(pointer p, const T& val) { ::new(p) T(val); } + void construct(pointer p) { ::new(p) T(); } + void destroy(pointer p) { p->~T(); } + + // There's no state, so these allocators are always equal + bool operator==(const STLPageHeapAllocator&) const { return true; } + bool operator!=(const STLPageHeapAllocator&) const { return false; } + + pointer allocate(size_type n, const void* = 0) { + if (!underlying_.initialized) { + underlying_.allocator.Init(); + underlying_.initialized = true; + } + + CHECK_CONDITION(n == 1); + return underlying_.allocator.New(); + } + void deallocate(pointer p, size_type n) { + CHECK_CONDITION(n == 1); + underlying_.allocator.Delete(p); + } + + private: + struct Storage { + explicit Storage(base::LinkerInitialized x) {} + PageHeapAllocator<T> allocator; + bool initialized; + }; + static Storage underlying_; +}; + +template<typename T, class LockingTag> +typename STLPageHeapAllocator<T, LockingTag>::Storage STLPageHeapAllocator<T, LockingTag>::underlying_(base::LINKER_INITIALIZED); + +} // namespace tcmalloc + +#endif // TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ diff --git a/src/third_party/gperftools-2.7/src/pagemap.h b/src/third_party/gperftools-2.7/src/pagemap.h new 
file mode 100644 index 00000000000..68b2d24c185 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/pagemap.h @@ -0,0 +1,328 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// A data structure used by the caching malloc. It maps from page# to +// a pointer that contains info about that page. 
We use two +// representations: one for 32-bit addresses, and another for 64 bit +// addresses. Both representations provide the same interface. The +// first representation is implemented as a flat array, the seconds as +// a three-level radix tree that strips away approximately 1/3rd of +// the bits every time. +// +// The BITS parameter should be the number of bits required to hold +// a page number. E.g., with 32 bit pointers and 4K pages (i.e., +// page offset fits in lower 12 bits), BITS == 20. + +#ifndef TCMALLOC_PAGEMAP_H_ +#define TCMALLOC_PAGEMAP_H_ + +#include "config.h" + +#include <stddef.h> // for NULL, size_t +#include <string.h> // for memset +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include "internal_logging.h" // for ASSERT + +// Single-level array +template <int BITS> +class TCMalloc_PageMap1 { + private: + static const int LENGTH = 1 << BITS; + + void** array_; + + public: + typedef uintptr_t Number; + + explicit TCMalloc_PageMap1(void* (*allocator)(size_t)) { + array_ = reinterpret_cast<void**>((*allocator)(sizeof(void*) << BITS)); + memset(array_, 0, sizeof(void*) << BITS); + } + + // Ensure that the map contains initialized entries "x .. x+n-1". + // Returns true if successful, false if we could not allocate memory. + bool Ensure(Number x, size_t n) { + // Nothing to do since flat array was allocated at start. All + // that's left is to check for overflow (that is, we don't want to + // ensure a number y where array_[y] would be an out-of-bounds + // access). + return n <= LENGTH - x; // an overflow-free way to do "x + n <= LENGTH" + } + + void PreallocateMoreMemory() {} + + // Return the current value for KEY. Returns NULL if not yet set, + // or if k is out of range. + ATTRIBUTE_ALWAYS_INLINE + void* get(Number k) const { + if ((k >> BITS) > 0) { + return NULL; + } + return array_[k]; + } + + // REQUIRES "k" is in range "[0,2^BITS-1]". 
+ // REQUIRES "k" has been ensured before. + // + // Sets the value 'v' for key 'k'. + void set(Number k, void* v) { + array_[k] = v; + } + + // Return the first non-NULL pointer found in this map for + // a page number >= k. Returns NULL if no such number is found. + void* Next(Number k) const { + while (k < (1 << BITS)) { + if (array_[k] != NULL) return array_[k]; + k++; + } + return NULL; + } +}; + +// Two-level radix tree +template <int BITS> +class TCMalloc_PageMap2 { + private: + static const int LEAF_BITS = (BITS + 1) / 2; + static const int LEAF_LENGTH = 1 << LEAF_BITS; + + static const int ROOT_BITS = BITS - LEAF_BITS; + static const int ROOT_LENGTH = 1 << ROOT_BITS; + + // Leaf node + struct Leaf { + void* values[LEAF_LENGTH]; + }; + + Leaf* root_[ROOT_LENGTH]; // Pointers to child nodes + void* (*allocator_)(size_t); // Memory allocator + + public: + typedef uintptr_t Number; + + explicit TCMalloc_PageMap2(void* (*allocator)(size_t)) { + allocator_ = allocator; + memset(root_, 0, sizeof(root_)); + } + + ATTRIBUTE_ALWAYS_INLINE + void* get(Number k) const { + const Number i1 = k >> LEAF_BITS; + const Number i2 = k & (LEAF_LENGTH-1); + if ((k >> BITS) > 0 || root_[i1] == NULL) { + return NULL; + } + return root_[i1]->values[i2]; + } + + void set(Number k, void* v) { + const Number i1 = k >> LEAF_BITS; + const Number i2 = k & (LEAF_LENGTH-1); + ASSERT(i1 < ROOT_LENGTH); + root_[i1]->values[i2] = v; + } + + bool Ensure(Number start, size_t n) { + for (Number key = start; key <= start + n - 1; ) { + const Number i1 = key >> LEAF_BITS; + + // Check for overflow + if (i1 >= ROOT_LENGTH) + return false; + + // Make 2nd level node if necessary + if (root_[i1] == NULL) { + Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf))); + if (leaf == NULL) return false; + memset(leaf, 0, sizeof(*leaf)); + root_[i1] = leaf; + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; + } + return true; + } + + 
void PreallocateMoreMemory() { + // Allocate enough to keep track of all possible pages + if (BITS < 20) { + Ensure(0, Number(1) << BITS); + } + } + + void* Next(Number k) const { + while (k < (Number(1) << BITS)) { + const Number i1 = k >> LEAF_BITS; + Leaf* leaf = root_[i1]; + if (leaf != NULL) { + // Scan forward in leaf + for (Number i2 = k & (LEAF_LENGTH - 1); i2 < LEAF_LENGTH; i2++) { + if (leaf->values[i2] != NULL) { + return leaf->values[i2]; + } + } + } + // Skip to next top-level entry + k = (i1 + 1) << LEAF_BITS; + } + return NULL; + } +}; + +// Three-level radix tree +template <int BITS> +class TCMalloc_PageMap3 { + private: + // How many bits should we consume at each interior level + static const int INTERIOR_BITS = (BITS + 2) / 3; // Round-up + static const int INTERIOR_LENGTH = 1 << INTERIOR_BITS; + + // How many bits should we consume at leaf level + static const int LEAF_BITS = BITS - 2*INTERIOR_BITS; + static const int LEAF_LENGTH = 1 << LEAF_BITS; + + // Interior node + struct Node { + Node* ptrs[INTERIOR_LENGTH]; + }; + + // Leaf node + struct Leaf { + void* values[LEAF_LENGTH]; + }; + + Node root_; // Root of radix tree + void* (*allocator_)(size_t); // Memory allocator + + Node* NewNode() { + Node* result = reinterpret_cast<Node*>((*allocator_)(sizeof(Node))); + if (result != NULL) { + memset(result, 0, sizeof(*result)); + } + return result; + } + + public: + typedef uintptr_t Number; + + explicit TCMalloc_PageMap3(void* (*allocator)(size_t)) { + allocator_ = allocator; + memset(&root_, 0, sizeof(root_)); + } + + ATTRIBUTE_ALWAYS_INLINE + void* get(Number k) const { + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + const Number i3 = k & (LEAF_LENGTH-1); + if ((k >> BITS) > 0 || + root_.ptrs[i1] == NULL || root_.ptrs[i1]->ptrs[i2] == NULL) { + return NULL; + } + return reinterpret_cast<Leaf*>(root_.ptrs[i1]->ptrs[i2])->values[i3]; + } + + void set(Number k, void* v) { + ASSERT(k 
>> BITS == 0); + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + const Number i3 = k & (LEAF_LENGTH-1); + reinterpret_cast<Leaf*>(root_.ptrs[i1]->ptrs[i2])->values[i3] = v; + } + + bool Ensure(Number start, size_t n) { + for (Number key = start; key <= start + n - 1; ) { + const Number i1 = key >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (key >> LEAF_BITS) & (INTERIOR_LENGTH-1); + + // Check for overflow + if (i1 >= INTERIOR_LENGTH || i2 >= INTERIOR_LENGTH) + return false; + + // Make 2nd level node if necessary + if (root_.ptrs[i1] == NULL) { + Node* n = NewNode(); + if (n == NULL) return false; + root_.ptrs[i1] = n; + } + + // Make leaf node if necessary + if (root_.ptrs[i1]->ptrs[i2] == NULL) { + Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf))); + if (leaf == NULL) return false; + memset(leaf, 0, sizeof(*leaf)); + root_.ptrs[i1]->ptrs[i2] = reinterpret_cast<Node*>(leaf); + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; + } + return true; + } + + void PreallocateMoreMemory() { + } + + void* Next(Number k) const { + while (k < (Number(1) << BITS)) { + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + if (root_.ptrs[i1] == NULL) { + // Advance to next top-level entry + k = (i1 + 1) << (LEAF_BITS + INTERIOR_BITS); + } else { + Leaf* leaf = reinterpret_cast<Leaf*>(root_.ptrs[i1]->ptrs[i2]); + if (leaf != NULL) { + for (Number i3 = (k & (LEAF_LENGTH-1)); i3 < LEAF_LENGTH; i3++) { + if (leaf->values[i3] != NULL) { + return leaf->values[i3]; + } + } + } + // Advance to next interior entry + k = ((k >> LEAF_BITS) + 1) << LEAF_BITS; + } + } + return NULL; + } +}; + +#endif // TCMALLOC_PAGEMAP_H_ diff --git a/src/third_party/gperftools-2.7/src/pprof b/src/third_party/gperftools-2.7/src/pprof new file mode 100755 index 00000000000..9e18fc63168 --- /dev/null 
+++ b/src/third_party/gperftools-2.7/src/pprof @@ -0,0 +1,5580 @@ +#! /usr/bin/env perl + +# Copyright (c) 1998-2007, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Program for printing the profile generated by common/profiler.cc, +# or by the heap profiler (common/debugallocation.cc) +# +# The profile contains a sequence of entries of the form: +# <count> <stack trace> +# This program parses the profile, and generates user-readable +# output. 
+# +# Examples: +# +# % tools/pprof "program" "profile" +# Enters "interactive" mode +# +# % tools/pprof --text "program" "profile" +# Generates one line per procedure +# +# % tools/pprof --gv "program" "profile" +# Generates annotated call-graph and displays via "gv" +# +# % tools/pprof --gv --focus=Mutex "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# +# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# and does not match "string" +# +# % tools/pprof --list=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --list=<regexp> pattern. The listing is +# annotated with the flat and cumulative sample counts at each line. +# +# % tools/pprof --disasm=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --disasm=<regexp> pattern. The listing is +# annotated with the flat and cumulative sample counts at each PC value. +# +# TODO: Use color to indicate files? + +use strict; +use warnings; +use Getopt::Long; +use Cwd; +use POSIX; + +my $PPROF_VERSION = "2.0"; + +# These are the object tools we use which can come from a +# user-specified location using --tools, from the PPROF_TOOLS +# environment variable, or from the environment. +my %obj_tool_map = ( + "objdump" => "objdump", + "nm" => "nm", + "addr2line" => "addr2line", + "c++filt" => "c++filt", + ## ConfigureObjTools may add architecture-specific entries: + #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables + #"addr2line_pdb" => "addr2line-pdb", # ditto + #"otool" => "otool", # equivalent of objdump on OS X +); +# NOTE: these are lists, so you can put in commandline flags if you want. 
+my @DOT = ("dot"); # leave non-absolute, since it may be in /usr/local +my @GV = ("gv"); +my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread +my @KCACHEGRIND = ("kcachegrind"); +my @PS2PDF = ("ps2pdf"); +# These are used for dynamic profiles +my @URL_FETCHER = ("curl", "-s"); + +# These are the web pages that servers need to support for dynamic profiles +my $HEAP_PAGE = "/pprof/heap"; +my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#" +my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param + # ?seconds=#&event=x&period=n +my $GROWTH_PAGE = "/pprof/growth"; +my $CONTENTION_PAGE = "/pprof/contention"; +my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter +my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; +my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param + # "?seconds=#", + # "?tags_regexp=#" and + # "?type=#". +my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST +my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; + +# These are the web pages that can be named on the command line. +# All the alternatives must begin with /. +my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . + "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . + "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)"; + +# default binary name +my $UNKNOWN_BINARY = "(unknown)"; + +# There is a pervasive dependency on the length (in hex characters, +# i.e., nibbles) of an address, distinguishing between 32-bit and +# 64-bit profiles. To err on the safe size, default to 64-bit here: +my $address_length = 16; + +my $dev_null = "/dev/null"; +if (! -e $dev_null && $^O =~ /MSWin/) { # $^O is the OS perl was built for + $dev_null = "nul"; +} + +# A list of paths to search for shared object files +my @prefix_list = (); + +# Special routine name that should not have any symbols. +# Used as separator to parse "addr2line -i" output. 
+my $sep_symbol = '_fini'; +my $sep_address = undef; + +my @stackTraces; + +##### Argument parsing ##### + +sub usage_string { + return <<EOF; +Usage: +$0 [options] <program> <profiles> + <profiles> is a space separated list of profile names. +$0 [options] <symbolized-profiles> + <symbolized-profiles> is a list of profile files where each file contains + the necessary symbol mappings as well as profile data (likely generated + with --raw). +$0 [options] <profile> + <profile> is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE + + Each name can be: + /path/to/profile - a path to a profile file + host:port[/<service>] - a location of a service to get profile from + + The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, + $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, + $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. + For instance: + $0 http://myserver.com:80$HEAP_PAGE + If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). +$0 --symbols <program> + Maps addresses to symbol names. In this mode, stdin should be a + list of library mappings, in the same format as is found in the heap- + and cpu-profile files (this loosely matches that of /proc/self/maps + on linux), followed by a list of hex addresses to map, one per line. 
+ + For more help with querying remote servers, including how to add the + necessary server-side support code, see this filename (or one like it): + + /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html + +Options: + --cum Sort by cumulative data + --base=<base> Subtract <base> from <profile> before display + --interactive Run in interactive mode (interactive "help" gives help) [default] + --seconds=<n> Length of time for dynamic profiles [default=30 secs] + --add_lib=<file> Read additional symbols and line info from the given library + --lib_prefix=<dir> Comma separated list of library path prefixes + --no_strip_temp Do not strip template arguments from function names + +Reporting Granularity: + --addresses Report at address level + --lines Report at source line level + --functions Report at function level [default] + --files Report at source file level + +Output type: + --text Generate text report + --stacks Generate stack traces similar to the heap profiler (requires --text) + --callgrind Generate callgrind format to stdout + --gv Generate Postscript and display + --evince Generate PDF and display + --web Generate SVG and display + --list=<regexp> Generate source listing of matching routines + --disasm=<regexp> Generate disassembly of matching routines + --symbols Print demangled symbol names found at given addresses + --dot Generate DOT file to stdout + --ps Generate Postscript to stdout + --pdf Generate PDF to stdout + --svg Generate SVG to stdout + --gif Generate GIF to stdout + --raw Generate symbolized pprof data (useful with remote fetch) + --collapsed Generate collapsed stacks for building flame graphs + (see http://www.brendangregg.com/flamegraphs.html) + +Heap-Profile Options: + --inuse_space Display in-use (mega)bytes [default] + --inuse_objects Display in-use objects + --alloc_space Display allocated (mega)bytes + --alloc_objects Display allocated objects + --show_bytes Display space in bytes + --drop_negative Ignore negative differences + 
+Contention-profile options: + --total_delay Display total delay at each region [default] + --contentions Display number of delays at each region + --mean_delay Display mean delay at each region + +Call-graph Options: + --nodecount=<n> Show at most so many nodes [default=80] + --nodefraction=<f> Hide nodes below <f>*total [default=.005] + --edgefraction=<f> Hide edges below <f>*total [default=.001] + --maxdegree=<n> Max incoming/outgoing edges per node [default=8] + --focus=<regexp> Focus on nodes matching <regexp> + --ignore=<regexp> Ignore nodes matching <regexp> + --scale=<n> Set GV scaling [default=0] + --heapcheck Make nodes with non-0 object counts + (i.e. direct leak generators) more visible + +Miscellaneous: + --no-auto-signal-frm Automatically drop 2nd frame that is always same (cpu-only) + (assuming that it is artifact of bad stack captures + which include signal handler frames) + --show_addresses Always show addresses when applicable + --tools=<prefix or binary:fullpath>[,...] \$PATH for object tool pathnames + --test Run unit tests + --help This message + --version Version information + +Environment Variables: + PPROF_TMPDIR Profiles directory. 
Defaults to \$HOME/pprof + PPROF_TOOLS Prefix for object tools pathnames + +Examples: + +$0 /bin/ls ls.prof + Enters "interactive" mode +$0 --text /bin/ls ls.prof + Outputs one line per procedure +$0 --web /bin/ls ls.prof + Displays annotated call-graph in web browser +$0 --gv /bin/ls ls.prof + Displays annotated call-graph via 'gv' +$0 --gv --focus=Mutex /bin/ls ls.prof + Restricts to code paths including a .*Mutex.* entry +$0 --gv --focus=Mutex --ignore=string /bin/ls ls.prof + Code paths including Mutex but not string +$0 --list=getdir /bin/ls ls.prof + (Per-line) annotated source listing for getdir() +$0 --disasm=getdir /bin/ls ls.prof + (Per-PC) annotated disassembly for getdir() + +$0 http://localhost:1234/ + Enters "interactive" mode +$0 --text localhost:1234 + Outputs one line per procedure for localhost:1234 +$0 --raw localhost:1234 > ./local.raw +$0 --text ./local.raw + Fetches a remote profile for later analysis and then + analyzes it in text mode. +EOF +} + +sub version_string { + return <<EOF +pprof (part of gperftools $PPROF_VERSION) + +Copyright 1998-2007 Google Inc. + +This is BSD licensed software; see the source for copying conditions +and license information. +There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A +PARTICULAR PURPOSE. +EOF +} + +sub usage { + my $msg = shift; + print STDERR "$msg\n\n"; + print STDERR usage_string(); + exit(1); +} + +sub Init() { + # Setup tmp-file name and handler to clean it up. + # We do this in the very beginning so that we can use + # error() and cleanup() function anytime here after. 
+ $main::tmpfile_sym = "/tmp/pprof$$.sym"; + $main::tmpfile_ps = "/tmp/pprof$$"; + $main::next_tmpfile = 0; + $SIG{'INT'} = \&sighandler; + + # Cache from filename/linenumber to source code + $main::source_cache = (); + + $main::opt_help = 0; + $main::opt_version = 0; + $main::opt_show_addresses = 0; + $main::opt_no_auto_signal_frames = 0; + + $main::opt_cum = 0; + $main::opt_base = ''; + $main::opt_addresses = 0; + $main::opt_lines = 0; + $main::opt_functions = 0; + $main::opt_files = 0; + $main::opt_lib_prefix = ""; + + $main::opt_text = 0; + $main::opt_stacks = 0; + $main::opt_callgrind = 0; + $main::opt_list = ""; + $main::opt_disasm = ""; + $main::opt_symbols = 0; + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_web = 0; + $main::opt_dot = 0; + $main::opt_ps = 0; + $main::opt_pdf = 0; + $main::opt_gif = 0; + $main::opt_svg = 0; + $main::opt_raw = 0; + $main::opt_collapsed = 0; + + $main::opt_nodecount = 80; + $main::opt_nodefraction = 0.005; + $main::opt_edgefraction = 0.001; + $main::opt_maxdegree = 8; + $main::opt_focus = ''; + $main::opt_ignore = ''; + $main::opt_scale = 0; + $main::opt_heapcheck = 0; + $main::opt_seconds = 30; + $main::opt_lib = ""; + + $main::opt_inuse_space = 0; + $main::opt_inuse_objects = 0; + $main::opt_alloc_space = 0; + $main::opt_alloc_objects = 0; + $main::opt_show_bytes = 0; + $main::opt_drop_negative = 0; + $main::opt_interactive = 0; + + $main::opt_total_delay = 0; + $main::opt_contentions = 0; + $main::opt_mean_delay = 0; + + $main::opt_tools = ""; + $main::opt_debug = 0; + $main::opt_test = 0; + + # Do not strip template argument in function names + $main::opt_no_strip_temp = 0; + + # These are undocumented flags used only by unittests. + $main::opt_test_stride = 0; + + # Are we using $SYMBOL_PAGE? + $main::use_symbol_page = 0; + + # Files returned by TempName. 
+ %main::tempnames = (); + + # Type of profile we are dealing with + # Supported types: + # cpu + # heap + # growth + # contention + $main::profile_type = ''; # Empty type means "unknown" + + GetOptions("help!" => \$main::opt_help, + "version!" => \$main::opt_version, + "show_addresses!"=> \$main::opt_show_addresses, + "no-auto-signal-frm!"=> \$main::opt_no_auto_signal_frames, + "cum!" => \$main::opt_cum, + "base=s" => \$main::opt_base, + "seconds=i" => \$main::opt_seconds, + "add_lib=s" => \$main::opt_lib, + "lib_prefix=s" => \$main::opt_lib_prefix, + "functions!" => \$main::opt_functions, + "lines!" => \$main::opt_lines, + "addresses!" => \$main::opt_addresses, + "files!" => \$main::opt_files, + "text!" => \$main::opt_text, + "stacks!" => \$main::opt_stacks, + "callgrind!" => \$main::opt_callgrind, + "list=s" => \$main::opt_list, + "disasm=s" => \$main::opt_disasm, + "symbols!" => \$main::opt_symbols, + "gv!" => \$main::opt_gv, + "evince!" => \$main::opt_evince, + "web!" => \$main::opt_web, + "dot!" => \$main::opt_dot, + "ps!" => \$main::opt_ps, + "pdf!" => \$main::opt_pdf, + "svg!" => \$main::opt_svg, + "gif!" => \$main::opt_gif, + "raw!" => \$main::opt_raw, + "collapsed!" => \$main::opt_collapsed, + "interactive!" => \$main::opt_interactive, + "nodecount=i" => \$main::opt_nodecount, + "nodefraction=f" => \$main::opt_nodefraction, + "edgefraction=f" => \$main::opt_edgefraction, + "maxdegree=i" => \$main::opt_maxdegree, + "focus=s" => \$main::opt_focus, + "ignore=s" => \$main::opt_ignore, + "scale=i" => \$main::opt_scale, + "heapcheck" => \$main::opt_heapcheck, + "inuse_space!" => \$main::opt_inuse_space, + "inuse_objects!" => \$main::opt_inuse_objects, + "alloc_space!" => \$main::opt_alloc_space, + "alloc_objects!" => \$main::opt_alloc_objects, + "show_bytes!" => \$main::opt_show_bytes, + "drop_negative!" => \$main::opt_drop_negative, + "total_delay!" => \$main::opt_total_delay, + "contentions!" => \$main::opt_contentions, + "mean_delay!" 
=> \$main::opt_mean_delay, + "tools=s" => \$main::opt_tools, + "no_strip_temp!" => \$main::opt_no_strip_temp, + "test!" => \$main::opt_test, + "debug!" => \$main::opt_debug, + # Undocumented flags used only by unittests: + "test_stride=i" => \$main::opt_test_stride, + ) || usage("Invalid option(s)"); + + # Deal with the standard --help and --version + if ($main::opt_help) { + print usage_string(); + exit(0); + } + + if ($main::opt_version) { + print version_string(); + exit(0); + } + + # Disassembly/listing/symbols mode requires address-level info + if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) { + $main::opt_functions = 0; + $main::opt_lines = 0; + $main::opt_addresses = 1; + $main::opt_files = 0; + } + + # Check heap-profiling flags + if ($main::opt_inuse_space + + $main::opt_inuse_objects + + $main::opt_alloc_space + + $main::opt_alloc_objects > 1) { + usage("Specify at most on of --inuse/--alloc options"); + } + + # Check output granularities + my $grains = + $main::opt_functions + + $main::opt_lines + + $main::opt_addresses + + $main::opt_files + + 0; + if ($grains > 1) { + usage("Only specify one output granularity option"); + } + if ($grains == 0) { + $main::opt_functions = 1; + } + + # Check output modes + my $modes = + $main::opt_text + + $main::opt_callgrind + + ($main::opt_list eq '' ? 0 : 1) + + ($main::opt_disasm eq '' ? 0 : 1) + + ($main::opt_symbols == 0 ? 
0 : 1) + + $main::opt_gv + + $main::opt_evince + + $main::opt_web + + $main::opt_dot + + $main::opt_ps + + $main::opt_pdf + + $main::opt_svg + + $main::opt_gif + + $main::opt_raw + + $main::opt_collapsed + + $main::opt_interactive + + 0; + if ($modes > 1) { + usage("Only specify one output mode"); + } + if ($modes == 0) { + if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode + $main::opt_interactive = 1; + } else { + $main::opt_text = 1; + } + } + + if ($main::opt_test) { + RunUnitTests(); + # Should not return + exit(1); + } + + # Binary name and profile arguments list + $main::prog = ""; + @main::pfile_args = (); + + # Remote profiling without a binary (using $SYMBOL_PAGE instead) + if (@ARGV > 0) { + if (IsProfileURL($ARGV[0])) { + printf STDERR "Using remote profile at $ARGV[0].\n"; + $main::use_symbol_page = 1; + } elsif (IsSymbolizedProfileFile($ARGV[0])) { + $main::use_symbolized_profile = 1; + $main::prog = $UNKNOWN_BINARY; # will be set later from the profile file + } + } + + if ($main::use_symbol_page || $main::use_symbolized_profile) { + # We don't need a binary! + my %disabled = ('--lines' => $main::opt_lines, + '--disasm' => $main::opt_disasm); + for my $option (keys %disabled) { + usage("$option cannot be used without a binary") if $disabled{$option}; + } + # Set $main::prog later... 
+ scalar(@ARGV) || usage("Did not specify profile file"); + } elsif ($main::opt_symbols) { + # --symbols needs a binary-name (to run nm on, etc) but not profiles + $main::prog = shift(@ARGV) || usage("Did not specify program"); + } else { + $main::prog = shift(@ARGV) || usage("Did not specify program"); + scalar(@ARGV) || usage("Did not specify profile file"); + } + + # Parse profile file/location arguments + foreach my $farg (@ARGV) { + if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) { + my $machine = $1; + my $num_machines = $2; + my $path = $3; + for (my $i = 0; $i < $num_machines; $i++) { + unshift(@main::pfile_args, "$i.$machine$path"); + } + } else { + unshift(@main::pfile_args, $farg); + } + } + + if ($main::use_symbol_page) { + unless (IsProfileURL($main::pfile_args[0])) { + error("The first profile should be a remote form to use $SYMBOL_PAGE\n"); + } + CheckSymbolPage(); + $main::prog = FetchProgramName(); + } elsif (!$main::use_symbolized_profile) { # may not need objtools! + ConfigureObjTools($main::prog) + } + + # Break the opt_lib_prefix into the prefix_list array + @prefix_list = split (',', $main::opt_lib_prefix); + + # Remove trailing / from the prefixes, in the list to prevent + # searching things like /my/path//lib/mylib.so + foreach (@prefix_list) { + s|/+$||; + } +} + +sub Main() { + Init(); + $main::collected_profile = undef; + @main::profile_files = (); + $main::op_time = time(); + + # Printing symbols is special and requires a lot less info that most. 
+ if ($main::opt_symbols) { + PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin + return; + } + + # Fetch all profile data + FetchDynamicProfiles(); + + # this will hold symbols that we read from the profile files + my $symbol_map = {}; + + # Read one profile, pick the last item on the list + my $data = ReadProfile($main::prog, pop(@main::profile_files)); + my $profile = $data->{profile}; + my $pcs = $data->{pcs}; + my $libs = $data->{libs}; # Info about main program and shared libraries + $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); + + # Add additional profiles, if available. + if (scalar(@main::profile_files) > 0) { + foreach my $pname (@main::profile_files) { + my $data2 = ReadProfile($main::prog, $pname); + $profile = AddProfile($profile, $data2->{profile}); + $pcs = AddPcs($pcs, $data2->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); + } + } + + # Subtract base from profile, if specified + if ($main::opt_base ne '') { + my $base = ReadProfile($main::prog, $main::opt_base); + $profile = SubtractProfile($profile, $base->{profile}); + $pcs = AddPcs($pcs, $base->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); + } + + # Get total data in profile + my $total = TotalProfile($profile); + + # Collect symbols + my $symbols; + if ($main::use_symbolized_profile) { + $symbols = FetchSymbols($pcs, $symbol_map); + } elsif ($main::use_symbol_page) { + $symbols = FetchSymbols($pcs); + } else { + # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, + # which may differ from the data from subsequent profiles, especially + # if they were run on different machines. Use appropriate libs for + # each pc somehow. + $symbols = ExtractSymbols($libs, $pcs); + } + + # Remove uniniteresting stack items + $profile = RemoveUninterestingFrames($symbols, $profile); + + # Focus? + if ($main::opt_focus ne '') { + $profile = FocusProfile($symbols, $profile, $main::opt_focus); + } + + # Ignore? 
+ if ($main::opt_ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); + } + + my $calls = ExtractCalls($symbols, $profile); + + # Reduce profiles to required output granularity, and also clean + # each stack trace so a given entry exists at most once. + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + # Print + if (!$main::opt_interactive) { + if ($main::opt_disasm) { + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); + } elsif ($main::opt_list) { + PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0); + } elsif ($main::opt_text) { + # Make sure the output is empty when have nothing to report + # (only matters when --heapcheck is given but we must be + # compatible with old branches that did not pass --heapcheck always): + if ($total != 0) { + printf("Total: %s %s\n", Unparse($total), Units()); + } + if ($main::opt_stacks) { + printf("Stacks:\n\n"); + PrintStacksForText($symbols, $profile); + } + PrintText($symbols, $flat, $cumulative, -1); + } elsif ($main::opt_raw) { + PrintSymbolizedProfile($symbols, $profile, $main::prog); + } elsif ($main::opt_collapsed) { + PrintCollapsedStacks($symbols, $profile); + } elsif ($main::opt_callgrind) { + PrintCallgrind($calls); + } else { + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), ""); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), ""); + } elsif ($main::opt_web) { + my $tmp = TempName($main::next_tmpfile, "svg"); + RunWeb($tmp); + # The command we run might hand the file name off + # to an already running browser instance and then exit. + # Normally, we'd remove $tmp on exit (right now), + # but fork a child to remove $tmp a little later, so that the + # browser has time to load it first. 
+ delete $main::tempnames{$tmp}; + if (fork() == 0) { + sleep 5; + unlink($tmp); + exit(0); + } + } + } else { + cleanup(); + exit(1); + } + } + } else { + InteractiveMode($profile, $symbols, $libs, $total); + } + + cleanup(); + exit(0); +} + +##### Entry Point ##### + +Main(); + +# Temporary code to detect if we're running on a Goobuntu system. +# These systems don't have the right stuff installed for the special +# Readline libraries to work, so as a temporary workaround, we default +# to using the normal stdio code, rather than the fancier readline-based +# code +sub ReadlineMightFail { + if (-e '/lib/libtermcap.so.2') { + return 0; # libtermcap exists, so readline should be okay + } else { + return 1; + } +} + +sub RunGV { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) { + # Options using double dash are supported by this gv version. + # Also, turn on noantialias to better handle bug in gv for + # postscript files with large dimensions. + # TODO: Maybe we should not pass the --noantialias flag + # if the gv version is known to work properly without the flag. + system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname) + . $bg); + } else { + # Old gv version - only supports options that use single dash. + print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n"; + system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg); + } +} + +sub RunEvince { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + system(ShellEscape(@EVINCE, $fname) . $bg); +} + +sub RunWeb { + my $fname = shift; + print STDERR "Loading web page file:///$fname\n"; + + if (`uname` =~ /Darwin/) { + # OS X: open will use standard preference for SVG files. + system("/usr/bin/open", $fname); + return; + } + + if (`uname` =~ /MINGW/) { + # Windows(MinGW): open will use standard preference for SVG files. 
+ system("cmd", "/c", "start", $fname); + return; + } + + # Some kind of Unix; try generic symlinks, then specific browsers. + # (Stop once we find one.) + # Works best if the browser is already running. + my @alt = ( + "/etc/alternatives/gnome-www-browser", + "/etc/alternatives/x-www-browser", + "google-chrome", + "firefox", + ); + foreach my $b (@alt) { + if (system($b, $fname) == 0) { + return; + } + } + + print STDERR "Could not load web browser.\n"; +} + +sub RunKcachegrind { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n"; + system(ShellEscape(@KCACHEGRIND, $fname) . $bg); +} + + +##### Interactive helper routines ##### + +sub InteractiveMode { + $| = 1; # Make output unbuffered for interactive mode + my ($orig_profile, $symbols, $libs, $total) = @_; + + print STDERR "Welcome to pprof! For help, type 'help'.\n"; + + # Use ReadLine if it's installed and input comes from a console. + if ( -t STDIN && + !ReadlineMightFail() && + defined(eval {require Term::ReadLine}) ) { + my $term = new Term::ReadLine 'pprof'; + while ( defined ($_ = $term->readline('(pprof) '))) { + $term->addhistory($_) if /\S/; + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + } + } else { # don't have readline + while (1) { + print STDERR "(pprof) "; + $_ = <STDIN>; + last if ! defined $_ ; + s/\r//g; # turn windows-looking lines into unix-looking lines + + # Save some flags that might be reset by InteractiveCommand() + my $save_opt_lines = $main::opt_lines; + + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + + # Restore flags + $main::opt_lines = $save_opt_lines; + } + } +} + +# Takes two args: orig profile, and command to run. 
+# Returns 1 if we should keep going, or 0 if we were asked to quit +sub InteractiveCommand { + my($orig_profile, $symbols, $libs, $total, $command) = @_; + $_ = $command; # just to make future m//'s easier + if (!defined($_)) { + print STDERR "\n"; + return 0; + } + if (m/^\s*quit/) { + return 0; + } + if (m/^\s*help/) { + InteractiveHelpMessage(); + return 1; + } + # Clear all the mode options -- mode is controlled by "$command" + $main::opt_text = 0; + $main::opt_callgrind = 0; + $main::opt_disasm = 0; + $main::opt_list = 0; + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_cum = 0; + + if (m/^\s*(text|top)(\d*)\s*(.*)/) { + $main::opt_text = 1; + + my $line_limit = ($2 ne "") ? int($2) : 10; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($3); + + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintText($symbols, $flat, $cumulative, $line_limit); + return 1; + } + if (m/^\s*callgrind\s*([^ \n]*)/) { + $main::opt_callgrind = 1; + + # Get derived profiles + my $calls = ExtractCalls($symbols, $orig_profile); + my $filename = $1; + if ( $1 eq '' ) { + $filename = TempName($main::next_tmpfile, "callgrind"); + } + PrintCallgrind($calls, $filename); + if ( $1 eq '' ) { + RunKcachegrind($filename, " & "); + $main::next_tmpfile++; + } + + return 1; + } + if (m/^\s*(web)?list\s*(.+)/) { + my $html = (defined($1) && ($1 eq "web")); + $main::opt_list = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($2); + + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintListing($total, $libs, $flat, $cumulative, $routine, $html); + 
return 1; + } + if (m/^\s*disasm\s*(.+)/) { + $main::opt_disasm = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($1); + + # Process current profile to account for various settings + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintDisassembly($libs, $flat, $cumulative, $routine); + return 1; + } + if (m/^\s*(gv|web|evince)\s*(.*)/) { + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_web = 0; + if ($1 eq "gv") { + $main::opt_gv = 1; + } elsif ($1 eq "evince") { + $main::opt_evince = 1; + } elsif ($1 eq "web") { + $main::opt_web = 1; + } + + my $focus; + my $ignore; + ($focus, $ignore) = ParseInteractiveArgs($2); + + # Process current profile to account for various settings + my $profile = ProcessProfile($total, $orig_profile, $symbols, + $focus, $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), " &"); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), " &"); + } elsif ($main::opt_web) { + RunWeb(TempName($main::next_tmpfile, "svg")); + } + $main::next_tmpfile++; + } + return 1; + } + if (m/^\s*$/) { + return 1; + } + print STDERR "Unknown command: try 'help'.\n"; + return 1; +} + + +sub ProcessProfile { + my $total_count = shift; + my $orig_profile = shift; + my $symbols = shift; + my $focus = shift; + my $ignore = shift; + + # Process current profile to account for various settings + my $profile = $orig_profile; + printf("Total: %s %s\n", Unparse($total_count), Units()); + if ($focus ne '') { + $profile = FocusProfile($symbols, 
$profile, $focus); + my $focus_count = TotalProfile($profile); + printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n", + $focus, + Unparse($focus_count), Units(), + Unparse($total_count), ($focus_count*100.0) / $total_count); + } + if ($ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $ignore); + my $ignore_count = TotalProfile($profile); + printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n", + $ignore, + Unparse($ignore_count), Units(), + Unparse($total_count), + ($ignore_count*100.0) / $total_count); + } + + return $profile; +} + +sub InteractiveHelpMessage { + print STDERR <<ENDOFHELP; +Interactive pprof mode + +Commands: + gv + gv [focus] [-ignore1] [-ignore2] + Show graphical hierarchical display of current profile. Without + any arguments, shows all samples in the profile. With the optional + "focus" argument, restricts the samples shown to just those where + the "focus" regular expression matches a routine name on the stack + trace. + + web + web [focus] [-ignore1] [-ignore2] + Like GV, but displays profile in your web browser instead of using + Ghostview. Works best if your web browser is already running. + To change the browser that gets used: + On Linux, set the /etc/alternatives/gnome-www-browser symlink. + On OS X, change the Finder association for SVG files. + + list [routine_regexp] [-ignore1] [-ignore2] + Show source listing of routines whose names match "routine_regexp" + + weblist [routine_regexp] [-ignore1] [-ignore2] + Displays a source listing of routines whose names match "routine_regexp" + in a web browser. You can click on source lines to view the + corresponding disassembly. + + top [--cum] [-ignore1] [-ignore2] + top20 [--cum] [-ignore1] [-ignore2] + top37 [--cum] [-ignore1] [-ignore2] + Show top lines ordered by flat profile count, or cumulative count + if --cum is specified. 
If a number is present after 'top', the + top K routines will be shown (defaults to showing the top 10) + + disasm [routine_regexp] [-ignore1] [-ignore2] + Show disassembly of routines whose names match "routine_regexp", + annotated with sample counts. + + callgrind + callgrind [filename] + Generates callgrind file. If no filename is given, kcachegrind is called. + + help - This listing + quit or ^D - End pprof + +For commands that accept optional -ignore tags, samples where any routine in +the stack trace matches the regular expression in any of the -ignore +parameters will be ignored. + +Further pprof details are available at this location (or one similar): + + /usr/doc/gperftools-$PPROF_VERSION/cpu_profiler.html + /usr/doc/gperftools-$PPROF_VERSION/heap_profiler.html + +ENDOFHELP +} +sub ParseInteractiveArgs { + my $args = shift; + my $focus = ""; + my $ignore = ""; + my @x = split(/ +/, $args); + foreach $a (@x) { + if ($a =~ m/^(--|-)lines$/) { + $main::opt_lines = 1; + } elsif ($a =~ m/^(--|-)cum$/) { + $main::opt_cum = 1; + } elsif ($a =~ m/^-(.*)/) { + $ignore .= (($ignore ne "") ? "|" : "" ) . $1; + } else { + $focus .= (($focus ne "") ? "|" : "" ) . 
$a; + } + } + if ($ignore ne "") { + print STDERR "Ignoring samples in call stacks that match '$ignore'\n"; + } + return ($focus, $ignore); +} + +##### Output code ##### + +sub TempName { + my $fnum = shift; + my $ext = shift; + my $file = "$main::tmpfile_ps.$fnum.$ext"; + $main::tempnames{$file} = 1; + return $file; +} + +# Print profile data in packed binary format (64-bit) to standard out +sub PrintProfileData { + my $profile = shift; + my $big_endian = pack("L", 1) eq pack("N", 1); + # print header (64-bit style) + # (zero) (header-size) (version) (sample-period) (zero) + if ($big_endian) { + print pack('L*', 0, 0, 0, 3, 0, 0, 0, 1, 0, 0); + } + else { + print pack('L*', 0, 0, 3, 0, 0, 0, 1, 0, 0, 0); + } + + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + if ($#addrs >= 0) { + my $depth = $#addrs + 1; + # int(foo / 2**32) is the only reliable way to get rid of bottom + # 32 bits on both 32- and 64-bit systems. + if ($big_endian) { + print pack('L*', int($count / 2**32), $count & 0xFFFFFFFF); + print pack('L*', int($depth / 2**32), $depth & 0xFFFFFFFF); + } + else { + print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32)); + print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32)); + } + + foreach my $full_addr (@addrs) { + my $addr = $full_addr; + $addr =~ s/0x0*//; # strip off leading 0x, zeroes + if (length($addr) > 16) { + print STDERR "Invalid address in profile: $full_addr\n"; + next; + } + my $low_addr = substr($addr, -8); # get last 8 hex chars + my $high_addr = substr($addr, -16, 8); # get up to 8 more hex chars + if ($big_endian) { + print pack('L*', hex('0x' . $high_addr), hex('0x' . $low_addr)); + } + else { + print pack('L*', hex('0x' . $low_addr), hex('0x' . 
$high_addr)); + } + } + } + } +} + +# Print symbols and profile data +sub PrintSymbolizedProfile { + my $symbols = shift; + my $profile = shift; + my $prog = shift; + + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + + print '--- ', $symbol_marker, "\n"; + if (defined($prog)) { + print 'binary=', $prog, "\n"; + } + while (my ($pc, $name) = each(%{$symbols})) { + my $sep = ' '; + print '0x', $pc; + # We have a list of function names, which include the inlined + # calls. They are separated (and terminated) by --, which is + # illegal in function names. + for (my $j = 2; $j <= $#{$name}; $j += 3) { + print $sep, $name->[$j]; + $sep = '--'; + } + print "\n"; + } + print '---', "\n"; + + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; + print '--- ', $profile_marker, "\n"; + if (defined($main::collected_profile)) { + # if used with remote fetch, simply dump the collected profile to output. + open(SRC, "<$main::collected_profile"); + while (<SRC>) { + print $_; + } + close(SRC); + } else { + # dump a cpu-format profile to standard out + PrintProfileData($profile); + } +} + +# Print text output +sub PrintText { + my $symbols = shift; + my $flat = shift; + my $cumulative = shift; + my $line_limit = shift; + + if ($main::opt_stacks && @stackTraces) { + foreach (sort { (split " ", $b)[1] <=> (split " ", $a)[1]; } @stackTraces) { + print "$_\n" if $main::opt_debug; + my ($n1, $s1, $n2, $s2, @addrs) = split; + print "Leak of $s1 bytes in $n1 objects allocated from:\n"; + foreach my $pcstr (@addrs) { + $pcstr =~ s/^0x//; + my $sym; + if (! defined $symbols->{$pcstr}) { + $sym = "unknown"; + } else { + $sym = "$symbols->{$pcstr}[0] $symbols->{$pcstr}[1]"; + } + print "\t@ $pcstr $sym\n"; + } + } + print "\n"; + } + + my $total = TotalProfile($flat); + + # Which profile to sort by? + my $s = $main::opt_cum ? 
$cumulative : $flat; + + my $running_sum = 0; + my $lines = 0; + foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b } + keys(%{$cumulative})) { + my $f = GetEntry($flat, $k); + my $c = GetEntry($cumulative, $k); + $running_sum += $f; + + my $sym = $k; + if (exists($symbols->{$k})) { + $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1]; + if ($main::opt_addresses) { + $sym = $k . " " . $sym; + } + } + + if ($f != 0 || $c != 0) { + printf("%8s %6s %6s %8s %6s %s\n", + Unparse($f), + Percent($f, $total), + Percent($running_sum, $total), + Unparse($c), + Percent($c, $total), + $sym); + } + $lines++; + last if ($line_limit >= 0 && $lines >= $line_limit); + } +} + +# Callgrind format has a compression for repeated function and file +# names. You show the name the first time, and just use its number +# subsequently. This can cut down the file to about a third or a +# quarter of its uncompressed size. $key and $val are the key/value +# pair that would normally be printed by callgrind; $map is a map from +# value to number. +sub CompressedCGName { + my($key, $val, $map) = @_; + my $idx = $map->{$val}; + # For very short keys, providing an index hurts rather than helps. + if (length($val) <= 3) { + return "$key=$val\n"; + } elsif (defined($idx)) { + return "$key=($idx)\n"; + } else { + # scalar(keys $map) gives the number of items in the map. + $idx = scalar(keys(%{$map})) + 1; + $map->{$val} = $idx; + return "$key=($idx) $val\n"; + } +} + +# Print the call graph in a way that's suiteable for callgrind. 
sub PrintCallgrind {
  my $calls = shift;
  my $filename;
  my %filename_to_index_map;
  my %fnname_to_index_map;

  if ($main::opt_interactive) {
    $filename = shift;
    print STDERR "Writing callgrind file to '$filename'.\n"
  } else {
    $filename = "&STDOUT";
  }
  open(CG, ">$filename");
  print CG ("events: Hits\n\n");
  # Sort calls by caller file then caller line so output is deterministic.
  foreach my $call ( map { $_->[0] }
                     sort { $a->[1] cmp $b->[1] ||
                            $a->[2] <=> $b->[2] }
                     map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/;
                           [$_, $1, $2] }
                     keys %$calls ) {
    my $count = int($calls->{$call});
    $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/;
    my ( $caller_file, $caller_line, $caller_function,
         $callee_file, $callee_line, $callee_function ) =
       ( $1, $2, $3, $5, $6, $7 );

    # TODO(csilvers): for better compression, collect all the
    # caller/callee_files and functions first, before printing
    # anything, and only compress those referenced more than once.
    print CG CompressedCGName("fl", $caller_file, \%filename_to_index_map);
    print CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map);
    if (defined $6) {
      print CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map);
      print CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map);
      print CG ("calls=$count $callee_line\n");
    }
    print CG ("$caller_line $count\n\n");
  }
}

# Print disassembly for all all routines that match $main::opt_disasm
sub PrintDisassembly {
  my $libs = shift;
  my $flat = shift;
  my $cumulative = shift;
  my $disasm_opts = shift;

  my $total = TotalProfile($flat);

  foreach my $lib (@{$libs}) {
    my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts);
    my $offset = AddressSub($lib->[1], $lib->[3]);
    foreach my $routine (sort ByName keys(%{$symbol_table})) {
      my $start_addr = $symbol_table->{$routine}->[0];
      my $end_addr = $symbol_table->{$routine}->[1];
      # See if there are any samples in this routine
      my $length = hex(AddressSub($end_addr, $start_addr));
      my $addr = AddressAdd($start_addr, $offset);
      for (my $i = 0; $i < $length; $i++) {
        if (defined($cumulative->{$addr})) {
          PrintDisassembledFunction($lib->[0], $offset,
                                    $routine, $flat, $cumulative,
                                    $start_addr, $end_addr, $total);
          last;
        }
        $addr = AddressInc($addr);
      }
    }
  }
}

# Return reference to array of tuples of the form:
#       [start_address, filename, linenumber, instruction, limit_address]
# E.g.,
#       ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"]
sub Disassemble {
  my $prog = shift;
  my $offset = shift;
  my $start_addr = shift;
  my $end_addr = shift;

  my $objdump = $obj_tool_map{"objdump"};
  my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn",
                        "--start-address=0x$start_addr",
                        "--stop-address=0x$end_addr", $prog);
  open(OBJDUMP, "$cmd |") || error("$cmd: $!\n");
  my @result = ();
  my $filename = "";
  my $linenumber = -1;
  my $last = ["", "", "", ""];
  while (<OBJDUMP>) {
    s/\r//g;    # turn windows-looking lines into unix-looking lines
    chop;
    if (m|\s*([^:\s]+):(\d+)\s*$|) {
      # Location line of the form:
      #   <filename>:<linenumber>
      $filename = $1;
      $linenumber = $2;
    } elsif (m/^ +([0-9a-f]+):\s*(.*)/) {
      # Disassembly line -- zero-extend address to full length
      my $addr = HexExtend($1);
      my $k = AddressAdd($addr, $offset);
      $last->[4] = $k;   # Store ending address for previous instruction
      $last = [$k, $filename, $linenumber, $2, $end_addr];
      push(@result, $last);
    }
  }
  close(OBJDUMP);
  return @result;
}

# The input file should contain lines of the form /proc/maps-like
# output (same format as expected from the profiles) or that looks
# like hex addresses (like "0xDEADBEEF").  We will parse all
# /proc/maps output, and for all the hex addresses, we will output
# "short" symbol names, one per line, in the same order as the input.
sub PrintSymbols {
  my $maps_and_symbols_file = shift;

  # ParseLibraries expects pcs to be in a set.  Fine by us...
  my @pclist = ();   # pcs in sorted order
  my $pcs = {};
  my $map = "";
  foreach my $line (<$maps_and_symbols_file>) {
    $line =~ s/\r//g;    # turn windows-looking lines into unix-looking lines
    if ($line =~ /\b(0x[0-9a-f]+)\b/i) {
      push(@pclist, HexExtend($1));
      $pcs->{$pclist[-1]} = 1;
    } else {
      $map .= $line;
    }
  }

  my $libs = ParseLibraries($main::prog, $map, $pcs);
  my $symbols = ExtractSymbols($libs, $pcs);

  foreach my $pc (@pclist) {
    # ->[0] is the shortname, ->[2] is the full name
    print(($symbols->{$pc}->[0] || "??") . "\n");
  }
}

# For sorting functions by name
sub ByName {
  return ShortFunctionName($a) cmp ShortFunctionName($b);
}

# Print source-listing for all all routines that match $list_opts
sub PrintListing {
  my $total = shift;
  my $libs = shift;
  my $flat = shift;
  my $cumulative = shift;
  my $list_opts = shift;
  my $html = shift;

  my $output = \*STDOUT;
  my $fname = "";

  if ($html) {
    # Arrange to write the output to a temporary file
    $fname = TempName($main::next_tmpfile, "html");
    $main::next_tmpfile++;
    if (!open(TEMP, ">$fname")) {
      print STDERR "$fname: $!\n";
      return;
    }
    $output = \*TEMP;
    print $output HtmlListingHeader();
    printf $output ("<div class=\"legend\">%s<br>Total: %s %s</div>\n",
                    $main::prog, Unparse($total), Units());
  }

  my $listed = 0;
  foreach my $lib (@{$libs}) {
    my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts);
    my $offset = AddressSub($lib->[1], $lib->[3]);
    foreach my $routine (sort ByName keys(%{$symbol_table})) {
      # Print if there are any samples in this routine
      my $start_addr = $symbol_table->{$routine}->[0];
      my $end_addr = $symbol_table->{$routine}->[1];
      my $length = hex(AddressSub($end_addr, $start_addr));
      my $addr = AddressAdd($start_addr, $offset);
      for (my $i = 0; $i < $length; $i++) {
        if (defined($cumulative->{$addr})) {
          $listed += PrintSource(
            $lib->[0], $offset,
            $routine, $flat, $cumulative,
            $start_addr, $end_addr,
            $html,
            $output);
          last;
        }
        $addr = AddressInc($addr);
      }
    }
  }

  if ($html) {
    if ($listed > 0) {
      print $output HtmlListingFooter();
      close($output);
      RunWeb($fname);
    } else {
      close($output);
      unlink($fname);
    }
  }
}

# Boilerplate HTML (CSS + the asm-toggle script) emitted before any
# source listings.  Closed by HtmlListingFooter().
sub HtmlListingHeader {
  return <<'EOF';
<!DOCTYPE html>
<html>
<head>
<title>Pprof listing</title>
<style type="text/css">
body {
  font-family: sans-serif;
}
h1 {
  font-size: 1.5em;
  margin-bottom: 4px;
}
.legend {
  font-size: 1.25em;
}
.line {
  color: #aaaaaa;
}
.nop {
  color: #aaaaaa;
}
.unimportant {
  color: #cccccc;
}
.disasmloc {
  color: #000000;
}
.deadsrc {
  cursor: pointer;
}
.deadsrc:hover {
  background-color: #eeeeee;
}
.livesrc {
  color: #0000ff;
  cursor: pointer;
}
.livesrc:hover {
  background-color: #eeeeee;
}
.asm {
  color: #008800;
  display: none;
}
</style>
<script type="text/javascript">
function pprof_toggle_asm(e) {
  var target;
  if (!e) e = window.event;
  if (e.target) target = e.target;
  else if (e.srcElement) target = e.srcElement;

  if (target) {
    var asm = target.nextSibling;
    if (asm && asm.className == "asm") {
      asm.style.display = (asm.style.display == "block" ? "" : "block");
      e.preventDefault();
      return false;
    }
  }
}
</script>
</head>
<body>
EOF
}

sub HtmlListingFooter {
  return <<'EOF';
</body>
</html>
EOF
}

# Escape the three HTML metacharacters for safe embedding in the
# listing page.  '&' must be rewritten first so that the entities
# introduced for '<' and '>' are not themselves re-escaped.
sub HtmlEscape {
  my $text = shift;
  $text =~ s/&/&amp;/g;
  $text =~ s/</&lt;/g;
  $text =~ s/>/&gt;/g;
  return $text;
}

# Returns the indentation of the line, if it has any non-whitespace
# characters.  Otherwise, returns -1.
sub Indentation {
  my $line = shift;
  # Match against the argument rather than $_: the old code matched $_,
  # which only worked because the sole caller passed $_ itself.
  if ($line =~ m/^(\s*)\S/) {
    return length($1);
  } else {
    return -1;
  }
}

# If the symbol table contains inlining info, Disassemble() may tag an
# instruction with a location inside an inlined function.  But for
# source listings, we prefer to use the location in the function we
# are listing.  So use MapToSymbols() to fetch full location
# information for each instruction and then pick out the first
# location from a location list (location list contains callers before
# callees in case of inlining).
#
# After this routine has run, each entry in $instructions contains:
#   [0] start address
#   [1] filename for function we are listing
#   [2] line number for function we are listing
#   [3] disassembly
#   [4] limit address
#   [5] most specific filename (may be different from [1] due to inlining)
#   [6] most specific line number (may be different from [2] due to inlining)
sub GetTopLevelLineNumbers {
  my ($lib, $offset, $instructions) = @_;
  my $pcs = [];
  for (my $i = 0; $i <= $#{$instructions}; $i++) {
    push(@{$pcs}, $instructions->[$i]->[0]);
  }
  my $symbols = {};
  MapToSymbols($lib, $offset, $pcs, $symbols);
  for (my $i = 0; $i <= $#{$instructions}; $i++) {
    my $e = $instructions->[$i];
    push(@{$e}, $e->[1]);
    push(@{$e}, $e->[2]);
    my $addr = $e->[0];
    my $sym = $symbols->{$addr};
    if (defined($sym)) {
      if ($#{$sym} >= 2 && $sym->[1] =~ m/^(.*):(\d+)$/) {
        $e->[1] = $1;  # File name
        $e->[2] = $2;  # Line number
      }
    }
  }
}

# Print source-listing for one routine
sub PrintSource {
  my $prog = shift;
  my $offset = shift;
  my $routine = shift;
  my $flat = shift;
  my $cumulative = shift;
  my $start_addr = shift;
  my $end_addr = shift;
  my $html = shift;
  my $output = shift;

  # Disassemble all instructions (just to get line numbers)
  my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr);
  GetTopLevelLineNumbers($prog, $offset, \@instructions);

  # Hack 1: assume that the first source file encountered in the
  # disassembly contains the routine
  my $filename = undef;
  for (my $i = 0; $i <= $#instructions; $i++) {
    if ($instructions[$i]->[2] >= 0) {
      $filename = $instructions[$i]->[1];
      last;
    }
  }
  if (!defined($filename)) {
    print STDERR "no filename found in $routine\n";
    return 0;
  }

  # Hack 2: assume that the largest line number from $filename is the
  # end of the procedure.  This is typically safe since if P1 contains
  # an inlined call to P2, then P2 usually occurs earlier in the
  # source file.  If this does not work, we might have to compute a
  # density profile or just print all regions we find.
  my $lastline = 0;
  for (my $i = 0; $i <= $#instructions; $i++) {
    my $f = $instructions[$i]->[1];
    my $l = $instructions[$i]->[2];
    if (($f eq $filename) && ($l > $lastline)) {
      $lastline = $l;
    }
  }

  # Hack 3: assume the first source location from "filename" is the start of
  # the source code.
  my $firstline = 1;
  for (my $i = 0; $i <= $#instructions; $i++) {
    if ($instructions[$i]->[1] eq $filename) {
      $firstline = $instructions[$i]->[2];
      last;
    }
  }

  # Hack 4: Extend last line forward until its indentation is less than
  # the indentation we saw on $firstline
  my $oldlastline = $lastline;
  {
    if (!open(FILE, "<$filename")) {
      print STDERR "$filename: $!\n";
      return 0;
    }
    my $l = 0;
    my $first_indentation = -1;
    while (<FILE>) {
      s/\r//g;         # turn windows-looking lines into unix-looking lines
      $l++;
      my $indent = Indentation($_);
      if ($l >= $firstline) {
        if ($first_indentation < 0 && $indent >= 0) {
          $first_indentation = $indent;
          last if ($first_indentation == 0);
        }
      }
      if ($l >= $lastline && $indent >= 0) {
        if ($indent >= $first_indentation) {
          $lastline = $l+1;
        } else {
          last;
        }
      }
    }
    close(FILE);
  }

  # Assign all samples to the range $firstline,$lastline,
  # Hack 4: If an instruction does not occur in the range, its samples
  # are moved to the next instruction that occurs in the range.
  my $samples1 = {};        # Map from line number to flat count
  my $samples2 = {};        # Map from line number to cumulative count
  my $running1 = 0;         # Unassigned flat counts
  my $running2 = 0;         # Unassigned cumulative counts
  my $total1 = 0;           # Total flat counts
  my $total2 = 0;           # Total cumulative counts
  my %disasm = ();          # Map from line number to disassembly
  my $running_disasm = "";  # Unassigned disassembly
  my $skip_marker = "---\n";
  if ($html) {
    $skip_marker = "";
    for (my $l = $firstline; $l <= $lastline; $l++) {
      $disasm{$l} = "";
    }
  }
  my $last_dis_filename = '';
  my $last_dis_linenum = -1;
  my $last_touched_line = -1;  # To detect gaps in disassembly for a line
  foreach my $e (@instructions) {
    # Add up counts for all address that fall inside this instruction
    my $c1 = 0;
    my $c2 = 0;
    for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) {
      $c1 += GetEntry($flat, $a);
      $c2 += GetEntry($cumulative, $a);
    }

    if ($html) {
      my $dis = sprintf("      %6s %6s \t\t%8s: %s ",
                        HtmlPrintNumber($c1),
                        HtmlPrintNumber($c2),
                        UnparseAddress($offset, $e->[0]),
                        CleanDisassembly($e->[3]));

      # Append the most specific source line associated with this instruction
      if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) };
      $dis = HtmlEscape($dis);
      my $f = $e->[5];
      my $l = $e->[6];
      if ($f ne $last_dis_filename) {
        $dis .= sprintf("<span class=disasmloc>%s:%d</span>",
                        HtmlEscape(CleanFileName($f)), $l);
      } elsif ($l ne $last_dis_linenum) {
        # De-emphasize the unchanged file name portion
        $dis .= sprintf("<span class=unimportant>%s</span>" .
                        "<span class=disasmloc>:%d</span>",
                        HtmlEscape(CleanFileName($f)), $l);
      } else {
        # De-emphasize the entire location
        $dis .= sprintf("<span class=unimportant>%s:%d</span>",
                        HtmlEscape(CleanFileName($f)), $l);
      }
      $last_dis_filename = $f;
      $last_dis_linenum = $l;
      $running_disasm .= $dis;
      $running_disasm .= "\n";
    }

    $running1 += $c1;
    $running2 += $c2;
    $total1 += $c1;
    $total2 += $c2;
    my $file = $e->[1];
    my $line = $e->[2];
    if (($file eq $filename) &&
        ($line >= $firstline) &&
        ($line <= $lastline)) {
      # Assign all accumulated samples to this line
      AddEntry($samples1, $line, $running1);
      AddEntry($samples2, $line, $running2);
      $running1 = 0;
      $running2 = 0;
      if ($html) {
        if ($line != $last_touched_line && $disasm{$line} ne '') {
          $disasm{$line} .= "\n";
        }
        $disasm{$line} .= $running_disasm;
        $running_disasm = '';
        $last_touched_line = $line;
      }
    }
  }

  # Assign any leftover samples to $lastline
  AddEntry($samples1, $lastline, $running1);
  AddEntry($samples2, $lastline, $running2);
  if ($html) {
    if ($lastline != $last_touched_line && $disasm{$lastline} ne '') {
      $disasm{$lastline} .= "\n";
    }
    $disasm{$lastline} .= $running_disasm;
  }

  if ($html) {
    printf $output (
      "<h1>%s</h1>%s\n<pre onClick=\"pprof_toggle_asm()\">\n" .
      "Total:%6s %6s (flat / cumulative %s)\n",
      HtmlEscape(ShortFunctionName($routine)),
      HtmlEscape(CleanFileName($filename)),
      Unparse($total1),
      Unparse($total2),
      Units());
  } else {
    printf $output (
      "ROUTINE ====================== %s in %s\n" .
      "%6s %6s Total %s (flat / cumulative)\n",
      ShortFunctionName($routine),
      CleanFileName($filename),
      Unparse($total1),
      Unparse($total2),
      Units());
  }
  if (!open(FILE, "<$filename")) {
    print STDERR "$filename: $!\n";
    return 0;
  }
  my $l = 0;
  while (<FILE>) {
    s/\r//g;         # turn windows-looking lines into unix-looking lines
    $l++;
    if ($l >= $firstline - 5 &&
        (($l <= $oldlastline + 5) || ($l <= $lastline))) {
      chop;
      my $text = $_;
      if ($l == $firstline) { print $output $skip_marker; }
      my $n1 = GetEntry($samples1, $l);
      my $n2 = GetEntry($samples2, $l);
      if ($html) {
        # Emit a span that has one of the following classes:
        #    livesrc -- has samples
        #    deadsrc -- has disassembly, but with no samples
        #    nop     -- has no matching disasembly
        # Also emit an optional span containing disassembly.
        my $dis = $disasm{$l};
        my $asm = "";
        if (defined($dis) && $dis ne '') {
          $asm = "<span class=\"asm\">" . $dis . "</span>";
        }
        my $source_class = (($n1 + $n2 > 0)
                            ? "livesrc"
                            : (($asm ne "") ? "deadsrc" : "nop"));
        printf $output (
          "<span class=\"line\">%5d</span> " .
          "<span class=\"%s\">%6s %6s %s</span>%s\n",
          $l, $source_class,
          HtmlPrintNumber($n1),
          HtmlPrintNumber($n2),
          HtmlEscape($text),
          $asm);
      } else {
        printf $output(
          "%6s %6s %4d: %s\n",
          UnparseAlt($n1),
          UnparseAlt($n2),
          $l,
          $text);
      }
      if ($l == $lastline)  { print $output $skip_marker; }
    };
  }
  close(FILE);
  if ($html) {
    print $output "</pre>\n";
  }
  return 1;
}

# Return the source line for the specified file/linenumber.
# Returns undef if not found.
sub SourceLine {
  my $file = shift;
  my $line = shift;

  # Look in cache
  if (!defined($main::source_cache{$file})) {
    if (100 < scalar keys(%main::source_cache)) {
      # Clear the cache when it gets too big.  (The old code assigned to
      # the scalar $main::source_cache, which left the hash untouched and
      # let it grow without bound; clear the hash itself.)
      %main::source_cache = ();
    }

    # Read all lines from the file
    if (!open(FILE, "<$file")) {
      print STDERR "$file: $!\n";
      $main::source_cache{$file} = [];  # Cache the negative result
      return undef;
    }
    my $lines = [];
    push(@{$lines}, "");  # So we can use 1-based line numbers as indices
    while (<FILE>) {
      push(@{$lines}, $_);
    }
    close(FILE);

    # Save the lines in the cache
    $main::source_cache{$file} = $lines;
  }

  my $lines = $main::source_cache{$file};
  if (($line < 0) || ($line > $#{$lines})) {
    return undef;
  } else {
    return $lines->[$line];
  }
}

# Print disassembly for one routine with interspersed source if available
sub PrintDisassembledFunction {
  my $prog = shift;
  my $offset = shift;
  my $routine = shift;
  my $flat = shift;
  my $cumulative = shift;
  my $start_addr = shift;
  my $end_addr = shift;
  my $total = shift;

  # Disassemble all instructions
  my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr);

  # Make array of counts per instruction
  my @flat_count = ();
  my @cum_count = ();
  my $flat_total = 0;
  my $cum_total = 0;
  foreach my $e (@instructions) {
    # Add up counts for all address that fall inside this instruction
    my $c1 = 0;
    my $c2 = 0;
    for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) {
      $c1 += GetEntry($flat, $a);
      $c2 += GetEntry($cumulative, $a);
    }
    push(@flat_count, $c1);
    push(@cum_count, $c2);
    $flat_total += $c1;
    $cum_total += $c2;
  }

  # Print header with total counts
  printf("ROUTINE ====================== %s\n" .
         "%6s %6s %s (flat, cumulative) %.1f%% of total\n",
         ShortFunctionName($routine),
         Unparse($flat_total),
         Unparse($cum_total),
         Units(),
         ($cum_total * 100.0) / $total);

  # Process instructions in order
  my $current_file = "";
  for (my $i = 0; $i <= $#instructions; ) {
    my $e = $instructions[$i];

    # Print the new file name whenever we switch files
    if ($e->[1] ne $current_file) {
      $current_file = $e->[1];
      my $fname = $current_file;
      $fname =~ s|^\./||;   # Trim leading "./"

      # Shorten long file names
      if (length($fname) >= 58) {
        $fname = "..." . substr($fname, -55);
      }
      printf("-------------------- %s\n", $fname);
    }

    # TODO: Compute range of lines to print together to deal with
    # small reorderings.
    my $first_line = $e->[2];
    my $last_line = $first_line;
    my %flat_sum = ();
    my %cum_sum = ();
    for (my $l = $first_line; $l <= $last_line; $l++) {
      $flat_sum{$l} = 0;
      $cum_sum{$l} = 0;
    }

    # Find run of instructions for this range of source lines
    my $first_inst = $i;
    while (($i <= $#instructions) &&
           ($instructions[$i]->[2] >= $first_line) &&
           ($instructions[$i]->[2] <= $last_line)) {
      $e = $instructions[$i];
      $flat_sum{$e->[2]} += $flat_count[$i];
      $cum_sum{$e->[2]} += $cum_count[$i];
      $i++;
    }
    my $last_inst = $i - 1;

    # Print source lines
    for (my $l = $first_line; $l <= $last_line; $l++) {
      my $line = SourceLine($current_file, $l);
      if (!defined($line)) {
        $line = "?\n";
        next;
      } else {
        $line =~ s/^\s+//;
      }
      printf("%6s %6s %5d: %s",
             UnparseAlt($flat_sum{$l}),
             UnparseAlt($cum_sum{$l}),
             $l,
             $line);
    }

    # Print disassembly
    for (my $x = $first_inst; $x <= $last_inst; $x++) {
      my $e = $instructions[$x];
      printf("%6s %6s %8s: %6s\n",
             UnparseAlt($flat_count[$x]),
             UnparseAlt($cum_count[$x]),
             UnparseAddress($offset, $e->[0]),
             CleanDisassembly($e->[3]));
    }
  }
}

# Print DOT graph
sub PrintDot {
  my $prog = shift;
  my $symbols = shift;
  my $raw = shift;
  my $flat = shift;
  my $cumulative = shift;
  my $overall_total = shift;

  # Get total
  my $local_total = TotalProfile($flat);
  my $nodelimit = int($main::opt_nodefraction * $local_total);
  my $edgelimit = int($main::opt_edgefraction * $local_total);
  my $nodecount = $main::opt_nodecount;

  # Find nodes to include
  my @list = (sort { abs(GetEntry($cumulative, $b)) <=>
                     abs(GetEntry($cumulative, $a))
                     || $a cmp $b }
              keys(%{$cumulative}));
  my $last = $nodecount - 1;
  if ($last > $#list) {
    $last = $#list;
  }
  while (($last >= 0) &&
         (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) {
    $last--;
  }
  if ($last < 0) {
    print STDERR "No nodes to print\n";
    return 0;
  }

  if ($nodelimit > 0 || $edgelimit > 0) {
    printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n",
                   Unparse($nodelimit), Units(),
                   Unparse($edgelimit), Units());
  }

  # Open DOT output file
  my $output;
  my $escaped_dot = ShellEscape(@DOT);
  my $escaped_ps2pdf = ShellEscape(@PS2PDF);
  if ($main::opt_gv) {
    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps"));
    $output = "| $escaped_dot -Tps2 >$escaped_outfile";
  } elsif ($main::opt_evince) {
    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf"));
    $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile";
  } elsif ($main::opt_ps) {
    $output = "| $escaped_dot -Tps2";
  } elsif ($main::opt_pdf) {
    $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -";
  } elsif ($main::opt_web || $main::opt_svg) {
    # We need to post-process the SVG, so write to a temporary file always.
    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg"));
    $output = "| $escaped_dot -Tsvg >$escaped_outfile";
  } elsif ($main::opt_gif) {
    $output = "| $escaped_dot -Tgif";
  } else {
    $output = ">&STDOUT";
  }
  open(DOT, $output) || error("$output: $!\n");

  # Title
  printf DOT ("digraph \"%s; %s %s\" {\n",
              $prog,
              Unparse($overall_total),
              Units());
  if ($main::opt_pdf) {
    # The output is more printable if we set the page size for dot.
    printf DOT ("size=\"8,11\"\n");
  }
  printf DOT ("node [width=0.375,height=0.25];\n");

  # Print legend
  printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," .
              "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n",
              $prog,
              sprintf("Total %s: %s", Units(), Unparse($overall_total)),
              sprintf("Focusing on: %s", Unparse($local_total)),
              sprintf("Dropped nodes with <= %s abs(%s)",
                      Unparse($nodelimit), Units()),
              sprintf("Dropped edges with <= %s %s",
                      Unparse($edgelimit), Units())
              );

  # Print nodes
  my %node = ();
  my $nextnode = 1;
  foreach my $a (@list[0..$last]) {
    # Pick font size
    my $f = GetEntry($flat, $a);
    my $c = GetEntry($cumulative, $a);

    my $fs = 8;
    if ($local_total > 0) {
      $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total)));
    }

    $node{$a} = $nextnode++;
    my $sym = $a;
    $sym =~ s/\s+/\\n/g;
    $sym =~ s/::/\\n/g;

    # Extra cumulative info to print for non-leaves
    my $extra = "";
    if ($f != $c) {
      $extra = sprintf("\\rof %s (%s)",
                       Unparse($c),
                       Percent($c, $local_total));
    }
    my $style = "";
    if ($main::opt_heapcheck) {
      if ($f > 0) {
        # make leak-causing nodes more visible (add a background)
        $style = ",style=filled,fillcolor=gray"
      } elsif ($f < 0) {
        # make anti-leak-causing nodes (which almost never occur)
        # stand out as well (triple border)
        $style = ",peripheries=3"
      }
    }

    printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" .
                "\",shape=box,fontsize=%.1f%s];\n",
                $node{$a},
                $sym,
                Unparse($f),
                Percent($f, $local_total),
                $extra,
                $fs,
                $style,
               );
  }

  # Get edges and counts per edge
  my %edge = ();
  my $n;
  my $fullname_to_shortname_map = {};
  FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map);
  foreach my $k (keys(%{$raw})) {
    # TODO: omit low %age edges
    $n = $raw->{$k};
    my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k);
    for (my $i = 1; $i <= $#translated; $i++) {
      my $src = $translated[$i];
      my $dst = $translated[$i-1];
      #next if ($src eq $dst);  # Avoid self-edges?
      if (exists($node{$src}) && exists($node{$dst})) {
        my $edge_label = "$src\001$dst";
        if (!exists($edge{$edge_label})) {
          $edge{$edge_label} = 0;
        }
        $edge{$edge_label} += $n;
      }
    }
  }

  # Print edges (process in order of decreasing counts)
  my %indegree = ();   # Number of incoming edges added per node so far
  my %outdegree = ();  # Number of outgoing edges added per node so far
  foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) {
    my @x = split(/\001/, $e);
    $n = $edge{$e};

    # Initialize degree of kept incoming and outgoing edges if necessary
    my $src = $x[0];
    my $dst = $x[1];
    if (!exists($outdegree{$src})) { $outdegree{$src} = 0; }
    if (!exists($indegree{$dst})) { $indegree{$dst} = 0; }

    my $keep;
    if ($indegree{$dst} == 0) {
      # Keep edge if needed for reachability
      $keep = 1;
    } elsif (abs($n) <= $edgelimit) {
      # Drop if we are below --edgefraction
      $keep = 0;
    } elsif ($outdegree{$src} >= $main::opt_maxdegree ||
             $indegree{$dst} >= $main::opt_maxdegree) {
      # Keep limited number of in/out edges per node
      $keep = 0;
    } else {
      $keep = 1;
    }

    if ($keep) {
      $outdegree{$src}++;
      $indegree{$dst}++;

      # Compute line width based on edge count
      my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0);
      if ($fraction > 1) { $fraction = 1; }
      my $w = $fraction * 2;
      if ($w < 1 && ($main::opt_web || $main::opt_svg)) {
        # SVG output treats line widths < 1 poorly.
        $w = 1;
      }

      # Dot sometimes segfaults if given edge weights that are too large, so
      # we cap the weights at a large value
      my $edgeweight = abs($n) ** 0.7;
      if ($edgeweight > 100000) { $edgeweight = 100000; }
      $edgeweight = int($edgeweight);

      my $style = sprintf("setlinewidth(%f)", $w);
      if ($x[1] =~ m/\(inline\)/) {
        $style .= ",dashed";
      }

      # Use a slightly squashed function of the edge count as the weight
      printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n",
                  $node{$x[0]},
                  $node{$x[1]},
                  Unparse($n),
                  $edgeweight,
                  $style);
    }
  }

  print DOT ("}\n");
  close(DOT);

  if ($main::opt_web || $main::opt_svg) {
    # Rewrite SVG to be more usable inside web browser.
    RewriteSvg(TempName($main::next_tmpfile, "svg"));
  }

  return 1;
}

sub RewriteSvg {
  my $svgfile = shift;

  open(SVG, $svgfile) || die "open temp svg: $!";
  my @svg = <SVG>;
  close(SVG);
  unlink $svgfile;
  my $svg = join('', @svg);

  # Dot's SVG output is
  #
  #    <svg width="___" height="___"
  #     viewBox="___" xmlns=...>
  #    <g id="graph0" transform="...">
  #    ...
  #    </g>
  #    </svg>
  #
  # Change it to
  #
  #    <svg width="100%" height="100%"
  #     xmlns=...>
  #    $svg_javascript
  #    <g id="viewport" transform="translate(0,0)">
  #    <g id="graph0" transform="...">
  #    ...
  #    </g>
  #    </g>
  #    </svg>

  # Fix width, height; drop viewBox.
  $svg =~ s/(?s)<svg width="[^"]+" height="[^"]+"(.*?)viewBox="[^"]+"/<svg width="100%" height="100%"$1/;

  # Insert script, viewport <g> above first <g>
  my $svg_javascript = SvgJavascript();
  my $viewport = "<g id=\"viewport\" transform=\"translate(0,0)\">\n";
  $svg =~ s/<g id="graph\d"/$svg_javascript$viewport$&/;

  # Insert final </g> above </svg>.
  $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/;
  $svg =~ s/<g id="graph\d"(.*?)/<g id="viewport"$1/;

  if ($main::opt_svg) {
    # --svg: write to standard output.
    print $svg;
  } else {
    # Write back to temporary file.
    open(SVG, ">$svgfile") || die "open $svgfile: $!";
    print SVG $svg;
    close(SVG);
  }
}

# Returns the SVGPan pan/zoom script that RewriteSvg() embeds in the
# generated SVG.  The body is an opaque string emitted verbatim.
sub SvgJavascript {
  return <<'EOF';
<script type="text/ecmascript"><![CDATA[
// SVGPan
// http://www.cyberz.org/blog/2009/12/08/svgpan-a-javascript-svg-panzoomdrag-library/
// Local modification: if(true || ...) below to force panning, never moving.

/**
 *  SVGPan library 1.2
 * ====================
 *
 * Given an unique existing element with id "viewport", including the
 * the library into any SVG adds the following capabilities:
 *
 *  - Mouse panning
 *  - Mouse zooming (using the wheel)
 *  - Object dargging
 *
 * Known issues:
 *
 *  - Zooming (while panning) on Safari has still some issues
 *
 * Releases:
 *
 * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui
 *	Fixed a bug with browser mouse handler interaction
 *
 * 1.1, Wed Feb 3 17:39:33 GMT 2010, Zeng Xiaohui
 *	Updated the zoom code to support the mouse wheel on Safari/Chrome
 *
 * 1.0, Andrea Leofreddi
 *	First release
 *
 * This code is licensed under the following BSD license:
 *
 * Copyright 2009-2010 Andrea Leofreddi <a.leofreddi@itcharm.com>. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this list of
 *       conditions and the following disclaimer.
 *
 *    2. Redistributions in binary form must reproduce the above copyright notice, this list
 *       of conditions and the following disclaimer in the documentation and/or other materials
 *       provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those of the
 * authors and should not be interpreted as representing official policies, either expressed
 * or implied, of Andrea Leofreddi.
 */

var root = document.documentElement;

var state = 'none', stateTarget, stateOrigin, stateTf;

setupHandlers(root);

/**
 * Register handlers
 */
function setupHandlers(root){
	setAttributes(root, {
		"onmouseup" : "add(evt)",
		"onmousedown" : "handleMouseDown(evt)",
		"onmousemove" : "handleMouseMove(evt)",
		"onmouseup" : "handleMouseUp(evt)",
		//"onmouseout" : "handleMouseUp(evt)", // Decomment this to stop the pan functionality when dragging out of the SVG element
	});

	if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0)
		window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari
	else
		window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others

	var g = svgDoc.getElementById("svg");
	g.width = "100%";
	g.height = "100%";
}

/**
 * Instance an SVGPoint object with given event coordinates.
 */
function getEventPoint(evt) {
	var p = root.createSVGPoint();

	p.x = evt.clientX;
	p.y = evt.clientY;

	return p;
}

/**
 * Sets the current transform matrix of an element.
 */
function setCTM(element, matrix) {
	var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")";

	element.setAttribute("transform", s);
}

/**
 * Dumps a matrix to a string (useful for debug).
 */
function dumpMatrix(matrix) {
	var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\n  " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\n  0, 0, 1 ]";

	return s;
}

/**
 * Sets attributes of an element.
 */
function setAttributes(element, attributes){
	for (i in attributes)
		element.setAttributeNS(null, i, attributes[i]);
}

/**
 * Handle mouse move event.
 */
function handleMouseWheel(evt) {
	if(evt.preventDefault)
		evt.preventDefault();

	evt.returnValue = false;

	var svgDoc = evt.target.ownerDocument;

	var delta;

	if(evt.wheelDelta)
		delta = evt.wheelDelta / 3600; // Chrome/Safari
	else
		delta = evt.detail / -90; // Mozilla

	var z = 1 + delta; // Zoom factor: 0.9/1.1

	var g = svgDoc.getElementById("viewport");

	var p = getEventPoint(evt);

	p = p.matrixTransform(g.getCTM().inverse());

	// Compute new scale matrix in current mouse position
	var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y);

	setCTM(g, g.getCTM().multiply(k));

	stateTf = stateTf.multiply(k.inverse());
}

/**
 * Handle mouse move event.
 */
function handleMouseMove(evt) {
	if(evt.preventDefault)
		evt.preventDefault();

	evt.returnValue = false;

	var svgDoc = evt.target.ownerDocument;

	var g = svgDoc.getElementById("viewport");

	if(state == 'pan') {
		// Pan mode
		var p = getEventPoint(evt).matrixTransform(stateTf);

		setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y));
	} else if(state == 'move') {
		// Move mode
		var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse());

		setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM()));

		stateOrigin = p;
	}
}

/**
 * Handle click event.
 */
function handleMouseDown(evt) {
	if(evt.preventDefault)
		evt.preventDefault();

	evt.returnValue = false;

	var svgDoc = evt.target.ownerDocument;

	var g = svgDoc.getElementById("viewport");

	if(true || evt.target.tagName == "svg") {
		// Pan mode
		state = 'pan';

		stateTf = g.getCTM().inverse();

		stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
	} else {
		// Move mode
		state = 'move';

		stateTarget = evt.target;

		stateTf = g.getCTM().inverse();

		stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
	}
}

/**
 * Handle mouse button release event.
 */
function handleMouseUp(evt) {
	if(evt.preventDefault)
		evt.preventDefault();

	evt.returnValue = false;

	var svgDoc = evt.target.ownerDocument;

	if(state == 'pan' || state == 'move') {
		// Quit pan mode
		state = '';
	}
}

]]></script>
EOF
}

# Provides a map from fullname to shortname for cases where the
# shortname is ambiguous.  The symlist has both the fullname and
# shortname for all symbols, which is usually fine, but sometimes --
# such as overloaded functions -- two different fullnames can map to
# the same shortname.  In that case, we use the address of the
# function to disambiguate the two.  This function fills in a map that
# maps fullnames to modified shortnames in such cases.  If a fullname
# is not present in the map, the 'normal' shortname provided by the
# symlist is the appropriate one to use.
sub FillFullnameToShortnameMap {
  my $symbols = shift;
  my $fullname_to_shortname_map = shift;
  my $shortnames_seen_once = {};
  my $shortnames_seen_more_than_once = {};

  foreach my $symlist (values(%{$symbols})) {
    # TODO(csilvers): deal with inlined symbols too.
    my $shortname = $symlist->[0];
    my $fullname = $symlist->[2];
    if ($fullname !~ /<[0-9a-fA-F]+>$/) {  # fullname doesn't end in an address
      next;       # the only collisions we care about are when addresses differ
    }
    if (defined($shortnames_seen_once->{$shortname}) &&
        $shortnames_seen_once->{$shortname} ne $fullname) {
      $shortnames_seen_more_than_once->{$shortname} = 1;
    } else {
      $shortnames_seen_once->{$shortname} = $fullname;
    }
  }

  foreach my $symlist (values(%{$symbols})) {
    my $shortname = $symlist->[0];
    my $fullname = $symlist->[2];
    # TODO(csilvers): take in a list of addresses we care about, and only
    # store in the map if $symlist->[1] is in that list.  Saves space.
    next if defined($fullname_to_shortname_map->{$fullname});
    if (defined($shortnames_seen_more_than_once->{$shortname})) {
      if ($fullname =~ /<0*([^>]*)>$/) {   # fullname has address at end of it
        $fullname_to_shortname_map->{$fullname} = "$shortname\@$1";
      }
    }
  }
}

# Return a small number that identifies the argument.
# Multiple calls with the same argument will return the same number.
# Calls with different arguments will return different numbers.
# Return a small number that uniquely identifies $key within this run,
# memoized in %main::uniqueid (ids are assigned 1, 2, 3, ... in first-seen
# order).
sub ShortIdFor {
  my $key = shift;
  my $id = $main::uniqueid{$key};
  if (!defined($id)) {
    $id = keys(%main::uniqueid) + 1;
    $main::uniqueid{$key} = $id;
  }
  return $id;
}

# Translate a stack of addresses into a stack of symbols.
# $k is a newline-joined list of hex addresses (leaf first); the output
# granularity depends on the global opt_{addresses,lines,functions,files}
# flags.  Inlined frames expand to multiple output entries, callees first.
sub TranslateStack {
  my $symbols = shift;
  my $fullname_to_shortname_map = shift;
  my $k = shift;

  my @addrs = split(/\n/, $k);
  my @result = ();
  for (my $i = 0; $i <= $#addrs; $i++) {
    my $a = $addrs[$i];

    # Skip large addresses since they sometimes show up as fake entries on RH9
    if (length($a) > 8 && $a gt "7fffffffffffffff") {
      next;
    }

    if ($main::opt_disasm || $main::opt_list) {
      # We want just the address for the key
      push(@result, $a);
      next;
    }

    # Unknown addresses symbolize to themselves (shortname = fullname = addr).
    my $symlist = $symbols->{$a};
    if (!defined($symlist)) {
      $symlist = [$a, "", $a];
    }

    # We can have a sequence of symbols for a particular entry
    # (more than one symbol in the case of inlining).  Callers
    # come before callees in symlist, so walk backwards since
    # the translated stack should contain callees before callers.
    for (my $j = $#{$symlist}; $j >= 2; $j -= 3) {
      my $func = $symlist->[$j-2];
      my $fileline = $symlist->[$j-1];
      my $fullfunc = $symlist->[$j];
      # Prefer the disambiguated "shortname@addr" form when the plain
      # shortname collides (see FillFullnameToShortnameMap).
      if (defined($fullname_to_shortname_map->{$fullfunc})) {
        $func = $fullname_to_shortname_map->{$fullfunc};
      }
      if ($j > 2) {
        $func = "$func (inline)";
      }

      # Do not merge nodes corresponding to Callback::Run since that
      # causes confusing cycles in dot display.  Instead, we synthesize
      # a unique name for this frame per caller.
      if ($func =~ m/Callback.*::Run$/) {
        my $caller = ($i > 0) ? $addrs[$i-1] : 0;
        $func = "Run#" . ShortIdFor($caller);
      }

      # Emit at the granularity selected on the command line; unknown
      # symbols ('??') fall back to the raw address.
      if ($main::opt_addresses) {
        push(@result, "$a $func $fileline");
      } elsif ($main::opt_lines) {
        if ($func eq '??' && $fileline eq '??:0') {
          push(@result, "$a");
        } elsif (!$main::opt_show_addresses) {
          push(@result, "$func $fileline");
        } else {
          push(@result, "$func $fileline ($a)");
        }
      } elsif ($main::opt_functions) {
        if ($func eq '??') {
          push(@result, "$a");
        } elsif (!$main::opt_show_addresses) {
          push(@result, $func);
        } else {
          push(@result, "$func ($a)");
        }
      } elsif ($main::opt_files) {
        if ($fileline eq '??:0' || $fileline eq '') {
          push(@result, "$a");
        } else {
          my $f = $fileline;
          $f =~ s/:\d+$//;   # strip the ":line" suffix, keep the file
          push(@result, $f);
        }
      } else {
        push(@result, $a);
        last;  # Do not print inlined info
      }
    }
  }

  # print join(",", @addrs), " => ", join(",", @result), "\n";
  return @result;
}

# Generate percent string for a number and a total.
# Returns "nan"/"+inf"/"-inf" when the total is zero.
sub Percent {
  my $num = shift;
  my $tot = shift;
  if ($tot != 0) {
    return sprintf("%.1f%%", $num * 100.0 / $tot);
  } else {
    return ($num == 0) ? "nan" : (($num > 0) ? "+inf" : "-inf");
  }
}

# Generate pretty-printed form of number: object counts and bytes print
# as integers, heap sizes default to MB (1 MB = 1048576 B), contention
# times convert nanoseconds to seconds, everything else prints raw.
sub Unparse {
  my $num = shift;
  if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
    if ($main::opt_inuse_objects || $main::opt_alloc_objects) {
      return sprintf("%d", $num);
    } else {
      if ($main::opt_show_bytes) {
        return sprintf("%d", $num);
      } else {
        return sprintf("%.1f", $num / 1048576.0);
      }
    }
  } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) {
    return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds
  } else {
    return sprintf("%d", $num);
  }
}

# Alternate pretty-printed form: 0 maps to "."
# Pretty-print a number, but render zero as "." (used in text tables so
# empty cells do not distract from real values).
sub UnparseAlt {
  my $num = shift;
  return ($num == 0) ? "." : Unparse($num);
}

# Pretty-print a number, but render zero as the empty string (HTML output).
sub HtmlPrintNumber {
  my $num = shift;
  return ($num == 0) ? "" : Unparse($num);
}

# Return the unit label matching Unparse()'s formatting for the current
# profile type and option flags.
sub Units {
  if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
    return "objects" if ($main::opt_inuse_objects || $main::opt_alloc_objects);
    return $main::opt_show_bytes ? "B" : "MB";
  }
  if ($main::profile_type eq 'contention' && !$main::opt_contentions) {
    return "seconds";
  }
  return "samples";
}

##### Profile manipulation code #####

# Generate flattened profile:
# If count is charged to stack [a,b,c,d], in generated profile,
# it will be charged to [a]
sub FlatProfile {
  my $profile = shift;
  my $flat = {};
  while (my ($stack, $count) = each(%{$profile})) {
    my @frames = split(/\n/, $stack);
    if (@frames) {
      AddEntry($flat, $frames[0], $count);
    }
  }
  return $flat;
}

# Generate cumulative profile:
# If count is charged to stack [a,b,c,d], in generated profile,
# it will be charged to [a], [b], [c], [d]
sub CumulativeProfile {
  my $profile = shift;
  my $cum = {};
  while (my ($stack, $count) = each(%{$profile})) {
    foreach my $frame (split(/\n/, $stack)) {
      AddEntry($cum, $frame, $count);
    }
  }
  return $cum;
}

# If the second-youngest PC on the stack is always the same, returns
# that pc.  Otherwise, returns undef.
+sub IsSecondPcAlwaysTheSame { + my $profile = shift; + + my $second_pc = undef; + foreach my $k (keys(%{$profile})) { + my @addrs = split(/\n/, $k); + if ($#addrs < 1) { + return undef; + } + if (not defined $second_pc) { + $second_pc = $addrs[1]; + } else { + if ($second_pc ne $addrs[1]) { + return undef; + } + } + } + return $second_pc; +} + +sub ExtractSymbolLocationInlineStack { + my $symbols = shift; + my $address = shift; + my $stack = shift; + # 'addr2line' outputs "??:0" for unknown locations; we do the + # same to be consistent. + if (exists $symbols->{$address}) { + my @localinlinestack = @{$symbols->{$address}}; + for (my $i = $#localinlinestack; $i > 0; $i-=3) { + my $file = $localinlinestack[$i-1]; + my $fn = $localinlinestack[$i-2]; + if ($file eq "?" || $file eq ":0") { + $file = "??:0"; + } + my $suffix = "[inline]"; + if ($i == 2) { + $suffix = ""; + } + push (@$stack, $file.":".$fn.$suffix); + } + } + else { + push (@$stack, "??:0:unknown"); + } +} + +sub ExtractSymbolNameInlineStack { + my $symbols = shift; + my $address = shift; + + my @stack = (); + + if (exists $symbols->{$address}) { + my @localinlinestack = @{$symbols->{$address}}; + for (my $i = $#localinlinestack; $i > 0; $i-=3) { + my $file = $localinlinestack[$i-1]; + my $fn = $localinlinestack[$i-0]; + + if ($file eq "?" || $file eq ":0") { + $file = "??:0"; + } + if ($fn eq '??') { + # If we can't get the symbol name, at least use the file information. + $fn = $file; + } + my $suffix = "[inline]"; + if ($i == 2) { + $suffix = ""; + } + push (@stack, $fn.$suffix); + } + } + else { + # If we can't get a symbol name, at least fill in the address. + push (@stack, $address); + } + + return @stack; +} + +sub ExtractSymbolLocation { + my $symbols = shift; + my $address = shift; + # 'addr2line' outputs "??:0" for unknown locations; we do the + # same to be consistent. 
+ my $location = "??:0:unknown"; + if (exists $symbols->{$address}) { + my $file = $symbols->{$address}->[1]; + if ($file eq "?" || $file eq ":0") { + $file = "??:0" + } + $location = $file . ":" . $symbols->{$address}->[0]; + } + return $location; +} + +# Extracts a graph of calls. +sub ExtractCalls { + my $symbols = shift; + my $profile = shift; + my $calls = {}; + while( my ($stack_trace, $count) = each %$profile ) { + my @address = split(/\n/, $stack_trace); + my @stack = (); + ExtractSymbolLocationInlineStack($symbols, $address[0], \@stack); + for (my $i = 1; $i <= $#address; $i++) { + ExtractSymbolLocationInlineStack($symbols, $address[$i], \@stack); + } + AddEntry($calls, $stack[0], $count); + for (my $i = 1; $i < $#address; $i++) { + AddEntry($calls, "$stack[$i] -> $stack[$i-1]", $count); + } + } + return $calls; +} + +sub PrintStacksForText { + my $symbols = shift; + my $profile = shift; + + while (my ($stack_trace, $count) = each %$profile) { + my @address = split(/\n/, $stack_trace); + for (my $i = 0; $i <= $#address; $i++) { + $address[$i] = sprintf("(%s) %s", $address[$i], ExtractSymbolLocation($symbols, $address[$i])); + } + printf("%-8d %s\n\n", $count, join("\n ", @address)); + } +} + +sub PrintCollapsedStacks { + my $symbols = shift; + my $profile = shift; + + while (my ($stack_trace, $count) = each %$profile) { + my @address = split(/\n/, $stack_trace); + my @names = reverse ( map { ExtractSymbolNameInlineStack($symbols, $_) } @address ); + printf("%s %d\n", join(";", @names), $count); + } +} + +sub RemoveUninterestingFrames { + my $symbols = shift; + my $profile = shift; + + # List of function names to skip + my %skip = (); + my $skip_regexp = 'NOMATCH'; + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + foreach my $name ('calloc', + 'cfree', + 'malloc', + 'free', + 'memalign', + 'posix_memalign', + 'pvalloc', + 'valloc', + 'realloc', + 'tc_calloc', + 'tc_cfree', + 'tc_malloc', + 'tc_free', + 'tc_memalign', + 
'tc_posix_memalign', + 'tc_pvalloc', + 'tc_valloc', + 'tc_realloc', + 'tc_new', + 'tc_delete', + 'tc_newarray', + 'tc_deletearray', + 'tc_new_nothrow', + 'tc_newarray_nothrow', + 'do_malloc', + '::do_malloc', # new name -- got moved to an unnamed ns + '::do_malloc_or_cpp_alloc', + 'DoSampledAllocation', + 'simple_alloc::allocate', + '__malloc_alloc_template::allocate', + '__builtin_delete', + '__builtin_new', + '__builtin_vec_delete', + '__builtin_vec_new', + 'operator new', + 'operator new[]', + # The entry to our memory-allocation routines on OS X + 'malloc_zone_malloc', + 'malloc_zone_calloc', + 'malloc_zone_valloc', + 'malloc_zone_realloc', + 'malloc_zone_memalign', + 'malloc_zone_free', + # These mark the beginning/end of our custom sections + '__start_google_malloc', + '__stop_google_malloc', + '__start_malloc_hook', + '__stop_malloc_hook') { + $skip{$name} = 1; + $skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything + } + # TODO: Remove TCMalloc once everything has been + # moved into the tcmalloc:: namespace and we have flushed + # old code out of the system. + $skip_regexp = "TCMalloc|^tcmalloc::"; + } elsif ($main::profile_type eq 'contention') { + foreach my $vname ('base::RecordLockProfileData', + 'base::SubmitMutexProfileData', + 'base::SubmitSpinLockProfileData', + 'Mutex::Unlock', + 'Mutex::UnlockSlow', + 'Mutex::ReaderUnlock', + 'MutexLock::~MutexLock', + 'SpinLock::Unlock', + 'SpinLock::SlowUnlock', + 'SpinLockHolder::~SpinLockHolder') { + $skip{$vname} = 1; + } + } elsif ($main::profile_type eq 'cpu' && !$main::opt_no_auto_signal_frames) { + # Drop signal handlers used for CPU profile collection + # TODO(dpeng): this should not be necessary; it's taken + # care of by the general 2nd-pc mechanism below. 
+ foreach my $name ('ProfileData::Add', # historical + 'ProfileData::prof_handler', # historical + 'CpuProfiler::prof_handler', + '__FRAME_END__', + '__pthread_sighandler', + '__restore') { + $skip{$name} = 1; + } + } else { + # Nothing skipped for unknown types + } + + if ($main::profile_type eq 'cpu') { + # If all the second-youngest program counters are the same, + # this STRONGLY suggests that it is an artifact of measurement, + # i.e., stack frames pushed by the CPU profiler signal handler. + # Hence, we delete them. + # (The topmost PC is read from the signal structure, not from + # the stack, so it does not get involved.) + while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) { + my $result = {}; + my $func = ''; + if (exists($symbols->{$second_pc})) { + $second_pc = $symbols->{$second_pc}->[0]; + } + if ($main::opt_no_auto_signal_frames) { + print STDERR "All second stack frames are same: `$second_pc'.\nMight be stack trace capturing bug.\n"; + last; + } + print STDERR "Removing $second_pc from all stack traces.\n"; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my $topaddr = POSIX::strtoul($addrs[0], 16); + splice @addrs, 1, 1; + if ($#addrs > 1) { + my $subtopaddr = POSIX::strtoul($addrs[1], 16); + if ($subtopaddr + 1 == $topaddr) { + splice @addrs, 1, 1; + } + } + my $reduced_path = join("\n", @addrs); + AddEntry($result, $reduced_path, $count); + } + $profile = $result; + } + } + + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + foreach my $a (@addrs) { + if (exists($symbols->{$a})) { + my $func = $symbols->{$a}->[0]; + if ($skip{$func} || ($func =~ m/$skip_regexp/)) { + next; + } + } + push(@path, $a); + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + return $result; +} + +# Reduce profile to granularity given by user +sub ReduceProfile { + my $symbols = shift; + my 
$profile = shift; + my $result = {}; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); + my @path = (); + my %seen = (); + $seen{''} = 1; # So that empty keys are skipped + foreach my $e (@translated) { + # To avoid double-counting due to recursion, skip a stack-trace + # entry if it has already been seen + if (!$seen{$e}) { + $seen{$e} = 1; + push(@path, $e); + } + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + return $result; +} + +# Does the specified symbol array match the regexp? +sub SymbolMatches { + my $sym = shift; + my $re = shift; + if (defined($sym)) { + for (my $i = 0; $i < $#{$sym}; $i += 3) { + if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) { + return 1; + } + } + } + return 0; +} + +# Focus only on paths involving specified regexps +sub FocusProfile { + my $symbols = shift; + my $profile = shift; + my $focus = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) { + AddEntry($result, $k, $count); + last; + } + } + } + return $result; +} + +# Focus only on paths not involving specified regexps +sub IgnoreProfile { + my $symbols = shift; + my $profile = shift; + my $ignore = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my $matched = 0; + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) { + $matched = 1; + last; + } + } + if (!$matched) { + AddEntry($result, $k, $count); + } + } + return $result; +} 
# Get total count in profile (sum over every stack key).
sub TotalProfile {
  my $profile = shift;
  my $total = 0;
  foreach my $count (values(%{$profile})) {
    $total += $count;
  }
  return $total;
}

# Pointwise sum of two profiles: every stack key present in either input
# appears in the result with the counts added together.
sub AddProfile {
  my ($A, $B) = @_;

  my $R = {};
  foreach my $src ($A, $B) {
    while (my ($k, $v) = each(%{$src})) {
      AddEntry($R, $k, $v);
    }
  }
  return $R;
}

# Merge two symbol maps into a fresh map; on a key collision the entry
# from $B wins.  $B may be undef.
sub MergeSymbols {
  my ($A, $B) = @_;

  my %merged = %{$A};
  if (defined($B)) {
    foreach my $k (keys(%{$B})) {
      $merged{$k} = $B->{$k};
    }
  }
  return \%merged;
}


# Union of the PC sets $A and $B (both are hash-sets whose values are 1).
sub AddPcs {
  my ($A, $B) = @_;

  my %union = map { $_ => 1 } (keys(%{$A}), keys(%{$B}));
  return \%union;
}

# Pointwise difference $A - $B.  With opt_drop_negative, negative results
# clamp to zero and keys only present in $B are omitted; otherwise those
# keys appear with negated counts.
sub SubtractProfile {
  my ($A, $B) = @_;

  my $R = {};
  foreach my $k (keys(%{$A})) {
    my $v = $A->{$k} - GetEntry($B, $k);
    if ($v < 0 && $main::opt_drop_negative) {
      $v = 0;
    }
    AddEntry($R, $k, $v);
  }
  unless ($main::opt_drop_negative) {
    # Take care of when subtracted profile has more entries
    foreach my $k (keys(%{$B})) {
      next if exists($A->{$k});
      AddEntry($R, $k, 0 - $B->{$k});
    }
  }
  return $R;
}

# Get entry from profile; zero if not present.
sub GetEntry {
  my ($profile, $k) = @_;
  return exists($profile->{$k}) ? $profile->{$k} : 0;
}

# Accumulate $n into $profile->{$k}, creating the key at zero first.
sub AddEntry {
  my ($profile, $k, $n) = @_;
  $profile->{$k} = 0 unless exists($profile->{$k});
  $profile->{$k} += $n;
}

# Add a stack of entries to specified profile, and add them to the $pcs
# list.
# Add a stack (whitespace-separated hex addresses, leaf first) to
# $profile with the given $count, normalizing each address via
# HexExtend, and record every address in the $pcs set.
sub AddEntries {
  my $profile = shift;
  my $pcs = shift;
  my $stack = shift;
  my $count = shift;
  my @k = ();

  foreach my $e (split(/\s+/, $stack)) {
    my $pc = HexExtend($e);
    $pcs->{$pc} = 1;
    push @k, $pc;
  }
  # Profile keys are newline-joined address stacks.
  AddEntry($profile, (join "\n", @k), $count);
}

##### Code to profile a server dynamically #####

# Fetch the /symbol page header from the target server and error out if
# the binary is stripped (num_symbols: 0) or the page is unreachable.
sub CheckSymbolPage {
  my $url = SymbolPageURL();
  my $command = ShellEscape(@URL_FETCHER, $url);
  open(SYMBOL, "$command |") or error($command);
  my $line = <SYMBOL>;
  $line =~ s/\r//g;         # turn windows-looking lines into unix-looking lines
  close(SYMBOL);
  unless (defined($line)) {
    error("$url doesn't exist\n");
  }

  if ($line =~ /^num_symbols:\s+(\d+)$/) {
    if ($1 == 0) {
      error("Stripped binary. No symbols available.\n");
    }
  } else {
    error("Failed to get the number of symbols from $url\n");
  }
}

# Returns 1 if the argument should be treated as a URL to fetch a
# profile from; anything that exists as a local file is used directly.
sub IsProfileURL {
  my $profile_name = shift;
  if (-f $profile_name) {
    printf STDERR "Using local file $profile_name.\n";
    return 0;
  }
  return 1;
}

# Split a profile URL into (host, base URL, profile path).  The scheme
# defaults to http:// and the profile path defaults to "/".
sub ParseProfileURL {
  my $profile_name = shift;

  if (!defined($profile_name) || $profile_name eq "") {
    return ();
  }

  # Split profile URL - matches all non-empty strings, so no test.
  $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,;

  my $proto = $1 || "http://";
  my $hostport = $2;
  my $prefix = $3;
  my $profile = $4 || "/";

  # Strip any :port suffix to get the bare host name.
  my $host = $hostport;
  $host =~ s/:.*//;

  my $baseurl = "$proto$hostport$prefix";
  return ($host, $baseurl, $profile);
}

# We fetch symbols from the first profile argument.
# URL of the /symbol page on the server named by the first profile
# argument.
sub SymbolPageURL {
  my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
  return "$baseURL$SYMBOL_PAGE";
}

# Fetch the remote program's name (argv[0]) from its cmdline page.
sub FetchProgramName() {
  my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
  my $url = "$baseURL$PROGRAM_NAME_PAGE";
  my $command_line = ShellEscape(@URL_FETCHER, $url);
  open(CMDLINE, "$command_line |") or error($command_line);
  my $cmdline = <CMDLINE>;
  $cmdline =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
  close(CMDLINE);
  error("Failed to get program name from $url\n") unless defined($cmdline);
  $cmdline =~ s/\x00.+//;  # Remove argv[1] and latters.
  $cmdline =~ s!\n!!g;  # Remove LFs.
  return $cmdline;
}

# Gee, curl's -L (--location) option isn't reliable at least
# with its 7.12.3 version.  Curl will forget to post data if
# there is a redirection.  This function is a workaround for
# curl.  Redirection happens on borg hosts.
sub ResolveRedirectionForCurl {
  my $url = shift;
  my $command_line = ShellEscape(@URL_FETCHER, "--head", $url);
  open(CMDLINE, "$command_line |") or error($command_line);
  while (<CMDLINE>) {
    s/\r//g;         # turn windows-looking lines into unix-looking lines
    # Follow each Location: header; the last one seen wins.
    if (/^Location: (.*)/) {
      $url = $1;
    }
  }
  close(CMDLINE);
  return $url;
}

# Add a timeout flag to URL_FETCHER.  Returns a new list.
# (Recognizes the curl and rpcget fetchers; other fetchers are
# returned unchanged.)
sub AddFetchTimeout {
  my $timeout = shift;
  my @fetcher = @_;
  if (defined($timeout)) {
    if (join(" ", @fetcher) =~ m/\bcurl -s/) {
      push(@fetcher, "--max-time", sprintf("%d", $timeout));
    } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) {
      push(@fetcher, sprintf("--deadline=%d", $timeout));
    }
  }
  return @fetcher;
}

# Reads a symbol map from the file handle name given as $1, returning
# the resulting symbol map.  Also processes variables relating to symbols.
# Currently, the only variable processed is 'binary=<value>' which updates
# $main::prog to have the correct program name.
+sub ReadSymbols { + my $in = shift; + my $map = {}; + while (<$in>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Removes all the leading zeroes from the symbols, see comment below. + if (m/^0x0*([0-9a-f]+)\s+(.+)/) { + $map->{$1} = $2; + } elsif (m/^---/) { + last; + } elsif (m/^([a-z][^=]*)=(.*)$/ ) { + my ($variable, $value) = ($1, $2); + for ($variable, $value) { + s/^\s+//; + s/\s+$//; + } + if ($variable eq "binary") { + if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) { + printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n", + $main::prog, $value); + } + $main::prog = $value; + } else { + printf STDERR ("Ignoring unknown variable in symbols list: " . + "'%s' = '%s'\n", $variable, $value); + } + } + } + return $map; +} + +# Fetches and processes symbols to prepare them for use in the profile output +# code. If the optional 'symbol_map' arg is not given, fetches symbols from +# $SYMBOL_PAGE for all PC values found in profile. Otherwise, the raw symbols +# are assumed to have already been fetched into 'symbol_map' and are simply +# extracted and processed. +sub FetchSymbols { + my $pcset = shift; + my $symbol_map = shift; + + my %seen = (); + my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq + + if (!defined($symbol_map)) { + my $post_data = join("+", sort((map {"0x" . "$_"} @pcs))); + + open(POSTFILE, ">$main::tmpfile_sym"); + print POSTFILE $post_data; + close(POSTFILE); + + my $url = SymbolPageURL(); + + my $command_line; + if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { + $url = ResolveRedirectionForCurl($url); + $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", + $url); + } else { + $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) + . " < " . ShellEscape($main::tmpfile_sym)); + } + # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. 
+ my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"}); + open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line); + $symbol_map = ReadSymbols(*SYMBOL{IO}); + close(SYMBOL); + } + + my $symbols = {}; + foreach my $pc (@pcs) { + my $fullname; + # For 64 bits binaries, symbols are extracted with 8 leading zeroes. + # Then /symbol reads the long symbols in as uint64, and outputs + # the result with a "0x%08llx" format which get rid of the zeroes. + # By removing all the leading zeroes in both $pc and the symbols from + # /symbol, the symbols match and are retrievable from the map. + my $shortpc = $pc; + $shortpc =~ s/^0*//; + # Each line may have a list of names, which includes the function + # and also other functions it has inlined. They are separated (in + # PrintSymbolizedProfile), by --, which is illegal in function names. + my $fullnames; + if (defined($symbol_map->{$shortpc})) { + $fullnames = $symbol_map->{$shortpc}; + } else { + $fullnames = "0x" . $pc; # Just use addresses + } + my $sym = []; + $symbols->{$pc} = $sym; + foreach my $fullname (split("--", $fullnames)) { + my $name = ShortFunctionName($fullname); + push(@{$sym}, $name, "?", $fullname); + } + } + return $symbols; +} + +sub BaseName { + my $file_name = shift; + $file_name =~ s!^.*/!!; # Remove directory name + return $file_name; +} + +sub MakeProfileBaseName { + my ($binary_name, $profile_name) = @_; + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + my $binary_shortname = BaseName($binary_name); + return sprintf("%s.%s.%s", + $binary_shortname, $main::op_time, $host); +} + +sub FetchDynamicProfile { + my $binary_name = shift; + my $profile_name = shift; + my $fetch_name_only = shift; + my $encourage_patience = shift; + + if (!IsProfileURL($profile_name)) { + return $profile_name; + } else { + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + if ($path eq "" || $path eq "/") { + # Missing type specifier defaults to cpu-profile + $path = 
$PROFILE_PAGE; + } + + my $profile_file = MakeProfileBaseName($binary_name, $profile_name); + + my $url = "$baseURL$path"; + my $fetch_timeout = undef; + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) { + if ($path =~ m/[?]/) { + $url .= "&"; + } else { + $url .= "?"; + } + $url .= sprintf("seconds=%d", $main::opt_seconds); + $fetch_timeout = $main::opt_seconds * 1.01 + 60; + } else { + # For non-CPU profiles, we add a type-extension to + # the target profile file name. + my $suffix = $path; + $suffix =~ s,/,.,g; + $profile_file .= $suffix; + } + + my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof"); + if (! -d $profile_dir) { + mkdir($profile_dir) + || die("Unable to create profile directory $profile_dir: $!\n"); + } + my $tmp_profile = "$profile_dir/.tmp.$profile_file"; + my $real_profile = "$profile_dir/$profile_file"; + + if ($fetch_name_only > 0) { + return $real_profile; + } + + my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER); + my $cmd = ShellEscape(@fetcher, $url) . " > " . 
ShellEscape($tmp_profile); + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){ + print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; + if ($encourage_patience) { + print STDERR "Be patient...\n"; + } + } else { + print STDERR "Fetching $path profile from $url to\n ${real_profile}\n"; + } + + (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); + (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n"); + print STDERR "Wrote profile to $real_profile\n"; + $main::collected_profile = $real_profile; + return $main::collected_profile; + } +} + +# Collect profiles in parallel +sub FetchDynamicProfiles { + my $items = scalar(@main::pfile_args); + my $levels = log($items) / log(2); + + if ($items == 1) { + $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1); + } else { + # math rounding issues + if ((2 ** $levels) < $items) { + $levels++; + } + my $count = scalar(@main::pfile_args); + for (my $i = 0; $i < $count; $i++) { + $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0); + } + print STDERR "Fetching $count profiles, Be patient...\n"; + FetchDynamicProfilesRecurse($levels, 0, 0); + $main::collected_profile = join(" \\\n ", @main::profile_files); + } +} + +# Recursively fork a process to get enough processes +# collecting profiles +sub FetchDynamicProfilesRecurse { + my $maxlevel = shift; + my $level = shift; + my $position = shift; + + if (my $pid = fork()) { + $position = 0 | ($position << 1); + TryCollectProfile($maxlevel, $level, $position); + wait; + } else { + $position = 1 | ($position << 1); + TryCollectProfile($maxlevel, $level, $position); + cleanup(); + exit(0); + } +} + +# Collect a single profile +sub TryCollectProfile { + my $maxlevel = shift; + my $level = shift; + my $position = shift; + + if ($level >= ($maxlevel - 1)) { + if ($position < scalar(@main::pfile_args)) { + 
FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0); + } + } else { + FetchDynamicProfilesRecurse($maxlevel, $level+1, $position); + } +} + +##### Parsing code ##### + +# Provide a small streaming-read module to handle very large +# cpu-profile files. Stream in chunks along a sliding window. +# Provides an interface to get one 'slot', correctly handling +# endian-ness differences. A slot is one 32-bit or 64-bit word +# (depending on the input profile). We tell endianness and bit-size +# for the profile by looking at the first 8 bytes: in cpu profiles, +# the second slot is always 3 (we'll accept anything that's not 0). +BEGIN { + package CpuProfileStream; + + sub new { + my ($class, $file, $fname) = @_; + my $self = { file => $file, + base => 0, + stride => 512 * 1024, # must be a multiple of bitsize/8 + slots => [], + unpack_code => "", # N for big-endian, V for little + perl_is_64bit => 1, # matters if profile is 64-bit + }; + bless $self, $class; + # Let unittests adjust the stride + if ($main::opt_test_stride > 0) { + $self->{stride} = $main::opt_test_stride; + } + # Read the first two slots to figure out bitsize and endianness. + my $slots = $self->{slots}; + my $str; + read($self->{file}, $str, 8); + # Set the global $address_length based on what we see here. + # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars). + $address_length = ($str eq (chr(0)x8)) ? 16 : 8; + if ($address_length == 8) { + if (substr($str, 6, 2) eq chr(0)x2) { + $self->{unpack_code} = 'V'; # Little-endian. + } elsif (substr($str, 4, 2) eq chr(0)x2) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**16\n"); + } + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # If we're a 64-bit profile, check if we're a 64-bit-capable + # perl. Otherwise, each slot will be represented as a float + # instead of an int64, losing precision and making all the + # 64-bit addresses wrong. 
We won't complain yet, but will + # later if we ever see a value that doesn't fit in 32 bits. + my $has_q = 0; + eval { $has_q = pack("Q", "1") ? 1 : 1; }; + if (!$has_q) { + $self->{perl_is_64bit} = 0; + } + read($self->{file}, $str, 8); + if (substr($str, 4, 4) eq chr(0)x4) { + # We'd love to use 'Q', but it's a) not universal, b) not endian-proof. + $self->{unpack_code} = 'V'; # Little-endian. + } elsif (substr($str, 0, 4) eq chr(0)x4) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**32\n"); + } + my @pair = unpack($self->{unpack_code} . "*", $str); + # Since we know one of the pair is 0, it's fine to just add them. + @$slots = (0, $pair[0] + $pair[1]); + } + return $self; + } + + # Load more data when we access slots->get(X) which is not yet in memory. + sub overflow { + my ($self) = @_; + my $slots = $self->{slots}; + $self->{base} += $#$slots + 1; # skip over data we're replacing + my $str; + read($self->{file}, $str, $self->{stride}); + if ($address_length == 8) { # the 32-bit case + # This is the easy case: unpack provides 32-bit unpacking primitives. + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # We need to unpack 32 bits at a time and combine. + my @b32_values = unpack($self->{unpack_code} . "*", $str); + my @b64_values = (); + for (my $i = 0; $i < $#b32_values; $i += 2) { + # TODO(csilvers): if this is a 32-bit perl, the math below + # could end up in a too-large int, which perl will promote + # to a double, losing necessary precision. Deal with that. + # Right now, we just die. 
+ my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); + if ($self->{unpack_code} eq 'N') { # big-endian + ($lo, $hi) = ($hi, $lo); + } + my $value = $lo + $hi * (2**32); + if (!$self->{perl_is_64bit} && # check value is exactly represented + (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { + ::error("Need a 64-bit perl to process this 64-bit profile.\n"); + } + push(@b64_values, $value); + } + @$slots = @b64_values; + } + } + + # Access the i-th long in the file (logically), or -1 at EOF. + sub get { + my ($self, $idx) = @_; + my $slots = $self->{slots}; + while ($#$slots >= 0) { + if ($idx < $self->{base}) { + # The only time we expect a reference to $slots[$i - something] + # after referencing $slots[$i] is reading the very first header. + # Since $stride > |header|, that shouldn't cause any lookback + # errors. And everything after the header is sequential. + print STDERR "Unexpected look-back reading CPU profile"; + return -1; # shrug, don't know what better to return + } elsif ($idx > $self->{base} + $#$slots) { + $self->overflow(); + } else { + return $slots->[$idx - $self->{base}]; + } + } + # If we get here, $slots is [], which means we've reached EOF + return -1; # unique since slots is supposed to hold unsigned numbers + } +} + +# Reads the top, 'header' section of a profile, and returns the last +# line of the header, commonly called a 'header line'. The header +# section of a profile consists of zero or more 'command' lines that +# are instructions to pprof, which pprof executes when reading the +# header. All 'command' lines start with a %. After the command +# lines is the 'header line', which is a profile-specific line that +# indicates what type of profile it is, and perhaps other global +# information about the profile. For instance, here's a header line +# for a heap profile: +# heap profile: 53: 38236 [ 5525: 1284029] @ heapprofile +# For historical reasons, the CPU profile does not contain a text- +# readable header line. 
If the profile looks like a CPU profile, +# this function returns "". If no header line could be found, this +# function returns undef. +# +# The following commands are recognized: +# %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:' +# +# The input file should be in binmode. +sub ReadProfileHeader { + local *PROFILE = shift; + my $firstchar = ""; + my $line = ""; + read(PROFILE, $firstchar, 1); + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar !~ /[[:print:]]/) { # is not a text character + return ""; + } + while (defined($line = <PROFILE>)) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ($line =~ /^%warn\s+(.*)/) { # 'warn' command + # Note this matches both '%warn blah\n' and '%warn\n'. + print STDERR "WARNING: $1\n"; # print the rest of the line + } elsif ($line =~ /^%/) { + print STDERR "Ignoring unknown command from profile header: $line"; + } else { + # End of commands, must be the header line. + return $line; + } + } + return undef; # got to EOF without seeing a header line +} + +sub IsSymbolizedProfileFile { + my $file_name = shift; + if (!(-e $file_name) || !(-r $file_name)) { + return 0; + } + # Check if the file contains a symbol-section marker. 
+ open(TFILE, "<$file_name"); + binmode TFILE; + my $firstline = ReadProfileHeader(*TFILE); + close(TFILE); + if (!$firstline) { + return 0; + } + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + return $firstline =~ /^--- *$symbol_marker/; +} + +# Parse profile generated by common/profiler.cc and return a reference +# to a map: +# $result->{version} Version number of profile file +# $result->{period} Sampling period (in microseconds) +# $result->{profile} Profile object +# $result->{map} Memory map info from profile +# $result->{pcs} Hash of all PC values seen, key is hex address +sub ReadProfile { + my $prog = shift; + my $fname = shift; + my $result; # return value + + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $contention_marker = $&; + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $growth_marker = $&; + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; + + # Look at first line to see if it is a heap or a CPU profile. + # CPU profile may start with no header at all, and just binary data + # (starting with \0\0\0\0) -- in that case, don't try to read the + # whole firstline, since it may be gigabytes(!) of data. + open(PROFILE, "<$fname") || error("$fname: $!\n"); + binmode PROFILE; # New perls do UTF-8 processing + my $header = ReadProfileHeader(*PROFILE); + if (!defined($header)) { # means "at EOF" + error("Profile is empty.\n"); + } + + my $symbols; + if ($header =~ m/^--- *$symbol_marker/o) { + # Verify that the user asked for a symbolized profile + if (!$main::use_symbolized_profile) { + # we have both a binary and symbolized profiles, abort + error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " . + "a binary arg. 
Try again without passing\n $prog\n"); + } + # Read the symbol section of the symbolized profile file. + $symbols = ReadSymbols(*PROFILE{IO}); + # Read the next line to get the header for the remaining profile. + $header = ReadProfileHeader(*PROFILE) || ""; + } + + $main::profile_type = ''; + if ($header =~ m/^heap profile:.*$growth_marker/o) { + $main::profile_type = 'growth'; + $result = ReadHeapProfile($prog, *PROFILE, $header); + } elsif ($header =~ m/^heap profile:/) { + $main::profile_type = 'heap'; + $result = ReadHeapProfile($prog, *PROFILE, $header); + } elsif ($header =~ m/^--- *$contention_marker/o) { + $main::profile_type = 'contention'; + $result = ReadSynchProfile($prog, *PROFILE); + } elsif ($header =~ m/^--- *Stacks:/) { + print STDERR + "Old format contention profile: mistakenly reports " . + "condition variable signals as lock contentions.\n"; + $main::profile_type = 'contention'; + $result = ReadSynchProfile($prog, *PROFILE); + } elsif ($header =~ m/^--- *$profile_marker/) { + # the binary cpu profile data starts immediately after this line + $main::profile_type = 'cpu'; + $result = ReadCPUProfile($prog, $fname, *PROFILE); + } else { + if (defined($symbols)) { + # a symbolized profile contains a format we don't recognize, bail out + error("$fname: Cannot recognize profile section after symbols.\n"); + } + # no ascii header present -- must be a CPU profile + $main::profile_type = 'cpu'; + $result = ReadCPUProfile($prog, $fname, *PROFILE); + } + + close(PROFILE); + + # if we got symbols along with the profile, return those as well + if (defined($symbols)) { + $result->{symbols} = $symbols; + } + + return $result; +} + +# Subtract one from caller pc so we map back to call instr. +# However, don't do this if we're reading a symbolized profile +# file, in which case the subtract-one was done when the file +# was written. +# +# We apply the same logic to all readers, though ReadCPUProfile uses an +# independent implementation. 
+sub FixCallerAddresses { + my $stack = shift; + if ($main::use_symbolized_profile) { + return $stack; + } else { + $stack =~ /(\s)/; + my $delimiter = $1; + my @addrs = split(' ', $stack); + my @fixedaddrs; + $#fixedaddrs = $#addrs; + if ($#addrs >= 0) { + $fixedaddrs[0] = $addrs[0]; + } + for (my $i = 1; $i <= $#addrs; $i++) { + $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1"); + } + return join $delimiter, @fixedaddrs; + } +} + +# CPU profile reader +sub ReadCPUProfile { + my $prog = shift; + my $fname = shift; # just used for logging + local *PROFILE = shift; + my $version; + my $period; + my $i; + my $profile = {}; + my $pcs = {}; + + # Parse string into array of slots. + my $slots = CpuProfileStream->new(*PROFILE, $fname); + + # Read header. The current header version is a 5-element structure + # containing: + # 0: header count (always 0) + # 1: header "words" (after this one: 3) + # 2: format version (0) + # 3: sampling period (usec) + # 4: unused padding (always 0) + if ($slots->get(0) != 0 ) { + error("$fname: not a profile file, or old format profile file\n"); + } + $i = 2 + $slots->get(1); + $version = $slots->get(2); + $period = $slots->get(3); + # Do some sanity checking on these header values. + if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) { + error("$fname: not a profile file, or corrupted profile file\n"); + } + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $d = $slots->get($i++); + if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth? + my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8)); + print STDERR "At index $i (address $addr):\n"; + error("$fname: stack trace depth >= 2**32\n"); + } + if ($slots->get($i) == 0) { + # End of profile data marker + $i += $d; + last; + } + + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pc = $slots->get($i+$j); + # Subtract one from caller pc so we map back to call instr. 
+ # However, don't do this if we're reading a symbolized profile + # file, in which case the subtract-one was done when the file + # was written. + if ($j > 0 && !$main::use_symbolized_profile) { + $pc--; + } + $pc = sprintf("%0*x", $address_length, $pc); + $pcs->{$pc} = 1; + push @k, $pc; + } + + AddEntry($profile, (join "\n", @k), $n); + $i += $d; + } + + # Parse map + my $map = ''; + seek(PROFILE, $i * ($address_length / 2), 0); + read(PROFILE, $map, (stat PROFILE)[7]); + + my $r = {}; + $r->{version} = $version; + $r->{period} = $period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + + return $r; +} + +sub ReadHeapProfile { + my $prog = shift; + local *PROFILE = shift; + my $header = shift; + + my $index = 1; + if ($main::opt_inuse_space) { + $index = 1; + } elsif ($main::opt_inuse_objects) { + $index = 0; + } elsif ($main::opt_alloc_space) { + $index = 3; + } elsif ($main::opt_alloc_objects) { + $index = 2; + } + + # Find the type of this profile. The header line looks like: + # heap profile: 1246: 8800744 [ 1246: 8800744] @ <heap-url>/266053 + # There are two pairs <count: size>, the first inuse objects/space, and the + # second allocated objects/space. This is followed optionally by a profile + # type, and if that is present, optionally by a sampling frequency. + # For remote heap profiles (v1): + # The interpretation of the sampling frequency is that the profiler, for + # each sample, calculates a uniformly distributed random integer less than + # the given value, and records the next sample after that many bytes have + # been allocated. Therefore, the expected sample interval is half of the + # given frequency. By default, if not specified, the expected sample + # interval is 128KB. Only remote-heap-page profiles are adjusted for + # sample size. + # For remote heap profiles (v2): + # The sampling frequency is the rate of a Poisson process. 
This means that + # the probability of sampling an allocation of size X with sampling rate Y + # is 1 - exp(-X/Y) + # For version 2, a typical header line might look like this: + # heap profile: 1922: 127792360 [ 1922: 127792360] @ <heap-url>_v2/524288 + # the trailing number (524288) is the sampling rate. (Version 1 showed + # double the 'rate' here) + my $sampling_algorithm = 0; + my $sample_adjustment = 0; + chomp($header); + my $type = "unknown"; + if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") { + if (defined($6) && ($6 ne '')) { + $type = $6; + my $sample_period = $8; + # $type is "heapprofile" for profiles generated by the + # heap-profiler, and either "heap" or "heap_v2" for profiles + # generated by sampling directly within tcmalloc. It can also + # be "growth" for heap-growth profiles. The first is typically + # found for profiles generated locally, and the others for + # remote profiles. + if (($type eq "heapprofile") || ($type !~ /heap/) ) { + # No need to adjust for the sampling rate with heap-profiler-derived data + $sampling_algorithm = 0; + } elsif ($type =~ /_v2/) { + $sampling_algorithm = 2; # version 2 sampling + if (defined($sample_period) && ($sample_period ne '')) { + $sample_adjustment = int($sample_period); + } + } else { + $sampling_algorithm = 1; # version 1 sampling + if (defined($sample_period) && ($sample_period ne '')) { + $sample_adjustment = int($sample_period)/2; + } + } + } else { + # We detect whether or not this is a remote-heap profile by checking + # that the total-allocated stats ($n2,$s2) are exactly the + # same as the in-use stats ($n1,$s1). It is remotely conceivable + # that a non-remote-heap profile may pass this check, but it is hard + # to imagine how that could happen. + # In this case it's so old it's guaranteed to be remote-heap version 1. 
+ my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + if (($n1 == $n2) && ($s1 == $s2)) { + # This is likely to be a remote-heap based sample profile + $sampling_algorithm = 1; + } + } + } + + if ($sampling_algorithm > 0) { + # For remote-heap generated profiles, adjust the counts and sizes to + # account for the sample rate (we sample once every 128KB by default). + if ($sample_adjustment == 0) { + # Turn on profile adjustment. + $sample_adjustment = 128*1024; + print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; + } else { + printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", + $sample_adjustment); + } + if ($sampling_algorithm > 1) { + # We don't bother printing anything for the original version (version 1) + printf STDERR "Heap version $sampling_algorithm\n"; + } + } + + my $profile = {}; + my $pcs = {}; + my $map = ""; + + while (<PROFILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (/^MAPPED_LIBRARIES:/) { + # Read the /proc/self/maps data + while (<PROFILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + $map .= $_; + } + last; + } + + if (/^--- Memory map:/) { + # Read /proc/self/maps data as formatted by DumpAddressMap() + my $buildvar = ""; + while (<PROFILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Parse "build=<dir>" specification if supplied + if (m/^\s*build=(.*)\n/) { + $buildvar = $1; + } + + # Expand "$build" variable if available + $_ =~ s/\$build\b/$buildvar/g; + + $map .= $_; + } + last; + } + + # Read entry of the form: + # <count1>: <bytes1> [<count2>: <bytes2>] @ a1 a2 a3 ... an + s/^\s*//; + s/\s*$//; + if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) { + my $stack = $5; + my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + + if ($sample_adjustment) { + if ($sampling_algorithm == 2) { + # Remote-heap version 2 + # The sampling frequency is the rate of a Poisson process. 
+ # This means that the probability of sampling an allocation of + # size X with sampling rate Y is 1 - exp(-X/Y) + if ($n1 != 0) { + my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n1 *= $scale_factor; + $s1 *= $scale_factor; + } + if ($n2 != 0) { + my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n2 *= $scale_factor; + $s2 *= $scale_factor; + } + } else { + # Remote-heap version 1 + my $ratio; + $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + if ($ratio < 1) { + $n1 /= $ratio; + $s1 /= $ratio; + } + $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + if ($ratio < 1) { + $n2 /= $ratio; + $s2 /= $ratio; + } + } + } + + my @counts = ($n1, $s1, $n2, $s2); + $stack = FixCallerAddresses($stack); + push @stackTraces, "$n1 $s1 $n2 $s2 $stack"; + AddEntries($profile, $pcs, $stack, $counts[$index]); + } + } + + my $r = {}; + $r->{version} = "heap"; + $r->{period} = 1; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +sub ReadSynchProfile { + my $prog = shift; + local *PROFILE = shift; + my $header = shift; + + my $map = ''; + my $profile = {}; + my $pcs = {}; + my $sampling_period = 1; + my $cyclespernanosec = 2.8; # Default assumption for old binaries + my $seen_clockrate = 0; + my $line; + + my $index = 0; + if ($main::opt_total_delay) { + $index = 0; + } elsif ($main::opt_contentions) { + $index = 1; + } elsif ($main::opt_mean_delay) { + $index = 2; + } + + while ( $line = <PROFILE> ) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) { + my ($cycles, $count, $stack) = ($1, $2, $3); + + # Convert cycles to nanoseconds + $cycles /= $cyclespernanosec; + + # Adjust for sampling done by application + $cycles *= $sampling_period; + $count *= $sampling_period; + + my @values = ($cycles, $count, $cycles / $count); + AddEntries($profile, $pcs, 
FixCallerAddresses($stack), $values[$index]); + + } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ || + $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { + my ($cycles, $stack) = ($1, $2); + if ($cycles !~ /^\d+$/) { + next; + } + + # Convert cycles to nanoseconds + $cycles /= $cyclespernanosec; + + # Adjust for sampling done by application + $cycles *= $sampling_period; + + AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles); + + } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) { + my ($variable, $value) = ($1,$2); + for ($variable, $value) { + s/^\s+//; + s/\s+$//; + } + if ($variable eq "cycles/second") { + $cyclespernanosec = $value / 1e9; + $seen_clockrate = 1; + } elsif ($variable eq "sampling period") { + $sampling_period = $value; + } elsif ($variable eq "ms since reset") { + # Currently nothing is done with this value in pprof + # So we just silently ignore it for now + } elsif ($variable eq "discarded samples") { + # Currently nothing is done with this value in pprof + # So we just silently ignore it for now + } else { + printf STDERR ("Ignoring unnknown variable in /contention output: " . + "'%s' = '%s'\n",$variable,$value); + } + } else { + # Memory map entry + $map .= $line; + } + } + + if (!$seen_clockrate) { + printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n", + $cyclespernanosec); + } + + my $r = {}; + $r->{version} = 0; + $r->{period} = $sampling_period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +# Given a hex value in the form "0x1abcd" or "1abcd", return either +# "0001abcd" or "000000000001abcd", depending on the current (global) +# address length. +sub HexExtend { + my $addr = shift; + + $addr =~ s/^(0x)?0*//; + my $zeros_needed = $address_length - length($addr); + if ($zeros_needed < 0) { + printf STDERR "Warning: address $addr is longer than address length $address_length\n"; + return $addr; + } + return ("0" x $zeros_needed) . 
$addr; +} + +##### Symbol extraction ##### + +# Aggressively search the lib_prefix values for the given library +# If all else fails, just return the name of the library unmodified. +# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so" +# it will search the following locations in this order, until it finds a file: +# /my/path/lib/dir/mylib.so +# /other/path/lib/dir/mylib.so +# /my/path/dir/mylib.so +# /other/path/dir/mylib.so +# /my/path/mylib.so +# /other/path/mylib.so +# /lib/dir/mylib.so (returned as last resort) +sub FindLibrary { + my $file = shift; + my $suffix = $file; + + # Search for the library as described above + do { + foreach my $prefix (@prefix_list) { + my $fullpath = $prefix . $suffix; + if (-e $fullpath) { + return $fullpath; + } + } + } while ($suffix =~ s|^/[^/]+/|/|); + return $file; +} + +# Return path to library with debugging symbols. +# For libc libraries, the copy in /usr/lib/debug contains debugging symbols +sub DebuggingLibrary { + my $file = shift; + if ($file =~ m|^/| && -f "/usr/lib/debug$file") { + return "/usr/lib/debug$file"; + } + if ($file =~ m|^/| && -f "/usr/lib/debug$file.debug") { + return "/usr/lib/debug$file.debug"; + } + return undef; +} + +# Parse text section header of a library using objdump +sub ParseTextSectionHeaderFromObjdump { + my $lib = shift; + + my $size = undef; + my $vma; + my $file_offset; + # Get objdump output from the library file to figure out how to + # map between mapped addresses and addresses in the library. + my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); + while (<OBJDUMP>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Idx Name Size VMA LMA File off Algn + # 10 .text 00104b2c 420156f0 420156f0 000156f0 2**4 + # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file + # offset may still be 8. But AddressSub below will still handle that. 
+ my @x = split; + if (($#x >= 6) && ($x[1] eq '.text')) { + $size = $x[2]; + $vma = $x[3]; + $file_offset = $x[5]; + last; + } + } + close(OBJDUMP); + + if (!defined($size)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + +# Parse text section header of a library using otool (on OS X) +sub ParseTextSectionHeaderFromOtool { + my $lib = shift; + + my $size = undef; + my $vma = undef; + my $file_offset = undef; + # Get otool output from the library file to figure out how to + # map between mapped addresses and addresses in the library. + my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib); + open(OTOOL, "$command |") || error("$command: $!\n"); + my $cmd = ""; + my $sectname = ""; + my $segname = ""; + foreach my $line (<OTOOL>) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + # Load command <#> + # cmd LC_SEGMENT + # [...] + # Section + # sectname __text + # segname __TEXT + # addr 0x000009f8 + # size 0x00018b9e + # offset 2552 + # align 2^2 (4) + # We will need to strip off the leading 0x from the hex addresses, + # and convert the offset into hex. 
+ if ($line =~ /Load command/) { + $cmd = ""; + $sectname = ""; + $segname = ""; + } elsif ($line =~ /Section/) { + $sectname = ""; + $segname = ""; + } elsif ($line =~ /cmd (\w+)/) { + $cmd = $1; + } elsif ($line =~ /sectname (\w+)/) { + $sectname = $1; + } elsif ($line =~ /segname (\w+)/) { + $segname = $1; + } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && + $sectname eq "__text" && + $segname eq "__TEXT")) { + next; + } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { + $vma = $1; + } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) { + $size = $1; + } elsif ($line =~ /\boffset ([0-9]+)/) { + $file_offset = sprintf("%016x", $1); + } + if (defined($vma) && defined($size) && defined($file_offset)) { + last; + } + } + close(OTOOL); + + if (!defined($vma) || !defined($size) || !defined($file_offset)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + +sub ParseTextSectionHeader { + # obj_tool_map("otool") is only defined if we're in a Mach-O environment + if (defined($obj_tool_map{"otool"})) { + my $r = ParseTextSectionHeaderFromOtool(@_); + if (defined($r)){ + return $r; + } + } + # If otool doesn't work, or we don't have it, fall back to objdump + return ParseTextSectionHeaderFromObjdump(@_); +} + +# Split /proc/pid/maps dump into a list of libraries +sub ParseLibraries { + return if $main::use_symbol_page; # We don't need libraries info. + my $prog = Cwd::abs_path(shift); + my $map = shift; + my $pcs = shift; + + my $result = []; + my $h = "[a-f0-9]+"; + my $zero_offset = HexExtend("0"); + + my $buildvar = ""; + foreach my $l (split("\n", $map)) { + if ($l =~ m/^\s*build=(.*)$/) { + $buildvar = $1; + } + + my $start; + my $finish; + my $offset; + my $lib; + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(.+\.(so|dll|dylib|bundle|node)((\.\d+)+\w*(\.\d+){0,3})?)$/i) { + # Full line from /proc/self/maps. 
Example: + # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = HexExtend($3); + $lib = $4; + $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) { + # Cooked line from DumpAddressMap. Example: + # 40000000-40015000: /lib/ld-2.3.2.so + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = $zero_offset; + $lib = $3; + } elsif (($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+)$/i) && ($4 eq $prog)) { + # PIEs and address space randomization do not play well with our + # default assumption that main executable is at lowest + # addresses. So we're detecting main executable in + # /proc/self/maps as well. + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = HexExtend($3); + $lib = $4; + $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } else { + next; + } + + # Expand "$build" variable if available + $lib =~ s/\$build\b/$buildvar/g; + + $lib = FindLibrary($lib); + + # Check for pre-relocated libraries, which use pre-relocated symbol tables + # and thus require adjusting the offset that we'll use to translate + # VM addresses into symbol table addresses. + # Only do this if we're not going to fetch the symbol table from a + # debugging copy of the library. 
+ if (!DebuggingLibrary($lib)) { + my $text = ParseTextSectionHeader($lib); + if (defined($text)) { + my $vma_offset = AddressSub($text->{vma}, $text->{file_offset}); + $offset = AddressAdd($offset, $vma_offset); + } + } + + push(@{$result}, [$lib, $start, $finish, $offset]); + } + + # Append special entry for additional library (not relocated) + if ($main::opt_lib ne "") { + my $text = ParseTextSectionHeader($main::opt_lib); + if (defined($text)) { + my $start = $text->{vma}; + my $finish = AddressAdd($start, $text->{size}); + + push(@{$result}, [$main::opt_lib, $start, $finish, $start]); + } + } + + # Append special entry for the main program. This covers + # 0..max_pc_value_seen, so that we assume pc values not found in one + # of the library ranges will be treated as coming from the main + # program binary. + my $min_pc = HexExtend("0"); + my $max_pc = $min_pc; # find the maximal PC value in any sample + foreach my $pc (keys(%{$pcs})) { + if (HexExtend($pc) gt $max_pc) { $max_pc = HexExtend($pc); } + } + push(@{$result}, [$prog, $min_pc, $max_pc, $zero_offset]); + + return $result; +} + +# Add two hex addresses of length $address_length. +# Run pprof --test for unit test if this is changed. +sub AddressAdd { + my $addr1 = shift; + my $addr2 = shift; + my $sum; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16); + return sprintf("%08x", $sum); + + } else { + # Do the addition in 7-nibble chunks to trivialize carry handling. 
+ + if ($main::opt_debug and $main::opt_test) { + print STDERR "AddressAdd $addr1 + $addr2 = "; + } + + my $a1 = substr($addr1,-7); + $addr1 = substr($addr1,0,-7); + my $a2 = substr($addr2,-7); + $addr2 = substr($addr2,0,-7); + $sum = hex($a1) + hex($a2); + my $c = 0; + if ($sum > 0xfffffff) { + $c = 1; + $sum -= 0x10000000; + } + my $r = sprintf("%07x", $sum); + + $a1 = substr($addr1,-7); + $addr1 = substr($addr1,0,-7); + $a2 = substr($addr2,-7); + $addr2 = substr($addr2,0,-7); + $sum = hex($a1) + hex($a2) + $c; + $c = 0; + if ($sum > 0xfffffff) { + $c = 1; + $sum -= 0x10000000; + } + $r = sprintf("%07x", $sum) . $r; + + $sum = hex($addr1) + hex($addr2) + $c; + if ($sum > 0xff) { $sum -= 0x100; } + $r = sprintf("%02x", $sum) . $r; + + if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; } + + return $r; + } +} + + +# Subtract two hex addresses of length $address_length. +# Run pprof --test for unit test if this is changed. +sub AddressSub { + my $addr1 = shift; + my $addr2 = shift; + my $diff; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16); + return sprintf("%08x", $diff); + + } else { + # Do the addition in 7-nibble chunks to trivialize borrow handling. + # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; } + + my $a1 = hex(substr($addr1,-7)); + $addr1 = substr($addr1,0,-7); + my $a2 = hex(substr($addr2,-7)); + $addr2 = substr($addr2,0,-7); + my $b = 0; + if ($a2 > $a1) { + $b = 1; + $a1 += 0x10000000; + } + $diff = $a1 - $a2; + my $r = sprintf("%07x", $diff); + + $a1 = hex(substr($addr1,-7)); + $addr1 = substr($addr1,0,-7); + $a2 = hex(substr($addr2,-7)) + $b; + $addr2 = substr($addr2,0,-7); + $b = 0; + if ($a2 > $a1) { + $b = 1; + $a1 += 0x10000000; + } + $diff = $a1 - $a2; + $r = sprintf("%07x", $diff) . 
$r; + + $a1 = hex($addr1); + $a2 = hex($addr2) + $b; + if ($a2 > $a1) { $a1 += 0x100; } + $diff = $a1 - $a2; + $r = sprintf("%02x", $diff) . $r; + + # if ($main::opt_debug) { print STDERR "$r\n"; } + + return $r; + } +} + +# Increment a hex addresses of length $address_length. +# Run pprof --test for unit test if this is changed. +sub AddressInc { + my $addr = shift; + my $sum; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $sum = (hex($addr)+1) % (0x10000000 * 16); + return sprintf("%08x", $sum); + + } else { + # Do the addition in 7-nibble chunks to trivialize carry handling. + # We are always doing this to step through the addresses in a function, + # and will almost never overflow the first chunk, so we check for this + # case and exit early. + + # if ($main::opt_debug) { print STDERR "AddressInc $addr1 = "; } + + my $a1 = substr($addr,-7); + $addr = substr($addr,0,-7); + $sum = hex($a1) + 1; + my $r = sprintf("%07x", $sum); + if ($sum <= 0xfffffff) { + $r = $addr . $r; + # if ($main::opt_debug) { print STDERR "$r\n"; } + return HexExtend($r); + } else { + $r = "0000000"; + } + + $a1 = substr($addr,-7); + $addr = substr($addr,0,-7); + $sum = hex($a1) + 1; + $r = sprintf("%07x", $sum) . $r; + if ($sum <= 0xfffffff) { + $r = $addr . $r; + # if ($main::opt_debug) { print STDERR "$r\n"; } + return HexExtend($r); + } else { + $r = "00000000000000"; + } + + $sum = hex($addr) + 1; + if ($sum > 0xff) { $sum -= 0x100; } + $r = sprintf("%02x", $sum) . $r; + + # if ($main::opt_debug) { print STDERR "$r\n"; } + return $r; + } +} + +# Extract symbols for all PC values found in profile +sub ExtractSymbols { + my $libs = shift; + my $pcset = shift; + + my $symbols = {}; + + # Map each PC value to the containing library. To make this faster, + # we sort libraries by their starting pc value (highest first), and + # advance through the libraries as we advance the pc. 
Sometimes the + # addresses of libraries may overlap with the addresses of the main + # binary, so to make sure the libraries 'win', we iterate over the + # libraries in reverse order (which assumes the binary doesn't start + # in the middle of a library, which seems a fair assumption). + my @pcs = (sort { $a cmp $b } keys(%{$pcset})); # pcset is 0-extended strings + foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) { + my $libname = $lib->[0]; + my $start = $lib->[1]; + my $finish = $lib->[2]; + my $offset = $lib->[3]; + + # Get list of pcs that belong in this library. + my $contained = []; + my ($start_pc_index, $finish_pc_index); + # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index]. + for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0; + $finish_pc_index--) { + last if $pcs[$finish_pc_index - 1] le $finish; + } + # Find smallest start_pc_index such that $start <= $pc[$start_pc_index]. + for ($start_pc_index = $finish_pc_index; $start_pc_index > 0; + $start_pc_index--) { + last if $pcs[$start_pc_index - 1] lt $start; + } + # This keeps PC values higher than $pc[$finish_pc_index] in @pcs, + # in case there are overlaps in libraries and the main binary. 
+ @{$contained} = splice(@pcs, $start_pc_index, + $finish_pc_index - $start_pc_index); + # Map to symbols + MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols); + } + + return $symbols; +} + +# Map list of PC values to symbols for a given image +sub MapToSymbols { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + my $debug = 0; + + # For libc (and other) libraries, the copy in /usr/lib/debug contains debugging symbols + my $debugging = DebuggingLibrary($image); + if ($debugging) { + $image = $debugging; + } + + # Ignore empty binaries + if ($#{$pclist} < 0) { return; } + + # Figure out the addr2line command to use + my $addr2line = $obj_tool_map{"addr2line"}; + my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image); + if (exists $obj_tool_map{"addr2line_pdb"}) { + $addr2line = $obj_tool_map{"addr2line_pdb"}; + $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image); + } + + # If "addr2line" isn't installed on the system at all, just use + # nm to get what info we can (function names, but not line numbers). + if (system(ShellEscape($addr2line, "--help") . " >$dev_null 2>&1") != 0) { + MapSymbolsWithNM($image, $offset, $pclist, $symbols); + return; + } + + # "addr2line -i" can produce a variable number of lines per input + # address, with no separator that allows us to tell when data for + # the next address starts. So we find the address for a special + # symbol (_fini) and interleave this address between all real + # addresses passed to addr2line. The name of this special symbol + # can then be used as a separator. + $sep_address = undef; # May be filled in by MapSymbolsWithNM() + my $nm_symbols = {}; + MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); + if (defined($sep_address)) { + # Only add " -i" to addr2line if the binary supports it. + # addr2line --help returns 0, but not if it sees an unknown flag first. 
+ if (system("$cmd -i --help >$dev_null 2>&1") == 0) { + $cmd .= " -i"; + } else { + $sep_address = undef; # no need for sep_address if we don't support -i + } + } + + # Make file with all PC values with intervening 'sep_address' so + # that we can reliably detect the end of inlined function list + open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); + if ($debug) { print("---- $image ---\n"); } + for (my $i = 0; $i <= $#{$pclist}; $i++) { + # addr2line always reads hex addresses, and does not need '0x' prefix. + if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); } + printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); + if (defined($sep_address)) { + printf ADDRESSES ("%s\n", $sep_address); + } + } + close(ADDRESSES); + if ($debug) { + print("----\n"); + system("cat", $main::tmpfile_sym); + print("---- $cmd ---\n"); + system("$cmd < " . ShellEscape($main::tmpfile_sym)); + print("----\n"); + } + + open(SYMBOLS, "$cmd <" . ShellEscape($main::tmpfile_sym) . " |") + || error("$cmd: $!\n"); + my $count = 0; # Index in pclist + while (<SYMBOLS>) { + # Read fullfunction and filelineinfo from next pair of lines + s/\r?\n$//g; + my $fullfunction = $_; + $_ = <SYMBOLS>; + s/\r?\n$//g; + my $filelinenum = $_; + + if (defined($sep_address) && $fullfunction eq $sep_symbol) { + # Terminating marker for data for this address + $count++; + next; + } + + $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths + + # Remove discriminator markers as this comes after the line number and + # confuses the rest of this script. + $filelinenum =~ s/ \(discriminator \d+\)$//; + # Convert unknown line numbers into line 0. + $filelinenum =~ s/:\?$/:0/; + + my $pcstr = $pclist->[$count]; + my $function = ShortFunctionName($fullfunction); + my $nms = $nm_symbols->{$pcstr}; + if (defined($nms)) { + if ($fullfunction eq '??') { + # nm found a symbol for us. 
+ $function = $nms->[0]; + $fullfunction = $nms->[2]; + } else { + # MapSymbolsWithNM tags each routine with its starting address, + # useful in case the image has multiple occurrences of this + # routine. (It uses a syntax that resembles template paramters, + # that are automatically stripped out by ShortFunctionName().) + # addr2line does not provide the same information. So we check + # if nm disambiguated our symbol, and if so take the annotated + # (nm) version of the routine-name. TODO(csilvers): this won't + # catch overloaded, inlined symbols, which nm doesn't see. + # Better would be to do a check similar to nm's, in this fn. + if ($nms->[2] =~ m/^\Q$function\E/) { # sanity check it's the right fn + $function = $nms->[0]; + $fullfunction = $nms->[2]; + } + } + } + + # Prepend to accumulated symbols for pcstr + # (so that caller comes before callee) + my $sym = $symbols->{$pcstr}; + if (!defined($sym)) { + $sym = []; + $symbols->{$pcstr} = $sym; + } + unshift(@{$sym}, $function, $filelinenum, $fullfunction); + if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } + if (!defined($sep_address)) { + # Inlining is off, so this entry ends immediately + $count++; + } + } + close(SYMBOLS); +} + +# Use nm to map the list of referenced PCs to symbols. Return true iff we +# are able to read procedure information via nm. +sub MapSymbolsWithNM { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + # Get nm output sorted by increasing address + my $symbol_table = GetProcedureBoundaries($image, "."); + if (!%{$symbol_table}) { + return 0; + } + # Start addresses are already the right length (8 or 16 hex digits). + my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] } + keys(%{$symbol_table}); + + if ($#names < 0) { + # No symbols: just use addresses + foreach my $pc (@{$pclist}) { + my $pcstr = "0x" . 
$pc; + $symbols->{$pc} = [$pcstr, "?", $pcstr]; + } + return 0; + } + + # Sort addresses so we can do a join against nm output + my $index = 0; + my $fullname = $names[0]; + my $name = ShortFunctionName($fullname); + foreach my $pc (sort { $a cmp $b } @{$pclist}) { + # Adjust for mapped offset + my $mpc = AddressSub($pc, $offset); + while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){ + $index++; + $fullname = $names[$index]; + $name = ShortFunctionName($fullname); + } + if ($mpc lt $symbol_table->{$fullname}->[1]) { + $symbols->{$pc} = [$name, "?", $fullname]; + } else { + my $pcstr = "0x" . $pc; + $symbols->{$pc} = [$pcstr, "?", $pcstr]; + } + } + return 1; +} + +sub ShortFunctionName { + my $function = shift; + while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types + $function =~ s/<[0-9a-f]*>$//g; # Remove Address + if (!$main::opt_no_strip_temp) { + while ($function =~ s/<[^<>]*>//g) { } # Remove template arguments + } + $function =~ s/^.*\s+(\w+::)/$1/; # Remove leading type + return $function; +} + +# Trim overly long symbols found in disassembler output +sub CleanDisassembly { + my $d = shift; + while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) + while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments + return $d; +} + +# Clean file name for display +sub CleanFileName { + my ($f) = @_; + $f =~ s|^/proc/self/cwd/||; + $f =~ s|^\./||; + return $f; +} + +# Make address relative to section and clean up for display +sub UnparseAddress { + my ($offset, $address) = @_; + $address = AddressSub($address, $offset); + $address =~ s/^0x//; + $address =~ s/^0*//; + return $address; +} + +##### Miscellaneous ##### + +# Find the right versions of the above object tools to use. The +# argument is the program file being analyzed, and should be an ELF +# 32-bit or ELF 64-bit executable file. 
The location of the tools +# is determined by considering the following options in this order: +# 1) --tools option, if set +# 2) PPROF_TOOLS environment variable, if set +# 3) the environment +sub ConfigureObjTools { + my $prog_file = shift; + + # Check for the existence of $prog_file because /usr/bin/file does not + # predictably return error status in prod. + (-e $prog_file) || error("$prog_file does not exist.\n"); + + my $file_type = undef; + if (-e "/usr/bin/file") { + # Follow symlinks (at least for systems where "file" supports that). + my $escaped_prog_file = ShellEscape($prog_file); + $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null || + /usr/bin/file $escaped_prog_file`; + } elsif ($^O == "MSWin32") { + $file_type = "MS Windows"; + } else { + print STDERR "WARNING: Can't determine the file type of $prog_file"; + } + + if ($file_type =~ /64-bit/) { + # Change $address_length to 16 if the program file is ELF 64-bit. + # We can't detect this from many (most?) heap or lock contention + # profiles, since the actual addresses referenced are generally in low + # memory even for 64-bit programs. + $address_length = 16; + } + + if ($file_type =~ /MS Windows/) { + # For windows, we provide a version of nm and addr2line as part of + # the opensource release, which is capable of parsing + # Windows-style PDB executables. It should live in the path, or + # in the same directory as pprof. + $obj_tool_map{"nm_pdb"} = "nm-pdb"; + $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb"; + } + + if ($file_type =~ /Mach-O/) { + # OS X uses otool to examine Mach-O files, rather than objdump. + $obj_tool_map{"otool"} = "otool"; + $obj_tool_map{"addr2line"} = "false"; # no addr2line + $obj_tool_map{"objdump"} = "false"; # no objdump + } + + # Go fill in %obj_tool_map with the pathnames to use: + foreach my $tool (keys %obj_tool_map) { + $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool}); + } +} + +# Returns the path of a caller-specified object tool. 
If --tools or +# PPROF_TOOLS are specified, then returns the full path to the tool +# with that prefix. Otherwise, returns the path unmodified (which +# means we will look for it on PATH). +sub ConfigureTool { + my $tool = shift; + my $path; + + # --tools (or $PPROF_TOOLS) is a comma separated list, where each + # item is either a) a pathname prefix, or b) a map of the form + # <tool>:<path>. First we look for an entry of type (b) for our + # tool. If one is found, we use it. Otherwise, we consider all the + # pathname prefixes in turn, until one yields an existing file. If + # none does, we use a default path. + my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || ""; + if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) { + $path = $2; + # TODO(csilvers): sanity-check that $path exists? Hard if it's relative. + } elsif ($tools ne '') { + foreach my $prefix (split(',', $tools)) { + next if ($prefix =~ /:/); # ignore "tool:fullpath" entries in the list + if (-x $prefix . $tool) { + $path = $prefix . $tool; + last; + } + } + if (!$path) { + error("No '$tool' found with prefix specified by " . + "--tools (or \$PPROF_TOOLS) '$tools'\n"); + } + } else { + # ... otherwise use the version that exists in the same directory as + # pprof. If there's nothing there, use $PATH. + $0 =~ m,[^/]*$,; # this is everything after the last slash + my $dirname = $`; # this is everything up to and including the last slash + if (-x "$dirname$tool") { + $path = "$dirname$tool"; + } else { + $path = $tool; + } + } + if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; } + return $path; +} + +sub ShellEscape { + my @escaped_words = (); + foreach my $word (@_) { + my $escaped_word = $word; + if ($word =~ m![^a-zA-Z0-9/.,_=-]!) 
{ # check for anything not in whitelist + $escaped_word =~ s/'/'\\''/; + $escaped_word = "'$escaped_word'"; + } + push(@escaped_words, $escaped_word); + } + return join(" ", @escaped_words); +} + +sub cleanup { + unlink($main::tmpfile_sym); + unlink(keys %main::tempnames); + + # We leave any collected profiles in $HOME/pprof in case the user wants + # to look at them later. We print a message informing them of this. + if ((scalar(@main::profile_files) > 0) && + defined($main::collected_profile)) { + if (scalar(@main::profile_files) == 1) { + print STDERR "Dynamically gathered profile is in $main::collected_profile\n"; + } + print STDERR "If you want to investigate this profile further, you can do:\n"; + print STDERR "\n"; + print STDERR " $0 \\\n"; + print STDERR " $main::prog \\\n"; + print STDERR " $main::collected_profile\n"; + print STDERR "\n"; + } +} + +sub sighandler { + cleanup(); + exit(1); +} + +sub error { + my $msg = shift; + print STDERR $msg; + cleanup(); + exit(1); +} + + +# Run $nm_command and get all the resulting procedure boundaries whose +# names match "$regexp" and returns them in a hashtable mapping from +# procedure name to a two-element vector of [start address, end address] +sub GetProcedureBoundariesViaNm { + my $escaped_nm_command = shift; # shell-escaped + my $regexp = shift; + my $image = shift; + + my $symbol_table = {}; + open(NM, "$escaped_nm_command |") || error("$escaped_nm_command: $!\n"); + my $last_start = "0"; + my $routine = ""; + while (<NM>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (m/^\s*([0-9a-f]+) (.) (..*)/) { + my $start_val = $1; + my $type = $2; + my $this_routine = $3; + + # It's possible for two symbols to share the same address, if + # one is a zero-length variable (like __start_google_malloc) or + # one symbol is a weak alias to another (like __libc_malloc). + # In such cases, we want to ignore all values except for the + # actual symbol, which in nm-speak has type "T". 
The logic + # below does this, though it's a bit tricky: what happens when + # we have a series of lines with the same address, is the first + # one gets queued up to be processed. However, it won't + # *actually* be processed until later, when we read a line with + # a different address. That means that as long as we're reading + # lines with the same address, we have a chance to replace that + # item in the queue, which we do whenever we see a 'T' entry -- + # that is, a line with type 'T'. If we never see a 'T' entry, + # we'll just go ahead and process the first entry (which never + # got touched in the queue), and ignore the others. + if ($start_val eq $last_start && $type =~ /t/i) { + # We are the 'T' symbol at this address, replace previous symbol. + $routine = $this_routine; + next; + } elsif ($start_val eq $last_start) { + # We're not the 'T' symbol at this address, so ignore us. + next; + } + + if ($this_routine eq $sep_symbol) { + $sep_address = HexExtend($start_val); + } + + # Tag this routine with the starting address in case the image + # has multiple occurrences of this routine. We use a syntax + # that resembles template paramters that are automatically + # stripped out by ShortFunctionName() + $this_routine .= "<$start_val>"; + + if (defined($routine) && $routine =~ m/$regexp/) { + $symbol_table->{$routine} = [HexExtend($last_start), + HexExtend($start_val)]; + } + $last_start = $start_val; + $routine = $this_routine; + } elsif (m/^Loaded image name: (.+)/) { + # The win32 nm workalike emits information about the binary it is using. + if ($main::opt_debug) { print STDERR "Using Image $1\n"; } + } elsif (m/^PDB file name: (.+)/) { + # The win32 nm workalike emits information about the pdb it is using. + if ($main::opt_debug) { print STDERR "Using PDB $1\n"; } + } + } + close(NM); + # Handle the last line in the nm output. Unfortunately, we don't know + # how big this last symbol is, because we don't know how big the file + # is. 
For now, we just give it a size of 0. + # TODO(csilvers): do better here. + if (defined($routine) && $routine =~ m/$regexp/) { + $symbol_table->{$routine} = [HexExtend($last_start), + HexExtend($last_start)]; + } + + # Verify if addr2line can find the $sep_symbol. If not, we use objdump + # to find the address for the $sep_symbol on code section which addr2line + # can find. + if (defined($sep_address)){ + my $start_val = $sep_address; + my $addr2line = $obj_tool_map{"addr2line"}; + my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image, "-i"); + open(FINI, "echo $start_val | $cmd |") + || error("echo $start_val | $cmd: $!\n"); + $_ = <FINI>; + s/\r?\n$//g; + my $fini = $_; + close(FINI); + if ($fini ne $sep_symbol){ + my $objdump = $obj_tool_map{"objdump"}; + $cmd = ShellEscape($objdump, "-d", $image); + my $grep = ShellEscape("grep", $sep_symbol); + my $tail = ShellEscape("tail", "-n", "1"); + open(FINI, "$cmd | $grep | $tail |") + || error("$cmd | $grep | $tail: $!\n"); + s/\r//g; # turn windows-looking lines into unix-looking lines + my $data = <FINI>; + if (defined($data)){ + ($start_val, $fini) = split(/ </,$data); + } + close(FINI); + } + $sep_address = HexExtend($start_val); + } + + return $symbol_table; +} + +# Gets the procedure boundaries for all routines in "$image" whose names +# match "$regexp" and returns them in a hashtable mapping from procedure +# name to a two-element vector of [start address, end address]. +# Will return an empty map if nm is not installed or not working properly. +sub GetProcedureBoundaries { + my $image = shift; + my $regexp = shift; + + # If $image doesn't start with /, then put ./ in front of it. This works + # around an obnoxious bug in our probing of nm -f behavior. + # "nm -f $image" is supposed to fail on GNU nm, but if: + # + # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND + # b. 
you have a.out in your current directory (a not uncommon occurrence) + # + # then "nm -f $image" succeeds because -f only looks at the first letter of + # the argument, which looks valid because it's [BbSsPp], and then since + # there's no image provided, it looks for a.out and finds it. + # + # This regex makes sure that $image starts with . or /, forcing the -f + # parsing to fail since . and / are not valid formats. + $image =~ s#^[^/]#./$&#; + + # For libc libraries, the copy in /usr/lib/debug contains debugging symbols + my $debugging = DebuggingLibrary($image); + if ($debugging) { + $image = $debugging; + } + + my $nm = $obj_tool_map{"nm"}; + my $cppfilt = $obj_tool_map{"c++filt"}; + + # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm + # binary doesn't support --demangle. In addition, for OS X we need + # to use the -f flag to get 'flat' nm output (otherwise we don't sort + # properly and get incorrect results). Unfortunately, GNU nm uses -f + # in an incompatible way. So first we test whether our nm supports + # --demangle and -f. + my $demangle_flag = ""; + my $cppfilt_flag = ""; + my $to_devnull = ">$dev_null 2>&1"; + if (system(ShellEscape($nm, "--demangle", "image") . $to_devnull) == 0) { + # In this mode, we do "nm --demangle <foo>" + $demangle_flag = "--demangle"; + $cppfilt_flag = ""; + } elsif (system(ShellEscape($cppfilt, $image) . $to_devnull) == 0) { + # In this mode, we do "nm <foo> | c++filt" + $cppfilt_flag = " | " . ShellEscape($cppfilt); + }; + my $flatten_flag = ""; + if (system(ShellEscape($nm, "-f", $image) . $to_devnull) == 0) { + $flatten_flag = "-f"; + } + + # Finally, in the case $imagie isn't a debug library, we try again with + # -D to at least get *exported* symbols. If we can't use --demangle, + # we use c++filt instead, if it exists on this system. + my @nm_commands = (ShellEscape($nm, "-n", $flatten_flag, $demangle_flag, + $image) . 
" 2>$dev_null $cppfilt_flag", + ShellEscape($nm, "-D", "-n", $flatten_flag, $demangle_flag, + $image) . " 2>$dev_null $cppfilt_flag", + # 6nm is for Go binaries + ShellEscape("6nm", "$image") . " 2>$dev_null | sort", + ); + + # If the executable is an MS Windows PDB-format executable, we'll + # have set up obj_tool_map("nm_pdb"). In this case, we actually + # want to use both unix nm and windows-specific nm_pdb, since + # PDB-format executables can apparently include dwarf .o files. + if (exists $obj_tool_map{"nm_pdb"}) { + push(@nm_commands, + ShellEscape($obj_tool_map{"nm_pdb"}, "--demangle", $image) + . " 2>$dev_null"); + } + + foreach my $nm_command (@nm_commands) { + my $symbol_table = GetProcedureBoundariesViaNm($nm_command, $regexp, $image); + return $symbol_table if (%{$symbol_table}); + } + my $symbol_table = {}; + return $symbol_table; +} + + +# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings. +# To make them more readable, we add underscores at interesting places. +# This routine removes the underscores, producing the canonical representation +# used by pprof to represent addresses, particularly in the tested routines. +sub CanonicalHex { + my $arg = shift; + return join '', (split '_',$arg); +} + + +# Unit test for AddressAdd: +sub AddressAddUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. 
Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd ($row->[0], $row->[1]); + if ($sum ne $row->[2]) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + my $expected = join '', (split '_',$row->[2]); + if ($sum ne CanonicalHex($row->[2])) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressSub: +sub AddressSubUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. 
Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub ($row->[0], $row->[1]); + if ($sum ne $row->[3]) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + if ($sum ne CanonicalHex($row->[3])) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressInc: +sub AddressIncUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. 
Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc ($row->[0]); + if ($sum ne $row->[4]) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc (CanonicalHex($row->[0])); + if ($sum ne CanonicalHex($row->[4])) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Driver for unit tests. +# Currently just the address add/subtract/increment routines for 64-bit. +sub RunUnitTests { + my $error_count = 0; + + # This is a list of tuples [a, b, a+b, a-b, a+1] + my $unit_test_data_8 = [ + [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)], + [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)], + [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)], + [qw(00000001 ffffffff 00000000 00000002 00000002)], + [qw(00000001 fffffff0 fffffff1 00000011 00000002)], + ]; + my $unit_test_data_16 = [ + # The implementation handles data in 7-nibble chunks, so those are the + # interesting boundaries. 
+ [qw(aaaaaaaa 50505050 + 00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)], + [qw(50505050 aaaaaaaa + 00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)], + [qw(ffffffff aaaaaaaa + 00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)], + [qw(00000001 ffffffff + 00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)], + [qw(00000001 fffffff0 + 00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)], + + [qw(00_a00000a_aaaaaaa 50505050 + 00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)], + [qw(0f_fff0005_0505050 aaaaaaaa + 0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)], + [qw(00_000000f_fffffff 01_800000a_aaaaaaa + 01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)], + [qw(00_0000000_0000001 ff_fffffff_fffffff + 00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)], + [qw(00_0000000_0000001 ff_fffffff_ffffff0 + ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)], + ]; + + $error_count += AddressAddUnitTest($unit_test_data_8, $unit_test_data_16); + $error_count += AddressSubUnitTest($unit_test_data_8, $unit_test_data_16); + $error_count += AddressIncUnitTest($unit_test_data_8, $unit_test_data_16); + if ($error_count > 0) { + print STDERR $error_count, " errors: FAILED\n"; + } else { + print STDERR "PASS\n"; + } + exit ($error_count); +} diff --git a/src/third_party/gperftools-2.7/src/profile-handler.cc b/src/third_party/gperftools-2.7/src/profile-handler.cc new file mode 100644 index 00000000000..7fdcb69333f --- /dev/null +++ b/src/third_party/gperftools-2.7/src/profile-handler.cc @@ -0,0 +1,584 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2009, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// Nabeel Mian +// +// Implements management of profile timers and the corresponding signal handler. 
+ +#include "config.h" +#include "profile-handler.h" + +#if !(defined(__CYGWIN__) || defined(__CYGWIN32__)) + +#include <stdio.h> +#include <errno.h> +#include <sys/time.h> + +#include <list> +#include <string> + +#if HAVE_LINUX_SIGEV_THREAD_ID +// for timer_{create,settime} and associated typedefs & constants +#include <time.h> +// for sys_gettid +#include "base/linux_syscall_support.h" +// for perftools_pthread_key_create +#include "maybe_threads.h" +#endif + +#include "base/dynamic_annotations.h" +#include "base/googleinit.h" +#include "base/logging.h" +#include "base/spinlock.h" +#include "maybe_threads.h" + +using std::list; +using std::string; + +// This structure is used by ProfileHandlerRegisterCallback and +// ProfileHandlerUnregisterCallback as a handle to a registered callback. +struct ProfileHandlerToken { + // Sets the callback and associated arg. + ProfileHandlerToken(ProfileHandlerCallback cb, void* cb_arg) + : callback(cb), + callback_arg(cb_arg) { + } + + // Callback function to be invoked on receiving a profile timer interrupt. + ProfileHandlerCallback callback; + // Argument for the callback function. + void* callback_arg; +}; + +// Blocks a signal from being delivered to the current thread while the object +// is alive. Unblocks it upon destruction. +class ScopedSignalBlocker { + public: + ScopedSignalBlocker(int signo) { + sigemptyset(&sig_set_); + sigaddset(&sig_set_, signo); + RAW_CHECK(sigprocmask(SIG_BLOCK, &sig_set_, NULL) == 0, + "sigprocmask (block)"); + } + ~ScopedSignalBlocker() { + RAW_CHECK(sigprocmask(SIG_UNBLOCK, &sig_set_, NULL) == 0, + "sigprocmask (unblock)"); + } + + private: + sigset_t sig_set_; +}; + +// This class manages profile timers and associated signal handler. This is a +// a singleton. +class ProfileHandler { + public: + // Registers the current thread with the profile handler. + void RegisterThread(); + + // Registers a callback routine to receive profile timer ticks. 
The returned + // token is to be used when unregistering this callback and must not be + // deleted by the caller. + ProfileHandlerToken* RegisterCallback(ProfileHandlerCallback callback, + void* callback_arg); + + // Unregisters a previously registered callback. Expects the token returned + // by the corresponding RegisterCallback routine. + void UnregisterCallback(ProfileHandlerToken* token) + NO_THREAD_SAFETY_ANALYSIS; + + // Unregisters all the callbacks and stops the timer(s). + void Reset(); + + // Gets the current state of profile handler. + void GetState(ProfileHandlerState* state); + + // Initializes and returns the ProfileHandler singleton. + static ProfileHandler* Instance(); + + private: + ProfileHandler(); + ~ProfileHandler(); + + // Largest allowed frequency. + static const int32 kMaxFrequency = 4000; + // Default frequency. + static const int32 kDefaultFrequency = 100; + + // ProfileHandler singleton. + static ProfileHandler* instance_; + + // pthread_once_t for one time initialization of ProfileHandler singleton. + static pthread_once_t once_; + + // Initializes the ProfileHandler singleton via GoogleOnceInit. + static void Init(); + + // Timer state as configured previously. + bool timer_running_; + + // The number of profiling signal interrupts received. + int64 interrupts_ GUARDED_BY(signal_lock_); + + // Profiling signal interrupt frequency, read-only after construction. + int32 frequency_; + + // ITIMER_PROF (which uses SIGPROF), or ITIMER_REAL (which uses SIGALRM). + // Translated into an equivalent choice of clock if per_thread_timer_enabled_ + // is true. + int timer_type_; + + // Signal number for timer signal. + int signal_number_; + + // Counts the number of callbacks registered. + int32 callback_count_ GUARDED_BY(control_lock_); + + // Is profiling allowed at all? + bool allowed_; + + // Must be false if HAVE_LINUX_SIGEV_THREAD_ID is not defined. 
+ bool per_thread_timer_enabled_; + +#ifdef HAVE_LINUX_SIGEV_THREAD_ID + // this is used to destroy per-thread profiling timers on thread + // termination + pthread_key_t thread_timer_key; +#endif + + // This lock serializes the registration of threads and protects the + // callbacks_ list below. + // Locking order: + // In the context of a signal handler, acquire signal_lock_ to walk the + // callback list. Otherwise, acquire control_lock_, disable the signal + // handler and then acquire signal_lock_. + SpinLock control_lock_ ACQUIRED_BEFORE(signal_lock_); + SpinLock signal_lock_; + + // Holds the list of registered callbacks. We expect the list to be pretty + // small. Currently, the cpu profiler (base/profiler) and thread module + // (base/thread.h) are the only two components registering callbacks. + // Following are the locking requirements for callbacks_: + // For read-write access outside the SIGPROF handler: + // - Acquire control_lock_ + // - Disable SIGPROF handler. + // - Acquire signal_lock_ + // For read-only access in the context of SIGPROF handler + // (Read-write access is *not allowed* in the SIGPROF handler) + // - Acquire signal_lock_ + // For read-only access outside SIGPROF handler: + // - Acquire control_lock_ + typedef list<ProfileHandlerToken*> CallbackList; + typedef CallbackList::iterator CallbackIterator; + CallbackList callbacks_ GUARDED_BY(signal_lock_); + + // Starts or stops the interval timer. + // Will ignore any requests to enable or disable when + // per_thread_timer_enabled_ is true. + void UpdateTimer(bool enable) EXCLUSIVE_LOCKS_REQUIRED(signal_lock_); + + // Returns true if the handler is not being used by something else. + // This checks the kernel's signal handler table. + bool IsSignalHandlerAvailable(); + + // Signal handler. Iterates over and calls all the registered callbacks. 
+ static void SignalHandler(int sig, siginfo_t* sinfo, void* ucontext); + + DISALLOW_COPY_AND_ASSIGN(ProfileHandler); +}; + +ProfileHandler* ProfileHandler::instance_ = NULL; +pthread_once_t ProfileHandler::once_ = PTHREAD_ONCE_INIT; + +const int32 ProfileHandler::kMaxFrequency; +const int32 ProfileHandler::kDefaultFrequency; + +// If we are LD_PRELOAD-ed against a non-pthreads app, then these functions +// won't be defined. We declare them here, for that case (with weak linkage) +// which will cause the non-definition to resolve to NULL. We can then check +// for NULL or not in Instance. +extern "C" { +int pthread_once(pthread_once_t *, void (*)(void)) ATTRIBUTE_WEAK; +int pthread_kill(pthread_t thread_id, int signo) ATTRIBUTE_WEAK; + +#if HAVE_LINUX_SIGEV_THREAD_ID +int timer_create(clockid_t clockid, struct sigevent* evp, + timer_t* timerid) ATTRIBUTE_WEAK; +int timer_delete(timer_t timerid) ATTRIBUTE_WEAK; +int timer_settime(timer_t timerid, int flags, const struct itimerspec* value, + struct itimerspec* ovalue) ATTRIBUTE_WEAK; +#endif +} + +#if HAVE_LINUX_SIGEV_THREAD_ID + +struct timer_id_holder { + timer_t timerid; + timer_id_holder(timer_t _timerid) : timerid(_timerid) {} +}; + +extern "C" { + static void ThreadTimerDestructor(void *arg) { + if (!arg) { + return; + } + timer_id_holder *holder = static_cast<timer_id_holder *>(arg); + timer_delete(holder->timerid); + delete holder; + } +} + +static void CreateThreadTimerKey(pthread_key_t *pkey) { + int rv = perftools_pthread_key_create(pkey, ThreadTimerDestructor); + if (rv) { + RAW_LOG(FATAL, "aborting due to pthread_key_create error: %s", strerror(rv)); + } +} + +static void StartLinuxThreadTimer(int timer_type, int signal_number, + int32 frequency, pthread_key_t timer_key) { + int rv; + struct sigevent sevp; + timer_t timerid; + struct itimerspec its; + memset(&sevp, 0, sizeof(sevp)); + sevp.sigev_notify = SIGEV_THREAD_ID; + sevp._sigev_un._tid = sys_gettid(); + sevp.sigev_signo = signal_number; + 
clockid_t clock = CLOCK_THREAD_CPUTIME_ID; + if (timer_type == ITIMER_REAL) { + clock = CLOCK_MONOTONIC; + } + rv = timer_create(clock, &sevp, &timerid); + if (rv) { + RAW_LOG(FATAL, "aborting due to timer_create error: %s", strerror(errno)); + } + + timer_id_holder *holder = new timer_id_holder(timerid); + rv = perftools_pthread_setspecific(timer_key, holder); + if (rv) { + RAW_LOG(FATAL, "aborting due to pthread_setspecific error: %s", strerror(rv)); + } + + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 1000000000 / frequency; + its.it_value = its.it_interval; + rv = timer_settime(timerid, 0, &its, 0); + if (rv) { + RAW_LOG(FATAL, "aborting due to timer_settime error: %s", strerror(errno)); + } +} +#endif + +void ProfileHandler::Init() { + instance_ = new ProfileHandler(); +} + +ProfileHandler* ProfileHandler::Instance() { + if (pthread_once) { + pthread_once(&once_, Init); + } + if (instance_ == NULL) { + // This will be true on systems that don't link in pthreads, + // including on FreeBSD where pthread_once has a non-zero address + // (but doesn't do anything) even when pthreads isn't linked in. + Init(); + assert(instance_ != NULL); + } + return instance_; +} + +ProfileHandler::ProfileHandler() + : timer_running_(false), + interrupts_(0), + callback_count_(0), + allowed_(true), + per_thread_timer_enabled_(false) { + SpinLockHolder cl(&control_lock_); + + timer_type_ = (getenv("CPUPROFILE_REALTIME") ? ITIMER_REAL : ITIMER_PROF); + signal_number_ = (timer_type_ == ITIMER_PROF ? SIGPROF : SIGALRM); + + // Get frequency of interrupts (if specified) + char junk; + const char* fr = getenv("CPUPROFILE_FREQUENCY"); + if (fr != NULL && (sscanf(fr, "%u%c", &frequency_, &junk) == 1) && + (frequency_ > 0)) { + // Limit to kMaxFrequency + frequency_ = (frequency_ > kMaxFrequency) ? 
kMaxFrequency : frequency_; + } else { + frequency_ = kDefaultFrequency; + } + + if (!allowed_) { + return; + } + +#if HAVE_LINUX_SIGEV_THREAD_ID + // Do this early because we might be overriding signal number. + + const char *per_thread = getenv("CPUPROFILE_PER_THREAD_TIMERS"); + const char *signal_number = getenv("CPUPROFILE_TIMER_SIGNAL"); + + if (per_thread || signal_number) { + if (timer_create && pthread_once) { + CreateThreadTimerKey(&thread_timer_key); + per_thread_timer_enabled_ = true; + // Override signal number if requested. + if (signal_number) { + signal_number_ = strtol(signal_number, NULL, 0); + } + } else { + RAW_LOG(INFO, + "Ignoring CPUPROFILE_PER_THREAD_TIMERS and\n" + " CPUPROFILE_TIMER_SIGNAL due to lack of timer_create().\n" + " Preload or link to librt.so for this to work"); + } + } +#endif + + // If something else is using the signal handler, + // assume it has priority over us and stop. + if (!IsSignalHandlerAvailable()) { + RAW_LOG(INFO, "Disabling profiler because signal %d handler is already in use.", + signal_number_); + allowed_ = false; + return; + } + + // Install the signal handler. + struct sigaction sa; + sa.sa_sigaction = SignalHandler; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + RAW_CHECK(sigaction(signal_number_, &sa, NULL) == 0, "sigprof (enable)"); +} + +ProfileHandler::~ProfileHandler() { + Reset(); +#ifdef HAVE_LINUX_SIGEV_THREAD_ID + if (per_thread_timer_enabled_) { + perftools_pthread_key_delete(thread_timer_key); + } +#endif +} + +void ProfileHandler::RegisterThread() { + SpinLockHolder cl(&control_lock_); + + if (!allowed_) { + return; + } + + // Record the thread identifier and start the timer if profiling is on. 
+ ScopedSignalBlocker block(signal_number_); + SpinLockHolder sl(&signal_lock_); +#if HAVE_LINUX_SIGEV_THREAD_ID + if (per_thread_timer_enabled_) { + StartLinuxThreadTimer(timer_type_, signal_number_, frequency_, + thread_timer_key); + return; + } +#endif + UpdateTimer(callback_count_ > 0); +} + +ProfileHandlerToken* ProfileHandler::RegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + + ProfileHandlerToken* token = new ProfileHandlerToken(callback, callback_arg); + + SpinLockHolder cl(&control_lock_); + { + ScopedSignalBlocker block(signal_number_); + SpinLockHolder sl(&signal_lock_); + callbacks_.push_back(token); + ++callback_count_; + UpdateTimer(true); + } + return token; +} + +void ProfileHandler::UnregisterCallback(ProfileHandlerToken* token) { + SpinLockHolder cl(&control_lock_); + for (CallbackIterator it = callbacks_.begin(); it != callbacks_.end(); + ++it) { + if ((*it) == token) { + RAW_CHECK(callback_count_ > 0, "Invalid callback count"); + { + ScopedSignalBlocker block(signal_number_); + SpinLockHolder sl(&signal_lock_); + delete *it; + callbacks_.erase(it); + --callback_count_; + if (callback_count_ == 0) + UpdateTimer(false); + } + return; + } + } + // Unknown token. + RAW_LOG(FATAL, "Invalid token"); +} + +void ProfileHandler::Reset() { + SpinLockHolder cl(&control_lock_); + { + ScopedSignalBlocker block(signal_number_); + SpinLockHolder sl(&signal_lock_); + CallbackIterator it = callbacks_.begin(); + while (it != callbacks_.end()) { + CallbackIterator tmp = it; + ++it; + delete *tmp; + callbacks_.erase(tmp); + } + callback_count_ = 0; + UpdateTimer(false); + } +} + +void ProfileHandler::GetState(ProfileHandlerState* state) { + SpinLockHolder cl(&control_lock_); + { + ScopedSignalBlocker block(signal_number_); + SpinLockHolder sl(&signal_lock_); // Protects interrupts_. 
+ state->interrupts = interrupts_; + } + state->frequency = frequency_; + state->callback_count = callback_count_; + state->allowed = allowed_; +} + +void ProfileHandler::UpdateTimer(bool enable) { + if (per_thread_timer_enabled_) { + // Ignore any attempts to disable it because that's not supported, and it's + // always enabled so enabling is always a NOP. + return; + } + + if (enable == timer_running_) { + return; + } + timer_running_ = enable; + + struct itimerval timer; + static const int kMillion = 1000000; + int interval_usec = enable ? kMillion / frequency_ : 0; + timer.it_interval.tv_sec = interval_usec / kMillion; + timer.it_interval.tv_usec = interval_usec % kMillion; + timer.it_value = timer.it_interval; + setitimer(timer_type_, &timer, 0); +} + +bool ProfileHandler::IsSignalHandlerAvailable() { + struct sigaction sa; + RAW_CHECK(sigaction(signal_number_, NULL, &sa) == 0, "is-signal-handler avail"); + + // We only take over the handler if the current one is unset. + // It must be SIG_IGN or SIG_DFL, not some other function. + // SIG_IGN must be allowed because when profiling is allowed but + // not actively in use, this code keeps the handler set to SIG_IGN. + // That setting will be inherited across fork+exec. In order for + // any child to be able to use profiling, SIG_IGN must be treated + // as available. + return sa.sa_handler == SIG_IGN || sa.sa_handler == SIG_DFL; +} + +void ProfileHandler::SignalHandler(int sig, siginfo_t* sinfo, void* ucontext) { + int saved_errno = errno; + // At this moment, instance_ must be initialized because the handler is + // enabled in RegisterThread or RegisterCallback only after + // ProfileHandler::Instance runs. 
+ ProfileHandler* instance = ANNOTATE_UNPROTECTED_READ(instance_); + RAW_CHECK(instance != NULL, "ProfileHandler is not initialized"); + { + SpinLockHolder sl(&instance->signal_lock_); + ++instance->interrupts_; + for (CallbackIterator it = instance->callbacks_.begin(); + it != instance->callbacks_.end(); + ++it) { + (*it)->callback(sig, sinfo, ucontext, (*it)->callback_arg); + } + } + errno = saved_errno; +} + +// This module initializer registers the main thread, so it must be +// executed in the context of the main thread. +REGISTER_MODULE_INITIALIZER(profile_main, ProfileHandlerRegisterThread()); + +void ProfileHandlerRegisterThread() { + ProfileHandler::Instance()->RegisterThread(); +} + +ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + return ProfileHandler::Instance()->RegisterCallback(callback, callback_arg); +} + +void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token) { + ProfileHandler::Instance()->UnregisterCallback(token); +} + +void ProfileHandlerReset() { + return ProfileHandler::Instance()->Reset(); +} + +void ProfileHandlerGetState(ProfileHandlerState* state) { + ProfileHandler::Instance()->GetState(state); +} + +#else // OS_CYGWIN + +// ITIMER_PROF doesn't work under cygwin. ITIMER_REAL is available, but doesn't +// work as well for profiling, and also interferes with alarm(). Because of +// these issues, unless a specific need is identified, profiler support is +// disabled under Cygwin. 
+void ProfileHandlerRegisterThread() { +} + +ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + return NULL; +} + +void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token) { +} + +void ProfileHandlerReset() { +} + +void ProfileHandlerGetState(ProfileHandlerState* state) { +} + +#endif // OS_CYGWIN diff --git a/src/third_party/gperftools-2.7/src/profile-handler.h b/src/third_party/gperftools-2.7/src/profile-handler.h new file mode 100644 index 00000000000..3eae169d55a --- /dev/null +++ b/src/third_party/gperftools-2.7/src/profile-handler.h @@ -0,0 +1,142 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Nabeel Mian + * + * This module manages the cpu profile timers and the associated interrupt + * handler. When enabled, all threads in the program are profiled. + * + * Any component interested in receiving a profile timer interrupt can do so by + * registering a callback. All registered callbacks must be async-signal-safe. + * + * Note: This module requires the sole ownership of the configured timer and + * signal. The timer defaults to ITIMER_PROF, can be changed to ITIMER_REAL by + * the environment variable CPUPROFILE_REALTIME, or is changed to a POSIX timer + * with CPUPROFILE_PER_THREAD_TIMERS. The signal defaults to SIGPROF/SIGALRM to + * match the choice of timer and can be set to an arbitrary value using + * CPUPROFILE_TIMER_SIGNAL with CPUPROFILE_PER_THREAD_TIMERS. + */ + +#ifndef BASE_PROFILE_HANDLER_H_ +#define BASE_PROFILE_HANDLER_H_ + +#include "config.h" +#include <signal.h> +#ifdef COMPILER_MSVC +#include "conflict-signal.h" +#endif +#include "base/basictypes.h" + +/* Forward declaration. */ +struct ProfileHandlerToken; + +/* + * Callback function to be used with ProfilefHandlerRegisterCallback. This + * function will be called in the context of SIGPROF signal handler and must + * be async-signal-safe. The first three arguments are the values provided by + * the SIGPROF signal handler. We use void* to avoid using ucontext_t on + * non-POSIX systems. 
+ * + * Requirements: + * - Callback must be async-signal-safe. + * - None of the functions in ProfileHandler are async-signal-safe. Therefore, + * callback function *must* not call any of the ProfileHandler functions. + * - Callback is not required to be re-entrant. At most one instance of + * callback can run at a time. + * + * Notes: + * - The SIGPROF signal handler saves and restores errno, so the callback + * doesn't need to. + * - Callback code *must* not acquire lock(s) to serialize access to data shared + * with the code outside the signal handler (callback must be + * async-signal-safe). If such a serialization is needed, follow the model + * used by profiler.cc: + * + * When code other than the signal handler modifies the shared data it must: + * - Acquire lock. + * - Unregister the callback with the ProfileHandler. + * - Modify shared data. + * - Re-register the callback. + * - Release lock. + * and the callback code gets a lockless, read-write access to the data. + */ +typedef void (*ProfileHandlerCallback)(int sig, siginfo_t* sig_info, + void* ucontext, void* callback_arg); + +/* + * Registers a new thread with profile handler and should be called only once + * per thread. The main thread is registered at program startup. This routine + * is called by the Thread module in google3/thread whenever a new thread is + * created. This function is not async-signal-safe. + */ +void ProfileHandlerRegisterThread(); + +/* + * Registers a callback routine. This callback function will be called in the + * context of SIGPROF handler, so must be async-signal-safe. The returned token + * is to be used when unregistering this callback via + * ProfileHandlerUnregisterCallback. Registering the first callback enables + * the SIGPROF signal handler. Caller must not free the returned token. This + * function is not async-signal-safe. 
+ */ +ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg); + +/* + * Unregisters a previously registered callback. Expects the token returned + * by the corresponding ProfileHandlerRegisterCallback and asserts that the + * passed token is valid. Unregistering the last callback disables the SIGPROF + * signal handler. It waits for the currently running callback to + * complete before returning. This function is not async-signal-safe. + */ +void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token); + +/* + * FOR TESTING ONLY + * Unregisters all the callbacks, stops the timers (if shared) and disables the + * SIGPROF handler. All the threads, including the main thread, need to be + * re-registered after this call. This function is not async-signal-safe. + */ +void ProfileHandlerReset(); + +/* + * Stores profile handler's current state. This function is not + * async-signal-safe. + */ +struct ProfileHandlerState { + int32 frequency; /* Profiling frequency */ + int32 callback_count; /* Number of callbacks registered */ + int64 interrupts; /* Number of interrupts received */ + bool allowed; /* Profiling is allowed */ +}; +void ProfileHandlerGetState(struct ProfileHandlerState* state); + +#endif /* BASE_PROFILE_HANDLER_H_ */ diff --git a/src/third_party/gperftools-2.7/src/profiledata.cc b/src/third_party/gperftools-2.7/src/profiledata.cc new file mode 100644 index 00000000000..8b05d3aa45c --- /dev/null +++ b/src/third_party/gperftools-2.7/src/profiledata.cc @@ -0,0 +1,332 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Sanjay Ghemawat +// Chris Demetriou (refactoring) +// +// Collect profiling data. + +#include <config.h> +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <sys/time.h> +#include <string.h> +#include <fcntl.h> + +#include "profiledata.h" + +#include "base/logging.h" +#include "base/sysinfo.h" + +// All of these are initialized in profiledata.h. +const int ProfileData::kMaxStackDepth; +const int ProfileData::kAssociativity; +const int ProfileData::kBuckets; +const int ProfileData::kBufferLength; + +ProfileData::Options::Options() + : frequency_(1) { +} + +// This function is safe to call from asynchronous signals (but is not +// re-entrant). 
However, that's not part of its public interface. +void ProfileData::Evict(const Entry& entry) { + const int d = entry.depth; + const int nslots = d + 2; // Number of slots needed in eviction buffer + if (num_evicted_ + nslots > kBufferLength) { + FlushEvicted(); + assert(num_evicted_ == 0); + assert(nslots <= kBufferLength); + } + evict_[num_evicted_++] = entry.count; + evict_[num_evicted_++] = d; + memcpy(&evict_[num_evicted_], entry.stack, d * sizeof(Slot)); + num_evicted_ += d; +} + +ProfileData::ProfileData() + : hash_(0), + evict_(0), + num_evicted_(0), + out_(-1), + count_(0), + evictions_(0), + total_bytes_(0), + fname_(0), + start_time_(0) { +} + +bool ProfileData::Start(const char* fname, + const ProfileData::Options& options) { + if (enabled()) { + return false; + } + + // Open output file and initialize various data structures + int fd = open(fname, O_CREAT | O_WRONLY | O_TRUNC, 0666); + if (fd < 0) { + // Can't open outfile for write + return false; + } + + start_time_ = time(NULL); + fname_ = strdup(fname); + + // Reset counters + num_evicted_ = 0; + count_ = 0; + evictions_ = 0; + total_bytes_ = 0; + + hash_ = new Bucket[kBuckets]; + evict_ = new Slot[kBufferLength]; + memset(hash_, 0, sizeof(hash_[0]) * kBuckets); + + // Record special entries + evict_[num_evicted_++] = 0; // count for header + evict_[num_evicted_++] = 3; // depth for header + evict_[num_evicted_++] = 0; // Version number + CHECK_NE(0, options.frequency()); + int period = 1000000 / options.frequency(); + evict_[num_evicted_++] = period; // Period (microseconds) + evict_[num_evicted_++] = 0; // Padding + + out_ = fd; + + return true; +} + +ProfileData::~ProfileData() { + Stop(); +} + +// Dump /proc/maps data to fd. Copied from heap-profile-table.cc. 
+#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +static void FDWrite(int fd, const char* buf, size_t len) { + while (len > 0) { + ssize_t r; + NO_INTR(r = write(fd, buf, len)); + RAW_CHECK(r >= 0, "write failed"); + buf += r; + len -= r; + } +} + +static void DumpProcSelfMaps(int fd) { + ProcMapsIterator::Buffer iterbuf; + ProcMapsIterator it(0, &iterbuf); // 0 means "current pid" + + uint64 start, end, offset; + int64 inode; + char *flags, *filename; + ProcMapsIterator::Buffer linebuf; + while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) { + int written = it.FormatLine(linebuf.buf_, sizeof(linebuf.buf_), + start, end, flags, offset, inode, filename, + 0); + FDWrite(fd, linebuf.buf_, written); + } +} + +void ProfileData::Stop() { + if (!enabled()) { + return; + } + + // Move data from hash table to eviction buffer + for (int b = 0; b < kBuckets; b++) { + Bucket* bucket = &hash_[b]; + for (int a = 0; a < kAssociativity; a++) { + if (bucket->entry[a].count > 0) { + Evict(bucket->entry[a]); + } + } + } + + if (num_evicted_ + 3 > kBufferLength) { + // Ensure there is enough room for end of data marker + FlushEvicted(); + } + + // Write end of data marker + evict_[num_evicted_++] = 0; // count + evict_[num_evicted_++] = 1; // depth + evict_[num_evicted_++] = 0; // end of data marker + FlushEvicted(); + + // Dump "/proc/self/maps" so we get list of mapped shared libraries + DumpProcSelfMaps(out_); + + Reset(); + fprintf(stderr, "PROFILE: interrupts/evictions/bytes = %d/%d/%" PRIuS "\n", + count_, evictions_, total_bytes_); +} + +void ProfileData::Reset() { + if (!enabled()) { + return; + } + + // Don't reset count_, evictions_, or total_bytes_ here. They're used + // by Stop to print information about the profile after reset, and are + // cleared by Start when starting a new profile. 
+ close(out_); + delete[] hash_; + hash_ = 0; + delete[] evict_; + evict_ = 0; + num_evicted_ = 0; + free(fname_); + fname_ = 0; + start_time_ = 0; + + out_ = -1; +} + +// This function is safe to call from asynchronous signals (but is not +// re-entrant). However, that's not part of its public interface. +void ProfileData::GetCurrentState(State* state) const { + if (enabled()) { + state->enabled = true; + state->start_time = start_time_; + state->samples_gathered = count_; + int buf_size = sizeof(state->profile_name); + strncpy(state->profile_name, fname_, buf_size); + state->profile_name[buf_size-1] = '\0'; + } else { + state->enabled = false; + state->start_time = 0; + state->samples_gathered = 0; + state->profile_name[0] = '\0'; + } +} + +// This function is safe to call from asynchronous signals (but is not +// re-entrant). However, that's not part of its public interface. +void ProfileData::FlushTable() { + if (!enabled()) { + return; + } + + // Move data from hash table to eviction buffer + for (int b = 0; b < kBuckets; b++) { + Bucket* bucket = &hash_[b]; + for (int a = 0; a < kAssociativity; a++) { + if (bucket->entry[a].count > 0) { + Evict(bucket->entry[a]); + bucket->entry[a].depth = 0; + bucket->entry[a].count = 0; + } + } + } + + // Write out all pending data + FlushEvicted(); +} + +void ProfileData::Add(int depth, const void* const* stack) { + if (!enabled()) { + return; + } + + if (depth > kMaxStackDepth) depth = kMaxStackDepth; + RAW_CHECK(depth > 0, "ProfileData::Add depth <= 0"); + + // Make hash-value + Slot h = 0; + for (int i = 0; i < depth; i++) { + Slot slot = reinterpret_cast<Slot>(stack[i]); + h = (h << 8) | (h >> (8*(sizeof(h)-1))); + h += (slot * 31) + (slot * 7) + (slot * 3); + } + + count_++; + + // See if table already has an entry for this trace + bool done = false; + Bucket* bucket = &hash_[h % kBuckets]; + for (int a = 0; a < kAssociativity; a++) { + Entry* e = &bucket->entry[a]; + if (e->depth == depth) { + bool match = true; + 
for (int i = 0; i < depth; i++) { + if (e->stack[i] != reinterpret_cast<Slot>(stack[i])) { + match = false; + break; + } + } + if (match) { + e->count++; + done = true; + break; + } + } + } + + if (!done) { + // Evict entry with smallest count + Entry* e = &bucket->entry[0]; + for (int a = 1; a < kAssociativity; a++) { + if (bucket->entry[a].count < e->count) { + e = &bucket->entry[a]; + } + } + if (e->count > 0) { + evictions_++; + Evict(*e); + } + + // Use the newly evicted entry + e->depth = depth; + e->count = 1; + for (int i = 0; i < depth; i++) { + e->stack[i] = reinterpret_cast<Slot>(stack[i]); + } + } +} + +// This function is safe to call from asynchronous signals (but is not +// re-entrant). However, that's not part of its public interface. +void ProfileData::FlushEvicted() { + if (num_evicted_ > 0) { + const char* buf = reinterpret_cast<char*>(evict_); + size_t bytes = sizeof(evict_[0]) * num_evicted_; + total_bytes_ += bytes; + FDWrite(out_, buf, bytes); + } + num_evicted_ = 0; +} diff --git a/src/third_party/gperftools-2.7/src/profiledata.h b/src/third_party/gperftools-2.7/src/profiledata.h new file mode 100644 index 00000000000..b94b28cff8e --- /dev/null +++ b/src/third_party/gperftools-2.7/src/profiledata.h @@ -0,0 +1,184 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Sanjay Ghemawat +// Chris Demetriou (refactoring) +// +// Collect profiling data. +// +// The profile data file format is documented in +// docs/cpuprofile-fileformat.html + + +#ifndef BASE_PROFILEDATA_H_ +#define BASE_PROFILEDATA_H_ + +#include <config.h> +#include <time.h> // for time_t +#include <stdint.h> +#include "base/basictypes.h" + +// A class that accumulates profile samples and writes them to a file. +// +// Each sample contains a stack trace and a count. Memory usage is +// reduced by combining profile samples that have the same stack trace +// by adding up the associated counts. +// +// Profile data is accumulated in a bounded amount of memory, and will +// flushed to a file as necessary to stay within the memory limit. +// +// Use of this class assumes external synchronization. The exact +// requirements of that synchronization are that: +// +// - 'Add' may be called from asynchronous signals, but is not +// re-entrant. 
+// +// - None of 'Start', 'Stop', 'Reset', 'Flush', and 'Add' may be +// called at the same time. +// +// - 'Start', 'Stop', or 'Reset' should not be called while 'Enabled' +// or 'GetCurrent' are running, and vice versa. +// +// A profiler which uses asyncronous signals to add samples will +// typically use two locks to protect this data structure: +// +// - A SpinLock which is held over all calls except for the 'Add' +// call made from the signal handler. +// +// - A SpinLock which is held over calls to 'Start', 'Stop', 'Reset', +// 'Flush', and 'Add'. (This SpinLock should be acquired after +// the first SpinLock in all cases where both are needed.) +class ProfileData { + public: + struct State { + bool enabled; // Is profiling currently enabled? + time_t start_time; // If enabled, when was profiling started? + char profile_name[1024]; // Name of file being written, or '\0' + int samples_gathered; // Number of samples gathered to far (or 0) + }; + + class Options { + public: + Options(); + + // Get and set the sample frequency. + int frequency() const { + return frequency_; + } + void set_frequency(int frequency) { + frequency_ = frequency; + } + + private: + int frequency_; // Sample frequency. + }; + + static const int kMaxStackDepth = 64; // Max stack depth stored in profile + + ProfileData(); + ~ProfileData(); + + // If data collection is not already enabled start to collect data + // into fname. Parameters related to this profiling run are specified + // by 'options'. + // + // Returns true if data collection could be started, otherwise (if an + // error occurred or if data collection was already enabled) returns + // false. + bool Start(const char *fname, const Options& options); + + // If data collection is enabled, stop data collection and write the + // data to disk. + void Stop(); + + // Stop data collection without writing anything else to disk, and + // discard any collected data. 
+ void Reset(); + + // If data collection is enabled, record a sample with 'depth' + // entries from 'stack'. (depth must be > 0.) At most + // kMaxStackDepth stack entries will be recorded, starting with + // stack[0]. + // + // This function is safe to call from asynchronous signals (but is + // not re-entrant). + void Add(int depth, const void* const* stack); + + // If data collection is enabled, write the data to disk (and leave + // the collector enabled). + void FlushTable(); + + // Is data collection currently enabled? + bool enabled() const { return out_ >= 0; } + + // Get the current state of the data collector. + void GetCurrentState(State* state) const; + + private: + static const int kAssociativity = 4; // For hashtable + static const int kBuckets = 1 << 10; // For hashtable + static const int kBufferLength = 1 << 18; // For eviction buffer + + // Type of slots: each slot can be either a count, or a PC value + typedef uintptr_t Slot; + + // Hash-table/eviction-buffer entry (a.k.a. a sample) + struct Entry { + Slot count; // Number of hits + Slot depth; // Stack depth + Slot stack[kMaxStackDepth]; // Stack contents + }; + + // Hash table bucket + struct Bucket { + Entry entry[kAssociativity]; + }; + + Bucket* hash_; // hash table + Slot* evict_; // evicted entries + int num_evicted_; // how many evicted entries? + int out_; // fd for output file. + int count_; // How many samples recorded + int evictions_; // How many evictions + size_t total_bytes_; // How much output + char* fname_; // Profile file name + time_t start_time_; // Start time, or 0 + + // Move 'entry' to the eviction buffer. + void Evict(const Entry& entry); + + // Write contents of eviction buffer to disk. 
+ void FlushEvicted(); + + DISALLOW_COPY_AND_ASSIGN(ProfileData); +}; + +#endif // BASE_PROFILEDATA_H_ diff --git a/src/third_party/gperftools-2.7/src/profiler.cc b/src/third_party/gperftools-2.7/src/profiler.cc new file mode 100644 index 00000000000..f4f59900088 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/profiler.cc @@ -0,0 +1,431 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Sanjay Ghemawat +// Chris Demetriou (refactoring) +// +// Profile current program by sampling stack-trace every so often + +#include "config.h" +#include "getpc.h" // should be first to get the _GNU_SOURCE dfn +#include <signal.h> +#include <assert.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for getpid() +#endif +#if defined(HAVE_SYS_UCONTEXT_H) +#include <sys/ucontext.h> +#elif defined(HAVE_UCONTEXT_H) +#include <ucontext.h> +#elif defined(HAVE_CYGWIN_SIGNAL_H) +#include <cygwin/signal.h> +typedef ucontext ucontext_t; +#else +typedef int ucontext_t; // just to quiet the compiler, mostly +#endif +#include <sys/time.h> +#include <string> +#include <gperftools/profiler.h> +#include <gperftools/stacktrace.h> +#include "base/commandlineflags.h" +#include "base/logging.h" +#include "base/googleinit.h" +#include "base/spinlock.h" +#include "base/sysinfo.h" /* for GetUniquePathFromEnv, etc */ +#include "profiledata.h" +#include "profile-handler.h" +#ifdef HAVE_CONFLICT_SIGNAL_H +#include "conflict-signal.h" /* used on msvc machines */ +#endif + +using std::string; + +DEFINE_bool(cpu_profiler_unittest, + EnvToBool("PERFTOOLS_UNITTEST", true), + "Determines whether or not we are running under the \ + control of a unit test. This allows us to include or \ + exclude certain behaviours."); + +// Collects up all profile data. This is a singleton, which is +// initialized by a constructor at startup. If no cpu profiler +// signal is specified then the profiler lifecycle is either +// manaully controlled via the API or attached to the scope of +// the singleton (program scope). Otherwise the cpu toggle is +// used to allow for user selectable control via signal generation. +// This is very useful for profiling a daemon process without +// having to start and stop the daemon or having to modify the +// source code to use the cpu profiler API. 
+class CpuProfiler { + public: + CpuProfiler(); + ~CpuProfiler(); + + // Start profiler to write profile info into fname + bool Start(const char* fname, const ProfilerOptions* options); + + // Stop profiling and write the data to disk. + void Stop(); + + // Write the data to disk (and continue profiling). + void FlushTable(); + + bool Enabled(); + + void GetCurrentState(ProfilerState* state); + + static CpuProfiler instance_; + + private: + // This lock implements the locking requirements described in the ProfileData + // documentation, specifically: + // + // lock_ is held all over all collector_ method calls except for the 'Add' + // call made from the signal handler, to protect against concurrent use of + // collector_'s control routines. Code other than signal handler must + // unregister the signal handler before calling any collector_ method. + // 'Add' method in the collector is protected by a guarantee from + // ProfileHandle that only one instance of prof_handler can run at a time. + SpinLock lock_; + ProfileData collector_; + + // Filter function and its argument, if any. (NULL means include all + // samples). Set at start, read-only while running. Written while holding + // lock_, read and executed in the context of SIGPROF interrupt. + int (*filter_)(void*); + void* filter_arg_; + + // Opaque token returned by the profile handler. To be used when calling + // ProfileHandlerUnregisterCallback. + ProfileHandlerToken* prof_handler_token_; + + // Sets up a callback to receive SIGPROF interrupt. + void EnableHandler(); + + // Disables receiving SIGPROF interrupt. + void DisableHandler(); + + // Signal handler that records the interrupted pc in the profile data. + static void prof_handler(int sig, siginfo_t*, void* signal_ucontext, + void* cpu_profiler); +}; + +// Signal handler that is registered when a user selectable signal +// number is defined in the environment variable CPUPROFILESIGNAL. 
+static void CpuProfilerSwitch(int signal_number) +{ + bool static started = false; + static unsigned profile_count = 0; + static char base_profile_name[1024] = "\0"; + + if (base_profile_name[0] == '\0') { + if (!GetUniquePathFromEnv("CPUPROFILE", base_profile_name)) { + RAW_LOG(FATAL,"Cpu profiler switch is registered but no CPUPROFILE is defined"); + return; + } + } + if (!started) + { + char full_profile_name[1024]; + + snprintf(full_profile_name, sizeof(full_profile_name), "%s.%u", + base_profile_name, profile_count++); + + if(!ProfilerStart(full_profile_name)) + { + RAW_LOG(FATAL, "Can't turn on cpu profiling for '%s': %s\n", + full_profile_name, strerror(errno)); + } + } + else + { + ProfilerStop(); + } + started = !started; +} + +// Profile data structure singleton: Constructor will check to see if +// profiling should be enabled. Destructor will write profile data +// out to disk. +CpuProfiler CpuProfiler::instance_; + +// Initialize profiling: activated if getenv("CPUPROFILE") exists. +CpuProfiler::CpuProfiler() + : prof_handler_token_(NULL) { + // TODO(cgd) Move this code *out* of the CpuProfile constructor into a + // separate object responsible for initialization. With ProfileHandler there + // is no need to limit the number of profilers. 
+ if (getenv("CPUPROFILE") == NULL) { + if (!FLAGS_cpu_profiler_unittest) { + RAW_LOG(WARNING, "CPU profiler linked but no valid CPUPROFILE environment variable found\n"); + } + return; + } + + // We don't enable profiling if setuid -- it's a security risk +#ifdef HAVE_GETEUID + if (getuid() != geteuid()) { + if (!FLAGS_cpu_profiler_unittest) { + RAW_LOG(WARNING, "Cannot perform CPU profiling when running with setuid\n"); + } + return; + } +#endif + + char *signal_number_str = getenv("CPUPROFILESIGNAL"); + if (signal_number_str != NULL) { + long int signal_number = strtol(signal_number_str, NULL, 10); + if (signal_number >= 1 && signal_number <= 64) { + intptr_t old_signal_handler = reinterpret_cast<intptr_t>(signal(signal_number, CpuProfilerSwitch)); + if (old_signal_handler == 0) { + RAW_LOG(INFO,"Using signal %d as cpu profiling switch", signal_number); + } else { + RAW_LOG(FATAL, "Signal %d already in use\n", signal_number); + } + } else { + RAW_LOG(FATAL, "Signal number %s is invalid\n", signal_number_str); + } + } else { + char fname[PATH_MAX]; + if (!GetUniquePathFromEnv("CPUPROFILE", fname)) { + if (!FLAGS_cpu_profiler_unittest) { + RAW_LOG(WARNING, "CPU profiler linked but no valid CPUPROFILE environment variable found\n"); + } + return; + } + + if (!Start(fname, NULL)) { + RAW_LOG(FATAL, "Can't turn on cpu profiling for '%s': %s\n", + fname, strerror(errno)); + } + } +} + +bool CpuProfiler::Start(const char* fname, const ProfilerOptions* options) { + SpinLockHolder cl(&lock_); + + if (collector_.enabled()) { + return false; + } + + ProfileHandlerState prof_handler_state; + ProfileHandlerGetState(&prof_handler_state); + + ProfileData::Options collector_options; + collector_options.set_frequency(prof_handler_state.frequency); + if (!collector_.Start(fname, collector_options)) { + return false; + } + + filter_ = NULL; + if (options != NULL && options->filter_in_thread != NULL) { + filter_ = options->filter_in_thread; + filter_arg_ = 
options->filter_in_thread_arg; + } + + // Setup handler for SIGPROF interrupts + EnableHandler(); + + return true; +} + +CpuProfiler::~CpuProfiler() { + Stop(); +} + +// Stop profiling and write out any collected profile data +void CpuProfiler::Stop() { + SpinLockHolder cl(&lock_); + + if (!collector_.enabled()) { + return; + } + + // Unregister prof_handler to stop receiving SIGPROF interrupts before + // stopping the collector. + DisableHandler(); + + // DisableHandler waits for the currently running callback to complete and + // guarantees no future invocations. It is safe to stop the collector. + collector_.Stop(); +} + +void CpuProfiler::FlushTable() { + SpinLockHolder cl(&lock_); + + if (!collector_.enabled()) { + return; + } + + // Unregister prof_handler to stop receiving SIGPROF interrupts before + // flushing the profile data. + DisableHandler(); + + // DisableHandler waits for the currently running callback to complete and + // guarantees no future invocations. It is safe to flush the profile data. 
+ collector_.FlushTable(); + + EnableHandler(); +} + +bool CpuProfiler::Enabled() { + SpinLockHolder cl(&lock_); + return collector_.enabled(); +} + +void CpuProfiler::GetCurrentState(ProfilerState* state) { + ProfileData::State collector_state; + { + SpinLockHolder cl(&lock_); + collector_.GetCurrentState(&collector_state); + } + + state->enabled = collector_state.enabled; + state->start_time = static_cast<time_t>(collector_state.start_time); + state->samples_gathered = collector_state.samples_gathered; + int buf_size = sizeof(state->profile_name); + strncpy(state->profile_name, collector_state.profile_name, buf_size); + state->profile_name[buf_size-1] = '\0'; +} + +void CpuProfiler::EnableHandler() { + RAW_CHECK(prof_handler_token_ == NULL, "SIGPROF handler already registered"); + prof_handler_token_ = ProfileHandlerRegisterCallback(prof_handler, this); + RAW_CHECK(prof_handler_token_ != NULL, "Failed to set up SIGPROF handler"); +} + +void CpuProfiler::DisableHandler() { + RAW_CHECK(prof_handler_token_ != NULL, "SIGPROF handler is not registered"); + ProfileHandlerUnregisterCallback(prof_handler_token_); + prof_handler_token_ = NULL; +} + +// Signal handler that records the pc in the profile-data structure. We do no +// synchronization here. profile-handler.cc guarantees that at most one +// instance of prof_handler() will run at a time. All other routines that +// access the data touched by prof_handler() disable this signal handler before +// accessing the data and therefore cannot execute concurrently with +// prof_handler(). 
+void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext, + void* cpu_profiler) { + CpuProfiler* instance = static_cast<CpuProfiler*>(cpu_profiler); + + if (instance->filter_ == NULL || + (*instance->filter_)(instance->filter_arg_)) { + void* stack[ProfileData::kMaxStackDepth]; + + // Under frame-pointer-based unwinding at least on x86, the + // top-most active routine doesn't show up as a normal frame, but + // as the "pc" value in the signal handler context. + stack[0] = GetPC(*reinterpret_cast<ucontext_t*>(signal_ucontext)); + + // We skip the top three stack trace entries (this function, + // SignalHandler::SignalHandler and one signal handler frame) + // since they are artifacts of profiling and should not be + // measured. Other profiling related frames may be removed by + // "pprof" at analysis time. Instead of skipping the top frames, + // we could skip nothing, but that would increase the profile size + // unnecessarily. + int depth = GetStackTraceWithContext(stack + 1, arraysize(stack) - 1, + 3, signal_ucontext); + + void **used_stack; + if (depth > 0 && stack[1] == stack[0]) { + // in case of non-frame-pointer-based unwinding we will get + // duplicate of PC in stack[1], which we don't want + used_stack = stack + 1; + } else { + used_stack = stack; + depth++; // To account for pc value in stack[0]; + } + + instance->collector_.Add(depth, used_stack); + } +} + +#if !(defined(__CYGWIN__) || defined(__CYGWIN32__)) + +extern "C" PERFTOOLS_DLL_DECL void ProfilerRegisterThread() { + ProfileHandlerRegisterThread(); +} + +extern "C" PERFTOOLS_DLL_DECL void ProfilerFlush() { + CpuProfiler::instance_.FlushTable(); +} + +extern "C" PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads() { + return CpuProfiler::instance_.Enabled(); +} + +extern "C" PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname) { + return CpuProfiler::instance_.Start(fname, NULL); +} + +extern "C" PERFTOOLS_DLL_DECL int ProfilerStartWithOptions( + const char *fname, const 
ProfilerOptions *options) { + return CpuProfiler::instance_.Start(fname, options); +} + +extern "C" PERFTOOLS_DLL_DECL void ProfilerStop() { + CpuProfiler::instance_.Stop(); +} + +extern "C" PERFTOOLS_DLL_DECL void ProfilerGetCurrentState( + ProfilerState* state) { + CpuProfiler::instance_.GetCurrentState(state); +} + +#else // OS_CYGWIN + +// ITIMER_PROF doesn't work under cygwin. ITIMER_REAL is available, but doesn't +// work as well for profiling, and also interferes with alarm(). Because of +// these issues, unless a specific need is identified, profiler support is +// disabled under Cygwin. +extern "C" void ProfilerRegisterThread() { } +extern "C" void ProfilerFlush() { } +extern "C" int ProfilingIsEnabledForAllThreads() { return 0; } +extern "C" int ProfilerStart(const char* fname) { return 0; } +extern "C" int ProfilerStartWithOptions(const char *fname, + const ProfilerOptions *options) { + return 0; +} +extern "C" void ProfilerStop() { } +extern "C" void ProfilerGetCurrentState(ProfilerState* state) { + memset(state, 0, sizeof(*state)); +} + +#endif // OS_CYGWIN + +// DEPRECATED routines +extern "C" PERFTOOLS_DLL_DECL void ProfilerEnable() { } +extern "C" PERFTOOLS_DLL_DECL void ProfilerDisable() { } diff --git a/src/third_party/gperftools-2.7/src/raw_printer.cc b/src/third_party/gperftools-2.7/src/raw_printer.cc new file mode 100644 index 00000000000..3cf028eeae0 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/raw_printer.cc @@ -0,0 +1,72 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: sanjay@google.com (Sanjay Ghemawat) + +#include <config.h> +#include <stdarg.h> +#include <stdio.h> +#include "raw_printer.h" +#include "base/logging.h" + +namespace base { + +RawPrinter::RawPrinter(char* buf, int length) + : base_(buf), + ptr_(buf), + limit_(buf + length - 1) { + RAW_DCHECK(length > 0, ""); + *ptr_ = '\0'; + *limit_ = '\0'; +} + +void RawPrinter::Printf(const char* format, ...) { + if (limit_ > ptr_) { + va_list ap; + va_start(ap, format); + int avail = limit_ - ptr_; + // We pass avail+1 to vsnprintf() since that routine needs room + // to store the trailing \0. + const int r = perftools_vsnprintf(ptr_, avail+1, format, ap); + va_end(ap); + if (r < 0) { + // Perhaps an old glibc that returns -1 on truncation? 
+ ptr_ = limit_; + } else if (r > avail) { + // Truncation + ptr_ = limit_; + } else { + ptr_ += r; + } + } +} + +} diff --git a/src/third_party/gperftools-2.7/src/raw_printer.h b/src/third_party/gperftools-2.7/src/raw_printer.h new file mode 100644 index 00000000000..9288bb5eeaa --- /dev/null +++ b/src/third_party/gperftools-2.7/src/raw_printer.h @@ -0,0 +1,90 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// A printf() wrapper that writes into a fixed length buffer. +// Useful in low-level code that does not want to use allocating +// routines like StringPrintf(). +// +// The implementation currently uses vsnprintf(). This seems to +// be fine for use in many low-level contexts, but we may need to +// rethink this decision if we hit a problem with it calling +// down into malloc() etc. + +#ifndef BASE_RAW_PRINTER_H_ +#define BASE_RAW_PRINTER_H_ + +#include <config.h> +#include "base/basictypes.h" + +namespace base { + +class RawPrinter { + public: + // REQUIRES: "length > 0" + // Will printf any data added to this into "buf[0,length-1]" and + // will arrange to always keep buf[] null-terminated. + RawPrinter(char* buf, int length); + + // Return the number of bytes that have been appended to the string + // so far. Does not count any bytes that were dropped due to overflow. + int length() const { return (ptr_ - base_); } + + // Return the number of bytes that can be added to this. + int space_left() const { return (limit_ - ptr_); } + + // Format the supplied arguments according to the "format" string + // and append to this. Will silently truncate the output if it does + // not fit. + void Printf(const char* format, ...) +#ifdef HAVE___ATTRIBUTE__ + __attribute__ ((__format__ (__printf__, 2, 3))) +#endif +; + + private: + // We can write into [ptr_ .. limit_-1]. 
+ // *limit_ is also writable, but reserved for a terminating \0 + // in case we overflow. + // + // Invariants: *ptr_ == \0 + // Invariants: *limit_ == \0 + char* base_; // Initial pointer + char* ptr_; // Where should we write next + char* limit_; // One past last non-\0 char we can write + + DISALLOW_COPY_AND_ASSIGN(RawPrinter); +}; + +} + +#endif // BASE_RAW_PRINTER_H_ diff --git a/src/third_party/gperftools-2.7/src/sampler.cc b/src/third_party/gperftools-2.7/src/sampler.cc new file mode 100644 index 00000000000..e63d4cbc9bb --- /dev/null +++ b/src/third_party/gperftools-2.7/src/sampler.cc @@ -0,0 +1,133 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Daniel Ford + +#include "sampler.h" + +#include <algorithm> // For min() +#include <math.h> +#include "base/commandlineflags.h" + +using std::min; + +// The approximate gap in bytes between sampling actions. +// I.e., we take one sample approximately once every +// tcmalloc_sample_parameter bytes of allocation +// i.e. about once every 512KB if value is 1<<19. +#ifdef NO_TCMALLOC_SAMPLES +DEFINE_int64(tcmalloc_sample_parameter, 0, + "Unused: code is compiled with NO_TCMALLOC_SAMPLES"); +#else +DEFINE_int64(tcmalloc_sample_parameter, + EnvToInt64("TCMALLOC_SAMPLE_PARAMETER", 0), + "The approximate gap in bytes between sampling actions. " + "This must be between 1 and 2^58."); +#endif + +namespace tcmalloc { + +int Sampler::GetSamplePeriod() { + return FLAGS_tcmalloc_sample_parameter; +} + +// Run this before using your sampler +void Sampler::Init(uint64_t seed) { + ASSERT(seed != 0); + + // Initialize PRNG + rnd_ = seed; + // Step it forward 20 times for good measure + for (int i = 0; i < 20; i++) { + rnd_ = NextRandom(rnd_); + } + // Initialize counter + bytes_until_sample_ = PickNextSamplingPoint(); +} + +#define MAX_SSIZE (static_cast<ssize_t>(static_cast<size_t>(static_cast<ssize_t>(-1)) >> 1)) + +// Generates a geometric variable with the specified mean (512K by default). 
+// This is done by generating a random number between 0 and 1 and applying +// the inverse cumulative distribution function for an exponential. +// Specifically: Let m be the inverse of the sample period, then +// the probability distribution function is m*exp(-mx) so the CDF is +// p = 1 - exp(-mx), so +// q = 1 - p = exp(-mx) +// log_e(q) = -mx +// -log_e(q)/m = x +// log_2(q) * (-log_e(2) * 1/m) = x +// In the code, q is actually in the range 1 to 2**26, hence the -26 below +ssize_t Sampler::PickNextSamplingPoint() { + if (FLAGS_tcmalloc_sample_parameter <= 0) { + // In this case, we don't want to sample ever, and the larger a + // value we put here, the longer until we hit the slow path + // again. However, we have to support the flag changing at + // runtime, so pick something reasonably large (to keep overhead + // low) but small enough that we'll eventually start to sample + // again. + return 16 << 20; + } + + rnd_ = NextRandom(rnd_); + // Take the top 26 bits as the random number + // (This plus the 1<<58 sampling bound give a max possible step of + // 5194297183973780480 bytes.) + const uint64_t prng_mod_power = 48; // Number of bits in prng + // The uint32_t cast is to prevent a (hard-to-reproduce) NAN + // under piii debug for some binaries. + double q = static_cast<uint32_t>(rnd_ >> (prng_mod_power - 26)) + 1.0; + // Put the computed p-value through the CDF of a geometric. + double interval = + (log2(q) - 26) * (-log(2.0) * FLAGS_tcmalloc_sample_parameter); + + // Very large values of interval overflow ssize_t. If we happen to + // hit such improbable condition, we simply cheat and clamp interval + // to largest supported value. 
+ return static_cast<ssize_t>(std::min<double>(interval, MAX_SSIZE)); +} + +bool Sampler::RecordAllocationSlow(size_t k) { + if (!initialized_) { + initialized_ = true; + Init(reinterpret_cast<uintptr_t>(this)); + if (static_cast<size_t>(bytes_until_sample_) >= k) { + bytes_until_sample_ -= k; + return true; + } + } + bytes_until_sample_ = PickNextSamplingPoint(); + return FLAGS_tcmalloc_sample_parameter <= 0; +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.7/src/sampler.h b/src/third_party/gperftools-2.7/src/sampler.h new file mode 100644 index 00000000000..16d3b09f04a --- /dev/null +++ b/src/third_party/gperftools-2.7/src/sampler.h @@ -0,0 +1,230 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Daniel Ford + +#ifndef TCMALLOC_SAMPLER_H_ +#define TCMALLOC_SAMPLER_H_ + +#include "config.h" +#include <stddef.h> // for size_t +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uint64_t, uint32_t, int32_t +#endif +#include <string.h> // for memcpy +#include "base/basictypes.h" // for ASSERT +#include "internal_logging.h" // for ASSERT +#include "static_vars.h" + +namespace tcmalloc { + +//------------------------------------------------------------------- +// Sampler to decide when to create a sample trace for an allocation +// Not thread safe: Each thread should have it's own sampler object. +// Caller must use external synchronization if used +// from multiple threads. 
+// +// With 512K average sample step (the default): +// the probability of sampling a 4K allocation is about 0.00778 +// the probability of sampling a 1MB allocation is about 0.865 +// the probability of sampling a 1GB allocation is about 1.00000 +// In general, the probablity of sampling is an allocation of size X +// given a flag value of Y (default 1M) is: +// 1 - e^(-X/Y) +// +// With 128K average sample step: +// the probability of sampling a 1MB allocation is about 0.99966 +// the probability of sampling a 1GB allocation is about 1.0 +// (about 1 - 2**(-26)) +// With 1M average sample step: +// the probability of sampling a 4K allocation is about 0.00390 +// the probability of sampling a 1MB allocation is about 0.632 +// the probability of sampling a 1GB allocation is about 1.0 +// +// The sampler works by representing memory as a long stream from +// which allocations are taken. Some of the bytes in this stream are +// marked and if an allocation includes a marked byte then it is +// sampled. Bytes are marked according to a Poisson point process +// with each byte being marked independently with probability +// p = 1/tcmalloc_sample_parameter. This makes the probability +// of sampling an allocation of X bytes equal to the CDF of +// a geometric with mean tcmalloc_sample_parameter. (ie. the +// probability that at least one byte in the range is marked). This +// is accurately given by the CDF of the corresponding exponential +// distribution : 1 - e^(-X/tcmalloc_sample_parameter_) +// Independence of the byte marking ensures independence of +// the sampling of each allocation. +// +// This scheme is implemented by noting that, starting from any +// fixed place, the number of bytes until the next marked byte +// is geometrically distributed. This number is recorded as +// bytes_until_sample_. Every allocation subtracts from this +// number until it is less than 0. When this happens the current +// allocation is sampled. 
+// +// When an allocation occurs, bytes_until_sample_ is reset to +// a new independtly sampled geometric number of bytes. The +// memoryless property of the point process means that this may +// be taken as the number of bytes after the end of the current +// allocation until the next marked byte. This ensures that +// very large allocations which would intersect many marked bytes +// only result in a single call to PickNextSamplingPoint. +//------------------------------------------------------------------- + +class SamplerTest; + +class PERFTOOLS_DLL_DECL Sampler { + public: + // Initialize this sampler. + void Init(uint64_t seed); + + // Record allocation of "k" bytes. Return true if no further work + // is need, and false if allocation needed to be sampled. + bool RecordAllocation(size_t k); + + // Same as above (but faster), except: + // a) REQUIRES(k < std::numeric_limits<ssize_t>::max()) + // b) if this returns false, you must call RecordAllocation + // to confirm if sampling truly needed. + // + // The point of this function is to only deal with common case of no + // sampling and let caller (which is in malloc fast-path) to + // "escalate" to fuller and slower logic only if necessary. + bool TryRecordAllocationFast(size_t k); + + // Generate a geometric with mean 512K (or FLAG_tcmalloc_sample_parameter) + ssize_t PickNextSamplingPoint(); + + // Returns the current sample period + static int GetSamplePeriod(); + + // The following are public for the purposes of testing + static uint64_t NextRandom(uint64_t rnd_); // Returns the next prng value + + // C++03 requires that types stored in TLS be POD. As a result, you must + // initialize these members to {0, 0, false} before using this class! + // + // TODO(ahh): C++11 support will let us make these private. + + // Bytes until we sample next. 
+ // + // More specifically when bytes_until_sample_ is X, we can allocate + // X bytes without triggering sampling; on the (X+1)th allocated + // byte, the containing allocation will be sampled. + // + // Always non-negative with only very brief exceptions (see + // DecrementFast{,Finish}, so casting to size_t is ok. + ssize_t bytes_until_sample_; + uint64_t rnd_; // Cheap random number generator + bool initialized_; + + private: + friend class SamplerTest; + bool RecordAllocationSlow(size_t k); +}; + +inline bool Sampler::RecordAllocation(size_t k) { + // The first time we enter this function we expect bytes_until_sample_ + // to be zero, and we must call SampleAllocationSlow() to ensure + // proper initialization of static vars. + ASSERT(Static::IsInited() || bytes_until_sample_ == 0); + + // Note that we have to deal with arbitrarily large values of k + // here. Thus we're upcasting bytes_until_sample_ to unsigned rather + // than the other way around. And this is why this code cannot be + // merged with DecrementFast code below. + if (static_cast<size_t>(bytes_until_sample_) < k) { + bool result = RecordAllocationSlow(k); + ASSERT(Static::IsInited()); + return result; + } else { + bytes_until_sample_ -= k; + ASSERT(Static::IsInited()); + return true; + } +} + +inline bool Sampler::TryRecordAllocationFast(size_t k) { + // For efficiency reason, we're testing bytes_until_sample_ after + // decrementing it by k. This allows compiler to do sub <reg>, <mem> + // followed by conditional jump on sign. But it is correct only if k + // is actually smaller than largest ssize_t value. Otherwise + // converting k to signed value overflows. + // + // It would be great for generated code to be sub <reg>, <mem> + // followed by conditional jump on 'carry', which would work for + // arbitrary values of k, but there seem to be no way to express + // that in C++. + // + // Our API contract explicitly states that only small values of k + // are permitted. 
And thus it makes sense to assert on that. + ASSERT(static_cast<ssize_t>(k) >= 0); + + bytes_until_sample_ -= static_cast<ssize_t>(k); + if (PREDICT_FALSE(bytes_until_sample_ < 0)) { + // Note, we undo sampling counter update, since we're not actually + // handling slow path in the "needs sampling" case (calling + // RecordAllocationSlow to reset counter). And we do that in order + // to avoid non-tail calls in malloc fast-path. See also comments + // on declaration inside Sampler class. + // + // volatile is used here to improve compiler's choice of + // instuctions. We know that this path is very rare and that there + // is no need to keep previous value of bytes_until_sample_ in + // register. This helps compiler generate slightly more efficient + // sub <reg>, <mem> instruction for subtraction above. + volatile ssize_t *ptr = + const_cast<volatile ssize_t *>(&bytes_until_sample_); + *ptr += k; + return false; + } + return true; +} + +// Inline functions which are public for testing purposes + +// Returns the next prng value. +// pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48 +// This is the lrand64 generator. +inline uint64_t Sampler::NextRandom(uint64_t rnd) { + const uint64_t prng_mult = 0x5DEECE66DULL; + const uint64_t prng_add = 0xB; + const uint64_t prng_mod_power = 48; + const uint64_t prng_mod_mask = + ~((~static_cast<uint64_t>(0)) << prng_mod_power); + return (prng_mult * rnd + prng_add) & prng_mod_mask; +} + +} // namespace tcmalloc + +#endif // TCMALLOC_SAMPLER_H_ diff --git a/src/third_party/gperftools-2.7/src/solaris/libstdc++.la b/src/third_party/gperftools-2.7/src/solaris/libstdc++.la new file mode 100644 index 00000000000..3edf4254192 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/solaris/libstdc++.la @@ -0,0 +1,51 @@ +# libstdc++.la - a libtool library file +# Generated by ltmain.sh - GNU libtool 1.4a-GCC3.0 (1.641.2.256 2001/05/28 20:09:07 with GCC-local changes) +# +# Please DO NOT delete this file! 
+# It is necessary for linking the library. + +# --- +# NOTE: This file lives in /usr/sfw/lib on Solaris 10. Unfortunately, +# due to an apparent bug in the Solaris 10 6/06 release, +# /usr/sfw/lib/libstdc++.la is empty. Below is the correct content, +# according to +# http://forum.java.sun.com/thread.jspa?threadID=5073150 +# By passing LDFLAGS='-Lsrc/solaris' to configure, make will pick up +# this copy of the file rather than the empty copy in /usr/sfw/lib. +# +# Also see +# http://www.technicalarticles.org/index.php/Compiling_MySQL_5.0_on_Solaris_10 +# +# Note: this is for 32-bit systems. If you have a 64-bit system, +# uncomment the appropriate dependency_libs line below. +# ---- + +# The name that we can dlopen(3). +dlname='libstdc++.so.6' + +# Names of this library. +library_names='libstdc++.so.6.0.3 libstdc++.so.6 libstdc++.so' + +# The name of the static archive. +old_library='libstdc++.a' + +# Libraries that this one depends upon. +# 32-bit version: +dependency_libs='-lc -lm -L/usr/sfw/lib -lgcc_s' +# 64-bit version: +#dependency_libs='-L/lib/64 -lc -lm -L/usr/sfw/lib/64 -lgcc_s' + +# Version information for libstdc++. +current=6 +age=0 +revision=3 + +# Is this an already installed library? +installed=yes + +# Files to dlopen/dlpreopen +dlopen='' +dlpreopen='' + +# Directory that this library needs to be installed in: +libdir='/usr/sfw/lib' diff --git a/src/third_party/gperftools-2.7/src/span.cc b/src/third_party/gperftools-2.7/src/span.cc new file mode 100644 index 00000000000..4d089640d18 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/span.cc @@ -0,0 +1,102 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <config.h> +#include "span.h" + +#include <string.h> // for NULL, memset + +#include "internal_logging.h" // for ASSERT +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "static_vars.h" // for Static + +namespace tcmalloc { + +#ifdef SPAN_HISTORY +void Event(Span* span, char op, int v = 0) { + span->history[span->nexthistory] = op; + span->value[span->nexthistory] = v; + span->nexthistory++; + if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0; +} +#endif + +Span* NewSpan(PageID p, Length len) { + Span* result = Static::span_allocator()->New(); + memset(result, 0, sizeof(*result)); + result->start = p; + result->length = len; +#ifdef SPAN_HISTORY + result->nexthistory = 0; +#endif + return result; +} + +void DeleteSpan(Span* span) { +#ifndef NDEBUG + // In debug mode, trash the contents of deleted Spans + memset(span, 0x3f, sizeof(*span)); +#endif + Static::span_allocator()->Delete(span); +} + +void DLL_Init(Span* list) { + list->next = list; + list->prev = list; +} + +void DLL_Remove(Span* span) { + span->prev->next = span->next; + span->next->prev = span->prev; + span->prev = NULL; + span->next = NULL; +} + +int DLL_Length(const Span* list) { + int result = 0; + for (Span* s = list->next; s != list; s = s->next) { + result++; + } + return result; +} + +void DLL_Prepend(Span* list, Span* span) { + ASSERT(span->next == NULL); + ASSERT(span->prev == NULL); + span->next = list->next; + span->prev = list; + list->next->prev = span; + list->next = span; +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.7/src/span.h b/src/third_party/gperftools-2.7/src/span.h new file mode 100644 index 00000000000..ca3f710ff8f --- /dev/null +++ b/src/third_party/gperftools-2.7/src/span.h @@ -0,0 +1,175 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// A Span is a contiguous run of pages. + +#ifndef TCMALLOC_SPAN_H_ +#define TCMALLOC_SPAN_H_ + +#include <config.h> +#include <set> +#include "common.h" +#include "base/logging.h" +#include "page_heap_allocator.h" + +namespace tcmalloc { + +struct SpanBestFitLess; +struct Span; + +// Store a pointer to a span along with a cached copy of its length. 
+// These are used as set elements to improve the performance of +// comparisons during tree traversal: the lengths are inline with the +// tree nodes and thus avoid expensive cache misses to dereference +// the actual Span objects in most cases. +struct SpanPtrWithLength { + explicit SpanPtrWithLength(Span* s); + + Span* span; + Length length; +}; +typedef std::set<SpanPtrWithLength, SpanBestFitLess, STLPageHeapAllocator<SpanPtrWithLength, void> > SpanSet; + +// Comparator for best-fit search, with address order as a tie-breaker. +struct SpanBestFitLess { + bool operator()(SpanPtrWithLength a, SpanPtrWithLength b) const; +}; + +// Information kept for a span (a contiguous run of pages). +struct Span { + PageID start; // Starting page number + Length length; // Number of pages in span + Span* next; // Used when in link list + Span* prev; // Used when in link list + union { + void* objects; // Linked list of free objects + + // Span may contain iterator pointing back at SpanSet entry of + // this span into set of large spans. It is used to quickly delete + // spans from those sets. span_iter_space is space for such + // iterator which lifetime is controlled explicitly. + char span_iter_space[sizeof(SpanSet::iterator)]; + }; + unsigned int refcount : 16; // Number of non-free objects + unsigned int sizeclass : 8; // Size-class for small objects (or 0) + unsigned int location : 2; // Is the span on a freelist, and if so, which? + unsigned int sample : 1; // Sampled object? + bool has_span_iter : 1; // Iff span_iter_space has valid + // iterator. Only for debug builds. + + // Sets iterator stored in span_iter_space. + // Requires has_span_iter == 0. + void SetSpanSetIterator(const SpanSet::iterator& iter); + // Copies out and destroys iterator stored in span_iter_space. 
+ SpanSet::iterator ExtractSpanSetIterator(); + +#undef SPAN_HISTORY +#ifdef SPAN_HISTORY + // For debugging, we can keep a log events per span + int nexthistory; + char history[64]; + int value[64]; +#endif + + // What freelist the span is on: IN_USE if on none, or normal or returned + enum { IN_USE, ON_NORMAL_FREELIST, ON_RETURNED_FREELIST }; +}; + +#ifdef SPAN_HISTORY +void Event(Span* span, char op, int v = 0); +#else +#define Event(s,o,v) ((void) 0) +#endif + +inline SpanPtrWithLength::SpanPtrWithLength(Span* s) + : span(s), + length(s->length) { +} + +inline bool SpanBestFitLess::operator()(SpanPtrWithLength a, SpanPtrWithLength b) const { + if (a.length < b.length) + return true; + if (a.length > b.length) + return false; + return a.span->start < b.span->start; +} + +inline void Span::SetSpanSetIterator(const SpanSet::iterator& iter) { + ASSERT(!has_span_iter); + has_span_iter = 1; + + new (span_iter_space) SpanSet::iterator(iter); +} + +inline SpanSet::iterator Span::ExtractSpanSetIterator() { + typedef SpanSet::iterator iterator_type; + + ASSERT(has_span_iter); + has_span_iter = 0; + + iterator_type* this_iter = + reinterpret_cast<iterator_type*>(span_iter_space); + iterator_type retval = *this_iter; + this_iter->~iterator_type(); + return retval; +} + +// Allocator/deallocator for spans +Span* NewSpan(PageID p, Length len); +void DeleteSpan(Span* span); + +// ------------------------------------------------------------------------- +// Doubly linked list of spans. +// ------------------------------------------------------------------------- + +// Initialize *list to an empty list. +void DLL_Init(Span* list); + +// Remove 'span' from the linked list in which it resides, updating the +// pointers of adjacent Spans and setting span's next and prev to NULL. +void DLL_Remove(Span* span); + +// Return true iff "list" is empty. +inline bool DLL_IsEmpty(const Span* list) { + return list->next == list; +} + +// Add span to the front of list. 
+void DLL_Prepend(Span* list, Span* span); + +// Return the length of the linked list. O(n) +int DLL_Length(const Span* list); + +} // namespace tcmalloc + +#endif // TCMALLOC_SPAN_H_ diff --git a/src/third_party/gperftools-2.7/src/stack_trace_table.cc b/src/third_party/gperftools-2.7/src/stack_trace_table.cc new file mode 100644 index 00000000000..1862124af3c --- /dev/null +++ b/src/third_party/gperftools-2.7/src/stack_trace_table.cc @@ -0,0 +1,160 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Andrew Fikes + +#include <config.h> +#include "stack_trace_table.h" +#include <string.h> // for NULL, memset +#include "base/spinlock.h" // for SpinLockHolder +#include "common.h" // for StackTrace +#include "internal_logging.h" // for ASSERT, Log +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "static_vars.h" // for Static + +namespace tcmalloc { + +bool StackTraceTable::Bucket::KeyEqual(uintptr_t h, + const StackTrace& t) const { + const bool eq = (this->hash == h && this->trace.depth == t.depth); + for (int i = 0; eq && i < t.depth; ++i) { + if (this->trace.stack[i] != t.stack[i]) { + return false; + } + } + return eq; +} + +StackTraceTable::StackTraceTable() + : error_(false), + depth_total_(0), + bucket_total_(0), + table_(new Bucket*[kHashTableSize]()) { + memset(table_, 0, kHashTableSize * sizeof(Bucket*)); +} + +StackTraceTable::~StackTraceTable() { + delete[] table_; +} + +void StackTraceTable::AddTrace(const StackTrace& t) { + if (error_) { + return; + } + + // Hash function borrowed from base/heap-profile-table.cc + uintptr_t h = 0; + for (int i = 0; i < t.depth; ++i) { + h += reinterpret_cast<uintptr_t>(t.stack[i]); + h += h << 10; + h ^= h >> 6; + } + h += h << 3; + h ^= h >> 11; + + const int idx = h % kHashTableSize; + + Bucket* b = table_[idx]; + while (b != NULL && !b->KeyEqual(h, t)) { + b = b->next; + } + if (b != NULL) { + b->count++; + b->trace.size += t.size; // keep 
cumulative size + } else { + depth_total_ += t.depth; + bucket_total_++; + b = Static::bucket_allocator()->New(); + if (b == NULL) { + Log(kLog, __FILE__, __LINE__, + "tcmalloc: could not allocate bucket", sizeof(*b)); + error_ = true; + } else { + b->hash = h; + b->trace = t; + b->count = 1; + b->next = table_[idx]; + table_[idx] = b; + } + } +} + +void** StackTraceTable::ReadStackTracesAndClear() { + if (error_) { + return NULL; + } + + // Allocate output array + const int out_len = bucket_total_ * 3 + depth_total_ + 1; + void** out = new void*[out_len]; + if (out == NULL) { + Log(kLog, __FILE__, __LINE__, + "tcmalloc: allocation failed for stack traces", + out_len * sizeof(*out)); + return NULL; + } + + // Fill output array + int idx = 0; + for (int i = 0; i < kHashTableSize; ++i) { + Bucket* b = table_[i]; + while (b != NULL) { + out[idx++] = reinterpret_cast<void*>(static_cast<uintptr_t>(b->count)); + out[idx++] = reinterpret_cast<void*>(b->trace.size); // cumulative size + out[idx++] = reinterpret_cast<void*>(b->trace.depth); + for (int d = 0; d < b->trace.depth; ++d) { + out[idx++] = b->trace.stack[d]; + } + b = b->next; + } + } + out[idx++] = NULL; + ASSERT(idx == out_len); + + // Clear state + error_ = false; + depth_total_ = 0; + bucket_total_ = 0; + SpinLockHolder h(Static::pageheap_lock()); + for (int i = 0; i < kHashTableSize; ++i) { + Bucket* b = table_[i]; + while (b != NULL) { + Bucket* next = b->next; + Static::bucket_allocator()->Delete(b); + b = next; + } + table_[i] = NULL; + } + + return out; +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.7/src/stack_trace_table.h b/src/third_party/gperftools-2.7/src/stack_trace_table.h new file mode 100644 index 00000000000..e2897715354 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/stack_trace_table.h @@ -0,0 +1,92 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2009, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Andrew Fikes +// +// Utility class for coalescing sampled stack traces. Not thread-safe. 
+ +#ifndef TCMALLOC_STACK_TRACE_TABLE_H_ +#define TCMALLOC_STACK_TRACE_TABLE_H_ + +#include <config.h> +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif +#include "common.h" + +namespace tcmalloc { + +class PERFTOOLS_DLL_DECL StackTraceTable { + public: + // REQUIRES: L < pageheap_lock + StackTraceTable(); + ~StackTraceTable(); + + // Adds stack trace "t" to table. + // + // REQUIRES: L >= pageheap_lock + void AddTrace(const StackTrace& t); + + // Returns stack traces formatted per MallocExtension guidelines. + // May return NULL on error. Clears state before returning. + // + // REQUIRES: L < pageheap_lock + void** ReadStackTracesAndClear(); + + // Exposed for PageHeapAllocator + struct Bucket { + // Key + uintptr_t hash; + StackTrace trace; + + // Payload + int count; + Bucket* next; + + bool KeyEqual(uintptr_t h, const StackTrace& t) const; + }; + + // For testing + int depth_total() const { return depth_total_; } + int bucket_total() const { return bucket_total_; } + + private: + static const int kHashTableSize = 1 << 14; // => table_ is 128k + + bool error_; + int depth_total_; + int bucket_total_; + Bucket** table_; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_STACK_TRACE_TABLE_H_ diff --git a/src/third_party/gperftools-2.7/src/stacktrace.cc b/src/third_party/gperftools-2.7/src/stacktrace.cc new file mode 100644 index 00000000000..7e853d84e43 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/stacktrace.cc @@ -0,0 +1,340 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat
+//
+// Produce stack trace.
+//
+// There are three different ways we can try to get the stack trace:
+//
+// 1) Our hand-coded stack-unwinder.  This depends on a certain stack
+//    layout, which is used by gcc (and those systems using a
+//    gcc-compatible ABI) on x86 systems, at least since gcc 2.95.
+//    It uses the frame pointer to do its work.
+//
+// 2) The libunwind library.  This is still in development, and as a
+//    separate library adds a new dependency, but doesn't need a frame
+//    pointer.  It also doesn't call malloc.
+//
+// 3) The gdb unwinder -- also the one used by the c++ exception code.
+//    It's obviously well-tested, but has a fatal flaw: it can call
+//    malloc() from the unwinder.
This is a problem because we're +// trying to use the unwinder to instrument malloc(). +// +// Note: if you add a new implementation here, make sure it works +// correctly when GetStackTrace() is called with max_depth == 0. +// Some code may do that. + +#include <config.h> +#include <stdlib.h> // for getenv +#include <string.h> // for strcmp +#include <stdio.h> // for fprintf +#include "gperftools/stacktrace.h" +#include "base/commandlineflags.h" +#include "base/googleinit.h" +#include "getenv_safe.h" + + +// we're using plain struct and not class to avoid any possible issues +// during initialization. Struct of pointers is easy to init at +// link-time. +struct GetStackImplementation { + int (*GetStackFramesPtr)(void** result, int* sizes, int max_depth, + int skip_count); + + int (*GetStackFramesWithContextPtr)(void** result, int* sizes, int max_depth, + int skip_count, const void *uc); + + int (*GetStackTracePtr)(void** result, int max_depth, + int skip_count); + + int (*GetStackTraceWithContextPtr)(void** result, int max_depth, + int skip_count, const void *uc); + + const char *name; +}; + +#if HAVE_DECL_BACKTRACE +#define STACKTRACE_INL_HEADER "stacktrace_generic-inl.h" +#define GST_SUFFIX generic +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_generic +#endif + +#ifdef HAVE_UNWIND_BACKTRACE +#define STACKTRACE_INL_HEADER "stacktrace_libgcc-inl.h" +#define GST_SUFFIX libgcc +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_libgcc +#endif + +// libunwind uses __thread so we check for both libunwind.h and +// __thread support +#if defined(HAVE_LIBUNWIND_H) && defined(HAVE_TLS) +#define STACKTRACE_INL_HEADER "stacktrace_libunwind-inl.h" +#define GST_SUFFIX libunwind +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_libunwind +#endif // HAVE_LIBUNWIND_H + +#if defined(__i386__) || 
defined(__x86_64__) +#define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h" +#define GST_SUFFIX x86 +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_x86 +#endif // i386 || x86_64 + +#if defined(__ppc__) || defined(__PPC__) +#if defined(__linux__) +#define STACKTRACE_INL_HEADER "stacktrace_powerpc-linux-inl.h" +#else +#define STACKTRACE_INL_HEADER "stacktrace_powerpc-darwin-inl.h" +#endif +#define GST_SUFFIX ppc +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_ppc +#endif + +#if defined(__arm__) +#define STACKTRACE_INL_HEADER "stacktrace_arm-inl.h" +#define GST_SUFFIX arm +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_arm +#endif + +#ifdef TCMALLOC_ENABLE_INSTRUMENT_STACKTRACE +#define STACKTRACE_INL_HEADER "stacktrace_instrument-inl.h" +#define GST_SUFFIX instrument +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_instrument +#endif + +// The Windows case -- probably cygwin and mingw will use one of the +// x86-includes above, but if not, we can fall back to windows intrinsics. 
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__) +#define STACKTRACE_INL_HEADER "stacktrace_win32-inl.h" +#define GST_SUFFIX win32 +#include "stacktrace_impl_setup-inl.h" +#undef GST_SUFFIX +#undef STACKTRACE_INL_HEADER +#define HAVE_GST_win32 +#endif + +static GetStackImplementation *all_impls[] = { +#ifdef HAVE_GST_libgcc + &impl__libgcc, +#endif +#ifdef HAVE_GST_generic + &impl__generic, +#endif +#ifdef HAVE_GST_libunwind + &impl__libunwind, +#endif +#ifdef HAVE_GST_x86 + &impl__x86, +#endif +#ifdef HAVE_GST_arm + &impl__arm, +#endif +#ifdef HAVE_GST_ppc + &impl__ppc, +#endif +#ifdef HAVE_GST_instrument + &impl__instrument, +#endif +#ifdef HAVE_GST_win32 + &impl__win32, +#endif + NULL +}; + +// ppc and i386 implementations prefer arch-specific asm implementations. +// arm's asm implementation is broken +#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || defined(__PPC__) +#if !defined(NO_FRAME_POINTER) +#define TCMALLOC_DONT_PREFER_LIBUNWIND +#endif +#endif + +static bool get_stack_impl_inited; + +#if defined(HAVE_GST_instrument) +static GetStackImplementation *get_stack_impl = &impl__instrument; +#elif defined(HAVE_GST_win32) +static GetStackImplementation *get_stack_impl = &impl__win32; +#elif defined(HAVE_GST_x86) && defined(TCMALLOC_DONT_PREFER_LIBUNWIND) +static GetStackImplementation *get_stack_impl = &impl__x86; +#elif defined(HAVE_GST_ppc) && defined(TCMALLOC_DONT_PREFER_LIBUNWIND) +static GetStackImplementation *get_stack_impl = &impl__ppc; +#elif defined(HAVE_GST_libunwind) +static GetStackImplementation *get_stack_impl = &impl__libunwind; +#elif defined(HAVE_GST_libgcc) +static GetStackImplementation *get_stack_impl = &impl__libgcc; +#elif defined(HAVE_GST_generic) +static GetStackImplementation *get_stack_impl = &impl__generic; +#elif defined(HAVE_GST_arm) +static GetStackImplementation *get_stack_impl = &impl__arm; +#elif 0 +// This is for the benefit of code analysis tools that may have 
+// trouble with the computed #include above. +# include "stacktrace_x86-inl.h" +# include "stacktrace_libunwind-inl.h" +# include "stacktrace_generic-inl.h" +# include "stacktrace_powerpc-inl.h" +# include "stacktrace_win32-inl.h" +# include "stacktrace_arm-inl.h" +# include "stacktrace_instrument-inl.h" +#else +#error Cannot calculate stack trace: will need to write for your environment +#endif + +static int ATTRIBUTE_NOINLINE frame_forcer(int rv) { + return rv; +} + +static void init_default_stack_impl_inner(void); + +namespace tcmalloc { + bool EnterStacktraceScope(void); + void LeaveStacktraceScope(void); +} + +namespace { + using tcmalloc::EnterStacktraceScope; + using tcmalloc::LeaveStacktraceScope; + + class StacktraceScope { + bool stacktrace_allowed; + public: + StacktraceScope() { + stacktrace_allowed = true; + stacktrace_allowed = EnterStacktraceScope(); + } + bool IsStacktraceAllowed() { + return stacktrace_allowed; + } + ~StacktraceScope() { + if (stacktrace_allowed) { + LeaveStacktraceScope(); + } + } + }; +} + +PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth, + int skip_count) { + StacktraceScope scope; + if (!scope.IsStacktraceAllowed()) { + return 0; + } + init_default_stack_impl_inner(); + return frame_forcer(get_stack_impl->GetStackFramesPtr(result, sizes, max_depth, skip_count)); +} + +PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth, + int skip_count, const void *uc) { + StacktraceScope scope; + if (!scope.IsStacktraceAllowed()) { + return 0; + } + init_default_stack_impl_inner(); + return frame_forcer(get_stack_impl->GetStackFramesWithContextPtr( + result, sizes, max_depth, + skip_count, uc)); +} + +PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth, + int skip_count) { + StacktraceScope scope; + if (!scope.IsStacktraceAllowed()) { + return 0; + } + init_default_stack_impl_inner(); + return frame_forcer(get_stack_impl->GetStackTracePtr(result, max_depth, 
skip_count)); +} + +PERFTOOLS_DLL_DECL int GetStackTraceWithContext(void** result, int max_depth, + int skip_count, const void *uc) { + StacktraceScope scope; + if (!scope.IsStacktraceAllowed()) { + return 0; + } + init_default_stack_impl_inner(); + return frame_forcer(get_stack_impl->GetStackTraceWithContextPtr( + result, max_depth, skip_count, uc)); +} + +static void init_default_stack_impl_inner(void) { + if (get_stack_impl_inited) { + return; + } + get_stack_impl_inited = true; + const char *val = TCMallocGetenvSafe("TCMALLOC_STACKTRACE_METHOD"); + if (!val || !*val) { + return; + } + for (GetStackImplementation **p = all_impls; *p; p++) { + GetStackImplementation *c = *p; + if (strcmp(c->name, val) == 0) { + get_stack_impl = c; + return; + } + } + fprintf(stderr, "Unknown or unsupported stacktrace method requested: %s. Ignoring it\n", val); +} + +static void init_default_stack_impl(void) { + init_default_stack_impl_inner(); + if (EnvToBool("TCMALLOC_STACKTRACE_METHOD_VERBOSE", false)) { + fprintf(stderr, "Chosen stacktrace method is %s\nSupported methods:\n", get_stack_impl->name); + for (GetStackImplementation **p = all_impls; *p; p++) { + GetStackImplementation *c = *p; + fprintf(stderr, "* %s\n", c->name); + } + fputs("\n", stderr); + } +} + +REGISTER_MODULE_INITIALIZER(stacktrace_init_default_stack_impl, init_default_stack_impl()); diff --git a/src/third_party/gperftools-2.7/src/stacktrace_arm-inl.h b/src/third_party/gperftools-2.7/src/stacktrace_arm-inl.h new file mode 100644 index 00000000000..1586b8fec62 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/stacktrace_arm-inl.h @@ -0,0 +1,148 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2011, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Doug Kwan +// This is inspired by Craig Silverstein's PowerPC stacktrace code. +// + +#ifndef BASE_STACKTRACE_ARM_INL_H_ +#define BASE_STACKTRACE_ARM_INL_H_ +// Note: this file is included into stacktrace.cc more than once. 
+// Anything that should only be defined once should be here: + +#include <stdint.h> // for uintptr_t +#include "base/basictypes.h" // for NULL +#include <gperftools/stacktrace.h> + +// WARNING: +// This only works if all your code is in either ARM or THUMB mode. With +// interworking, the frame pointer of the caller can either be in r11 (ARM +// mode) or r7 (THUMB mode). A callee only saves the frame pointer of its +// mode in a fixed location on its stack frame. If the caller is a different +// mode, there is no easy way to find the frame pointer. It can either be +// still in the designated register or saved on stack along with other callee +// saved registers. + +// Given a pointer to a stack frame, locate and return the calling +// stackframe, or return NULL if no stackframe can be found. Perform sanity +// checks (the strictness of which is controlled by the boolean parameter +// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned. +template<bool STRICT_UNWINDING> +static void **NextStackFrame(void **old_sp) { + void **new_sp = (void**) old_sp[-1]; + + // Check that the transition from frame pointer old_sp to frame + // pointer new_sp isn't clearly bogus + if (STRICT_UNWINDING) { + // With the stack growing downwards, older stack frame must be + // at a greater address that the current one. + if (new_sp <= old_sp) return NULL; + // Assume stack frames larger than 100,000 bytes are bogus. + if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL; + } else { + // In the non-strict mode, allow discontiguous stack frames. + // (alternate-signal-stacks for example). + if (new_sp == old_sp) return NULL; + // And allow frames upto about 1MB. + if ((new_sp > old_sp) + && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return NULL; + } + if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL; + return new_sp; +} + +// This ensures that GetStackTrace stes up the Link Register properly. 
+#ifdef __GNUC__ +void StacktraceArmDummyFunction() __attribute__((noinline)); +void StacktraceArmDummyFunction() { __asm__ volatile(""); } +#else +# error StacktraceArmDummyFunction() needs to be ported to this platform. +#endif +#endif // BASE_STACKTRACE_ARM_INL_H_ + +// Note: this part of the file is included several times. +// Do not put globals below. + +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() +// +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +static int GET_STACK_TRACE_OR_FRAMES { +#ifdef __GNUC__ + void **sp = reinterpret_cast<void**>(__builtin_frame_address(0)); +#else +# error reading stack point not yet supported on this platform. +#endif + + // On ARM, the return address is stored in the link register (r14). + // This is not saved on the stack frame of a leaf function. To + // simplify code that reads return addresses, we call a dummy + // function so that the return address of this function is also + // stored in the stack frame. This works at least for gcc. + StacktraceArmDummyFunction(); + + skip_count++; // skip parent frame due to indirection in stacktrace.cc + + int n = 0; + while (sp && n < max_depth) { + // The GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few bogus + // entries in some rare cases). 
+ void **next_sp = NextStackFrame<IS_STACK_FRAMES == 0>(sp); + + if (skip_count > 0) { + skip_count--; + } else { + result[n] = *sp; + +#if IS_STACK_FRAMES + if (next_sp > sp) { + sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } +#endif + n++; + } + sp = next_sp; + } + return n; +} diff --git a/src/third_party/gperftools-2.7/src/stacktrace_generic-inl.h b/src/third_party/gperftools-2.7/src/stacktrace_generic-inl.h new file mode 100644 index 00000000000..7d7c22d9e45 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/stacktrace_generic-inl.h @@ -0,0 +1,84 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Portable implementation - just use glibc +// +// Note: The glibc implementation may cause a call to malloc. +// This can cause a deadlock in HeapProfiler. + +#ifndef BASE_STACKTRACE_GENERIC_INL_H_ +#define BASE_STACKTRACE_GENERIC_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + +#include <execinfo.h> +#include <string.h> +#include "gperftools/stacktrace.h" +#endif // BASE_STACKTRACE_GENERIC_INL_H_ + +// Note: this part of the file is included several times. +// Do not put globals below. 
+ +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() +// +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +static int GET_STACK_TRACE_OR_FRAMES { + static const int kStackLength = 64; + void * stack[kStackLength]; + int size; + + size = backtrace(stack, kStackLength); + skip_count += 2; // we want to skip the current and it's parent frame as well + int result_count = size - skip_count; + if (result_count < 0) + result_count = 0; + if (result_count > max_depth) + result_count = max_depth; + for (int i = 0; i < result_count; i++) + result[i] = stack[i + skip_count]; + +#if IS_STACK_FRAMES + // No implementation for finding out the stack frame sizes yet. + memset(sizes, 0, sizeof(*sizes) * result_count); +#endif + + return result_count; +} diff --git a/src/third_party/gperftools-2.7/src/stacktrace_impl_setup-inl.h b/src/third_party/gperftools-2.7/src/stacktrace_impl_setup-inl.h new file mode 100644 index 00000000000..698c5b38196 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/stacktrace_impl_setup-inl.h @@ -0,0 +1,94 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// NOTE: this is NOT to be #include-d normally. It's internal +// implementation detail of stacktrace.cc +// + +// Copyright (c) 2014, gperftools Contributors. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Aliaksey Kandratsenka <alk@tut.by> +// +// based on stacktrace.cc and stacktrace_config.h by Sanjay Ghemawat +// and Paul Pluzhnikov from Google Inc + +#define SIS_CONCAT2(a, b) a##b +#define SIS_CONCAT(a, b) SIS_CONCAT2(a,b) + +#define SIS_STRINGIFY(a) SIS_STRINGIFY2(a) +#define SIS_STRINGIFY2(a) #a + +#define IS_STACK_FRAMES 0 +#define IS_WITH_CONTEXT 0 +#define GET_STACK_TRACE_OR_FRAMES \ + SIS_CONCAT(GetStackTrace_, GST_SUFFIX)(void **result, int max_depth, int skip_count) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +#define IS_STACK_FRAMES 1 +#define IS_WITH_CONTEXT 0 +#define GET_STACK_TRACE_OR_FRAMES \ + SIS_CONCAT(GetStackFrames_, GST_SUFFIX)(void **result, int *sizes, int max_depth, int skip_count) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +#define IS_STACK_FRAMES 0 +#define IS_WITH_CONTEXT 1 +#define GET_STACK_TRACE_OR_FRAMES \ + SIS_CONCAT(GetStackTraceWithContext_, GST_SUFFIX)(void **result, int max_depth, \ + int skip_count, const void *ucp) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +#define IS_STACK_FRAMES 1 +#define IS_WITH_CONTEXT 1 +#define GET_STACK_TRACE_OR_FRAMES \ + SIS_CONCAT(GetStackFramesWithContext_, GST_SUFFIX)(void **result, int *sizes, int max_depth, \ + int skip_count, const void *ucp) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +static GetStackImplementation SIS_CONCAT(impl__,GST_SUFFIX) = { + SIS_CONCAT(GetStackFrames_, GST_SUFFIX), + SIS_CONCAT(GetStackFramesWithContext_, GST_SUFFIX), + SIS_CONCAT(GetStackTrace_, GST_SUFFIX), + SIS_CONCAT(GetStackTraceWithContext_, GST_SUFFIX), + SIS_STRINGIFY(GST_SUFFIX) +}; + +#undef SIS_CONCAT2 +#undef SIS_CONCAT diff --git a/src/third_party/gperftools-2.7/src/stacktrace_instrument-inl.h 
b/src/third_party/gperftools-2.7/src/stacktrace_instrument-inl.h new file mode 100755 index 00000000000..c631765c8a2 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/stacktrace_instrument-inl.h @@ -0,0 +1,155 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2013, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Jean Lee <xiaoyur347@gmail.com> +// based on gcc Code-Gen-Options "-finstrument-functions" listed in +// http://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html . +// Should run configure with CXXFLAGS="-finstrument-functions". + +// This file is a backtrace implementation for systems : +// * The glibc implementation of backtrace() may cause a call to malloc, +// and cause a deadlock in HeapProfiler. +// * The libunwind implementation prints no backtrace. + +// The backtrace arrays are stored in "thread_back_trace" variable. +// Maybe to use thread local storage is better and should save memorys. + +#ifndef BASE_STACKTRACE_INSTRUMENT_INL_H_ +#define BASE_STACKTRACE_INSTRUMENT_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + +#include <execinfo.h> +#include <string.h> +#include <unistd.h> +#include <sys/syscall.h> +#include "gperftools/stacktrace.h" + +#define gettid() syscall(__NR_gettid) +#ifndef __x86_64__ +#define MAX_THREAD (32768) +#else +#define MAX_THREAD (65536) +#endif +#define MAX_DEPTH (30) +#define ATTRIBUTE_NOINSTRUMENT __attribute__ ((no_instrument_function)) + +typedef struct { + int stack_depth; + void* frame[MAX_DEPTH]; +}BACK_TRACE; + +static BACK_TRACE thread_back_trace[MAX_THREAD]; +extern "C" { +void __cyg_profile_func_enter(void *func_address, + void *call_site) ATTRIBUTE_NOINSTRUMENT; +void __cyg_profile_func_enter(void *func_address, void *call_site) { + (void)func_address; + + BACK_TRACE* backtrace = thread_back_trace + gettid(); + int stack_depth = backtrace->stack_depth; + backtrace->stack_depth = stack_depth + 1; + if ( stack_depth >= MAX_DEPTH ) { + return; + } + backtrace->frame[stack_depth] = call_site; +} + +void __cyg_profile_func_exit(void *func_address, + void *call_site) ATTRIBUTE_NOINSTRUMENT; +void __cyg_profile_func_exit(void *func_address, void *call_site) { + (void)func_address; + (void)call_site; + + BACK_TRACE* 
backtrace = thread_back_trace + gettid(); + int stack_depth = backtrace->stack_depth; + backtrace->stack_depth = stack_depth - 1; + if ( stack_depth >= MAX_DEPTH ) { + return; + } + backtrace->frame[stack_depth] = 0; +} +} // extern "C" + +static int cyg_backtrace(void **buffer, int size) { + BACK_TRACE* backtrace = thread_back_trace + gettid(); + int stack_depth = backtrace->stack_depth; + if ( stack_depth >= MAX_DEPTH ) { + stack_depth = MAX_DEPTH; + } + int nSize = (size > stack_depth) ? stack_depth : size; + for (int i = 0; i < nSize; i++) { + buffer[i] = backtrace->frame[nSize - i - 1]; + } + + return nSize; +} + +#endif // BASE_STACKTRACE_INSTRUMENT_INL_H_ + + +// Note: this part of the file is included several times. +// Do not put globals below. + +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() +// +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +static int GET_STACK_TRACE_OR_FRAMES { + static const int kStackLength = 64; + void * stack[kStackLength]; + int size; + memset(stack, 0, sizeof(stack)); + + size = cyg_backtrace(stack, kStackLength); + skip_count += 2; // we want to skip the current and parent frame as well + int result_count = size - skip_count; + if (result_count < 0) + result_count = 0; + if (result_count > max_depth) + result_count = max_depth; + for (int i = 0; i < result_count; i++) + result[i] = stack[i + skip_count]; + +#if IS_STACK_FRAMES + // No implementation for finding out the stack frame sizes yet. 
+ memset(sizes, 0, sizeof(*sizes) * result_count); +#endif + + return result_count; +} diff --git a/src/third_party/gperftools-2.7/src/stacktrace_libgcc-inl.h b/src/third_party/gperftools-2.7/src/stacktrace_libgcc-inl.h new file mode 100644 index 00000000000..ce9cf5196ad --- /dev/null +++ b/src/third_party/gperftools-2.7/src/stacktrace_libgcc-inl.h @@ -0,0 +1,111 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2016, gperftools Contributors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file implements backtrace capturing via libgcc's +// _Unwind_Backtrace. This generally works almost always. It will fail +// sometimes when we're trying to capture backtrace from signal +// handler (i.e. in cpu profiler) while some C++ code is throwing +// exception. + +#ifndef BASE_STACKTRACE_LIBGCC_INL_H_ +#define BASE_STACKTRACE_LIBGCC_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + +extern "C" { +#include <assert.h> +#include <string.h> // for memset() +} + +#include <unwind.h> + +#include "gperftools/stacktrace.h" + +struct libgcc_backtrace_data { + void **array; + int skip; + int pos; + int limit; +}; + +static _Unwind_Reason_Code libgcc_backtrace_helper(struct _Unwind_Context *ctx, + void *_data) { + libgcc_backtrace_data *data = + reinterpret_cast<libgcc_backtrace_data *>(_data); + + if (data->skip > 0) { + data->skip--; + return _URC_NO_REASON; + } + + if (data->pos < data->limit) { + void *ip = reinterpret_cast<void *>(_Unwind_GetIP(ctx));; + data->array[data->pos++] = ip; + } + + return _URC_NO_REASON; +} + +#endif // BASE_STACKTRACE_LIBGCC_INL_H_ + +// Note: this part of the file is included several times. +// Do not put globals below. 
+ +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() +// +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +static int GET_STACK_TRACE_OR_FRAMES { + libgcc_backtrace_data data; + data.array = result; + // we're also skipping current and parent's frame + data.skip = skip_count + 2; + data.pos = 0; + data.limit = max_depth; + + _Unwind_Backtrace(libgcc_backtrace_helper, &data); + + if (data.pos > 1 && data.array[data.pos - 1] == NULL) + --data.pos; + +#if IS_STACK_FRAMES + // No implementation for finding out the stack frame sizes. + memset(sizes, 0, sizeof(*sizes) * data.pos); +#endif + + return data.pos; +} diff --git a/src/third_party/gperftools-2.7/src/stacktrace_libunwind-inl.h b/src/third_party/gperftools-2.7/src/stacktrace_libunwind-inl.h new file mode 100644 index 00000000000..6f361ecd378 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/stacktrace_libunwind-inl.h @@ -0,0 +1,152 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Arun Sharma +// +// Produce stack trace using libunwind + +#ifndef BASE_STACKTRACE_LIBINWIND_INL_H_ +#define BASE_STACKTRACE_LIBINWIND_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + +// We only need local unwinder. +#define UNW_LOCAL_ONLY + +extern "C" { +#include <assert.h> +#include <string.h> // for memset() +#include <libunwind.h> +} +#include "gperftools/stacktrace.h" + +#include "base/basictypes.h" +#include "base/logging.h" + +// Sometimes, we can try to get a stack trace from within a stack +// trace, because libunwind can call mmap (maybe indirectly via an +// internal mmap based memory allocator), and that mmap gets trapped +// and causes a stack-trace request. If were to try to honor that +// recursive request, we'd end up with infinite recursion or deadlock. +// Luckily, it's safe to ignore those subsequent traces. 
In such +// cases, we return 0 to indicate the situation. +static __thread int recursive ATTR_INITIAL_EXEC; + +#if defined(TCMALLOC_ENABLE_UNWIND_FROM_UCONTEXT) && (defined(__i386__) || defined(__x86_64__)) && defined(__GNU_LIBRARY__) +#define BASE_STACKTRACE_UNW_CONTEXT_IS_UCONTEXT 1 +#endif + +#endif // BASE_STACKTRACE_LIBINWIND_INL_H_ + +// Note: this part of the file is included several times. +// Do not put globals below. + +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() +// +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +static int GET_STACK_TRACE_OR_FRAMES { + void *ip; + int n = 0; + unw_cursor_t cursor; + unw_context_t uc; +#if IS_STACK_FRAMES + unw_word_t sp = 0, next_sp = 0; +#endif + + if (recursive) { + return 0; + } + ++recursive; + +#if (IS_WITH_CONTEXT && defined(BASE_STACKTRACE_UNW_CONTEXT_IS_UCONTEXT)) + if (ucp) { + uc = *(static_cast<unw_context_t *>(const_cast<void *>(ucp))); + /* this is a bit weird. profiler.cc calls us with signal's ucontext + * yet passing us 2 as skip_count and essentially assuming we won't + * use ucontext. 
*/ + /* In order to fix that I'm going to assume that if ucp is + * non-null we're asked to ignore skip_count in case we're + * able to use ucp */ + skip_count = 0; + } else { + unw_getcontext(&uc); + skip_count += 2; // Do not include current and parent frame + } +#else + unw_getcontext(&uc); + skip_count += 2; // Do not include current and parent frame +#endif + + int ret = unw_init_local(&cursor, &uc); + assert(ret >= 0); + + while (skip_count--) { + if (unw_step(&cursor) <= 0) { + goto out; + } +#if IS_STACK_FRAMES + if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) { + goto out; + } +#endif + } + + while (n < max_depth) { + if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) { + break; + } +#if IS_STACK_FRAMES + sizes[n] = 0; +#endif + result[n++] = ip; + if (unw_step(&cursor) <= 0) { + break; + } +#if IS_STACK_FRAMES + sp = next_sp; + if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp) , 0) { + break; + } + sizes[n - 1] = next_sp - sp; +#endif + } +out: + --recursive; + return n; +} diff --git a/src/third_party/gperftools-2.7/src/stacktrace_powerpc-darwin-inl.h b/src/third_party/gperftools-2.7/src/stacktrace_powerpc-darwin-inl.h new file mode 100644 index 00000000000..c4c2edbc535 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/stacktrace_powerpc-darwin-inl.h @@ -0,0 +1,158 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Produce stack trace. ABI documentation reference can be found at: +// * PowerPC32 ABI: https://www.power.org/documentation/ +// power-architecture-32-bit-abi-supplement-1-0-embeddedlinuxunified/ +// * PowerPC64 ABI: +// http://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK + +#ifndef BASE_STACKTRACE_POWERPC_INL_H_ +#define BASE_STACKTRACE_POWERPC_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + +#include <stdint.h> // for uintptr_t +#include <stdlib.h> // for NULL +#include <gperftools/stacktrace.h> + +// Given a pointer to a stack frame, locate and return the calling +// stackframe, or return NULL if no stackframe can be found. Perform sanity +// checks (the strictness of which is controlled by the boolean parameter +// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned. 
// Given a pointer to a stack frame, return the caller's frame pointer,
// or NULL when the chain looks bogus.  STRICT_UNWINDING selects how
// paranoid the sanity checks are: strict mode rejects anything that is
// not a plausible downward-growing contiguous stack with frames under
// 100,000 bytes, while lax mode tolerates discontiguous frames
// (alternate signal stacks) but still rejects self-loops and caps
// forward jumps at about 1MB.
template<bool STRICT_UNWINDING>
static void **NextStackFrame(void **old_sp) {
  void **new_sp = (void **) *old_sp;
  const uintptr_t prev = (uintptr_t) old_sp;
  const uintptr_t next = (uintptr_t) new_sp;

  if (STRICT_UNWINDING) {
    // With the stack growing downwards, an older frame must live at a
    // strictly greater address, within a plausible frame size.
    if (next <= prev || next - prev > 100000) return NULL;
  } else {
    // Lax mode: allow jumps to lower addresses (signal stacks), but
    // never a frame chained to itself, and cap forward jumps at ~1MB.
    if (next == prev) return NULL;
    if (next > prev && next - prev > 1000000) return NULL;
  }
  // A frame pointer must be at least pointer-aligned.
  if (next & (sizeof(void *) - 1)) return NULL;
  return new_sp;
}
I don't know quite the best way to discriminate + // systems using the old as from the new one; I've gone with __APPLE__. + // TODO(csilvers): use autoconf instead, to look for 'as --version' == 1 or 2 + __asm__ volatile ("mr %0,r1" : "=r" (sp)); + + // On PowerPC, the "Link Register" or "Link Record" (LR), is a stack + // entry that holds the return address of the subroutine call (what + // instruction we run after our function finishes). This is the + // same as the stack-pointer of our parent routine, which is what we + // want here. While the compiler will always(?) set up LR for + // subroutine calls, it may not for leaf functions (such as this one). + // This routine forces the compiler (at least gcc) to push it anyway. + StacktracePowerPCDummyFunction(); + +#if IS_STACK_FRAMES + // Note we do *not* increment skip_count here for the SYSV ABI. If + // we did, the list of stack frames wouldn't properly match up with + // the list of return addresses. Note this means the top pc entry + // is probably bogus for linux/ppc (and other SYSV-ABI systems). +#else + // The LR save area is used by the callee, so the top entry is bogus. + skip_count++; +#endif + + int n = 0; + while (sp && n < max_depth) { + // The GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few + // bogus entries in some rare cases). + void **next_sp = NextStackFrame<!IS_STACK_FRAMES>(sp); + + if (skip_count > 0) { + skip_count--; + } else { + // PowerPC has 3 main ABIs, which say where in the stack the + // Link Register is. For DARWIN and AIX (used by apple and + // linux ppc64), it's in sp[2]. For SYSV (used by linux ppc), + // it's in sp[1]. +#if defined(__PPC64__) + // This check is in case the compiler doesn't define _CALL_AIX/etc. 
+ result[n] = *(sp+2); +#elif defined(__linux) + // This check is in case the compiler doesn't define _CALL_SYSV. + result[n] = *(sp+1); +#endif + +#if IS_STACK_FRAMES + if (next_sp > sp) { + sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } +#endif + n++; + } + sp = next_sp; + } + return n; +} diff --git a/src/third_party/gperftools-2.7/src/stacktrace_powerpc-inl.h b/src/third_party/gperftools-2.7/src/stacktrace_powerpc-inl.h new file mode 100644 index 00000000000..811d6cc97ee --- /dev/null +++ b/src/third_party/gperftools-2.7/src/stacktrace_powerpc-inl.h @@ -0,0 +1,176 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
// Layout of a PowerPC stack-frame header as read by the unwinder:
// 'next' is the saved back-chain pointer (used by NextStackFrame below
// to reach the caller's frame) and 'return_addr' is the word this code
// records as the frame's return address.  On Apple and 64-bit Linux a
// condition-register save word sits between the two.
// NOTE(review): field semantics inferred from member names and their
// use below -- verify against the ABI documents referenced in the file
// header.
struct layout_ppc {
  struct layout_ppc *next;
#if defined(__APPLE__) || (defined(__linux) && defined(__PPC64__))
  long condition_register;
#endif
  void *return_addr;
};
+template<bool STRICT_UNWINDING> +static layout_ppc *NextStackFrame(layout_ppc *current) { + uintptr_t old_sp = (uintptr_t)(current); + uintptr_t new_sp = (uintptr_t)(current->next); + + // Check that the transition from frame pointer old_sp to frame + // pointer new_sp isn't clearly bogus + if (STRICT_UNWINDING) { + // With the stack growing downwards, older stack frame must be + // at a greater address that the current one. + if (new_sp <= old_sp) + return NULL; + // Assume stack frames larger than 100,000 bytes are bogus. + if (new_sp - old_sp > 100000) + return NULL; + } else { + // In the non-strict mode, allow discontiguous stack frames. + // (alternate-signal-stacks for example). + if (new_sp == old_sp) + return NULL; + // And allow frames upto about 1MB. + if ((new_sp > old_sp) && (new_sp - old_sp > 1000000)) + return NULL; + } + if (new_sp & (sizeof(void *) - 1)) + return NULL; + return current->next; +} + +// This ensures that GetStackTrace stes up the Link Register properly. +void StacktracePowerPCDummyFunction() __attribute__((noinline)); +void StacktracePowerPCDummyFunction() { __asm__ volatile(""); } +#endif // BASE_STACKTRACE_POWERPC_INL_H_ + +// Note: this part of the file is included several times. +// Do not put globals below. + +// Load instruction used on top-of-stack get. +#if defined(__PPC64__) || defined(__LP64__) +# define LOAD "ld" +#else +# define LOAD "lwz" +#endif + +#if defined(__linux__) && defined(__PPC__) +# define TOP_STACK "%0,0(1)" +#elif defined(__MACH__) && defined(__APPLE__) +// Apple OS X uses an old version of gnu as -- both Darwin 7.9.0 (Panther) +// and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a +// different asm syntax. I don't know quite the best way to discriminate +// systems using the old as from the new one; I've gone with __APPLE__. 
// Body shared -- via repeated textual inclusion from stacktrace.cc --
// by the four entry points GetStack{Trace,Frames}{,WithContext}().
// The macro GET_STACK_TRACE_OR_FRAMES supplies the signature: result,
// max_depth, skip_count (plus sizes when IS_STACK_FRAMES, and ucp for
// the WithContext flavors).  Walks the PowerPC frame back-chain from
// the current stack pointer, recording one return address per frame.
static int GET_STACK_TRACE_OR_FRAMES {
  layout_ppc *current;
  int n;

  // Force GCC to spill LR.
  asm volatile ("" : "=l"(current));

  // Get the address on top-of-stack.  LOAD and TOP_STACK are defined
  // above per word size and OS flavor.
  asm volatile (LOAD " " TOP_STACK : "=r"(current));

  // Forces the compiler to save LR even in leaf functions (declared
  // noinline above for that purpose).
  StacktracePowerPCDummyFunction();

  n = 0;
  skip_count++; // skip parent's frame due to indirection in
                // stacktrace.cc

  while (current && n < max_depth) {
    // The GetStackFrames routine is called when we are in some
    // informational context (the failure signal handler for example).
    // Use the non-strict unwinding rules to produce a stack trace
    // that is as complete as possible (even if it contains a few
    // bogus entries in some rare cases).
    layout_ppc *next = NextStackFrame<!IS_STACK_FRAMES>(current);
    if (skip_count > 0) {
      skip_count--;
    } else {
      result[n] = current->return_addr;
#if IS_STACK_FRAMES
      if (next > current) {
        sizes[n] = (uintptr_t)next - (uintptr_t)current;
      } else {
        // A frame-size of 0 is used to indicate unknown frame size.
        sizes[n] = 0;
      }
#endif
      n++;
    }
    current = next;
  }

  // It's possible the second-last stack frame can't return
  // (that is, it's __libc_start_main), in which case
  // the CRT startup code will have set its LR to 'NULL'.
  if (n > 0 && result[n-1] == NULL)
    n--;

  return n;
}
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// Produce stack trace. ABI documentation reference can be found at: +// * PowerPC32 ABI: https://www.power.org/documentation/ +// power-architecture-32-bit-abi-supplement-1-0-embeddedlinuxunified/ +// * PowerPC64 ABI: +// http://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK + +#ifndef BASE_STACKTRACE_POWERPC_INL_H_ +#define BASE_STACKTRACE_POWERPC_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + +#include <stdint.h> // for uintptr_t +#include <stdlib.h> // for NULL +#include <signal.h> // for siginfo_t +#include <gperftools/stacktrace.h> +#include <base/vdso_support.h> + +#if defined(HAVE_SYS_UCONTEXT_H) +#include <sys/ucontext.h> +#elif defined(HAVE_UCONTEXT_H) +#include <ucontext.h> // for ucontext_t +#endif + +// PowerPC64 Little Endian follows BE wrt. backchain, condition register, +// and LR save area, so no need to adjust the reading struct. 
// PowerPC64 Little Endian follows BE wrt. backchain, condition register,
// and LR save area, so no need to adjust the reading struct.
struct layout_ppc {
  struct layout_ppc *next;            // saved back-chain (caller's frame)
#ifdef __PPC64__
  long condition_register;            // CR save word (64-bit ABI only)
#endif
  void *return_addr;                  // LR save word
};

// Signal callbacks are handled by the vDSO symbol:
//
// * PowerPC64 Linux (arch/powerpc/kernel/vdso64/sigtramp.S):
//   __kernel_sigtramp_rt64
// * PowerPC32 Linux (arch/powerpc/kernel/vdso32/sigtramp.S):
//   __kernel_sigtramp32
//   __kernel_sigtramp_rt32
//
// So a backtrace may need special handling if the symbol read is
// the signal trampoline.

// Given a pointer to a stack frame, locate and return the calling
// stackframe, or return NULL if no stackframe can be found. Perform sanity
// checks (the strictness of which is controlled by the boolean parameter
// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
template<bool STRICT_UNWINDING>
static layout_ppc *NextStackFrame(layout_ppc *current) {
  uintptr_t old_sp = (uintptr_t)(current);
  uintptr_t new_sp = (uintptr_t)(current->next);

  // Check that the transition from frame pointer old_sp to frame
  // pointer new_sp isn't clearly bogus.
  if (STRICT_UNWINDING) {
    // With the stack growing downwards, older stack frame must be
    // at a greater address than the current one.
    if (new_sp <= old_sp)
      return NULL;
    // Assume stack frames larger than 100,000 bytes are bogus.
    if (new_sp - old_sp > 100000)
      return NULL;
  } else {
    // In the non-strict mode, allow discontiguous stack frames
    // (alternate-signal-stacks for example).
    if (new_sp == old_sp)
      return NULL;
    // And allow frames up to about 1MB.
    if ((new_sp > old_sp) && (new_sp - old_sp > 1000000))
      return NULL;
  }
  // The back-chain pointer must be pointer-aligned.
  if (new_sp & (sizeof(void *) - 1))
    return NULL;
  return current->next;
}
// Body shared -- via repeated textual inclusion from stacktrace.cc --
// by GetStack{Trace,Frames}{,WithContext}() on Linux/PowerPC.  Walks
// the frame back-chain and, when a recorded return address is the vDSO
// signal trampoline, replaces it with the interrupted PC recovered
// from the signal frame's saved register state.
static int GET_STACK_TRACE_OR_FRAMES {
  layout_ppc *current;
  int n;

  // Get the address on top-of-stack.
  current = reinterpret_cast<layout_ppc*> (__builtin_frame_address (0));
  // And ignore the current symbol.
  current = current->next;

  // Forces the compiler to save LR even in leaf functions.
  StacktracePowerPCDummyFunction();

  n = 0;
  skip_count++; // skip parent's frame due to indirection in
                // stacktrace.cc

  // Resolve the vDSO signal-trampoline addresses once per call so that
  // trampoline frames can be recognized in the loop below.
  base::VDSOSupport vdso;
  base::ElfMemImage::SymbolInfo rt_sigreturn_symbol_info;
#ifdef __PPC64__
  const void *sigtramp64_vdso = 0;
  if (vdso.LookupSymbol("__kernel_sigtramp_rt64", "LINUX_2.6.15", STT_NOTYPE,
                        &rt_sigreturn_symbol_info))
    sigtramp64_vdso = rt_sigreturn_symbol_info.address;
#else
  const void *sigtramp32_vdso = 0;
  if (vdso.LookupSymbol("__kernel_sigtramp32", "LINUX_2.6.15", STT_NOTYPE,
                        &rt_sigreturn_symbol_info))
    sigtramp32_vdso = rt_sigreturn_symbol_info.address;
  const void *sigtramp32_rt_vdso = 0;
  if (vdso.LookupSymbol("__kernel_sigtramp_rt32", "LINUX_2.6.15", STT_NOTYPE,
                        &rt_sigreturn_symbol_info))
    sigtramp32_rt_vdso = rt_sigreturn_symbol_info.address;
#endif

  while (current && n < max_depth) {
    // The GetStackFrames routine is called when we are in some
    // informational context (the failure signal handler for example).
    // Use the non-strict unwinding rules to produce a stack trace
    // that is as complete as possible (even if it contains a few
    // bogus entries in some rare cases).
    layout_ppc *next = NextStackFrame<!IS_STACK_FRAMES>(current);
    if (skip_count > 0) {
      skip_count--;
    } else {
      result[n] = current->return_addr;
#ifdef __PPC64__
      if (sigtramp64_vdso && (sigtramp64_vdso == current->return_addr)) {
        struct signal_frame_64 {
          char dummy[128];
          ucontext_t uc;
          // We don't care about the rest, since the IP value is at 'uc' field.
        } *sigframe = reinterpret_cast<signal_frame_64*>(current);
        result[n] = (void*) sigframe->uc.uc_mcontext.gp_regs[PT_NIP];
      }
#else
      if (sigtramp32_vdso && (sigtramp32_vdso == current->return_addr)) {
        struct signal_frame_32 {
          char dummy[64];
          struct sigcontext sctx;
          mcontext_t mctx;
          // We don't care about the rest, since IP value is at 'mctx' field.
        } *sigframe = reinterpret_cast<signal_frame_32*>(current);
        result[n] = (void*) sigframe->mctx.gregs[PT_NIP];
      } else if (sigtramp32_rt_vdso && (sigtramp32_rt_vdso == current->return_addr)) {
        struct rt_signal_frame_32 {
          char dummy[64 + 16];
          siginfo_t info;
          ucontext_t uc;
          // We don't care about the rest, since IP value is at 'uc' field.
        } *sigframe = reinterpret_cast<rt_signal_frame_32*>(current);
        result[n] = (void*) sigframe->uc.uc_mcontext.uc_regs->gregs[PT_NIP];
      }
#endif

#if IS_STACK_FRAMES
      if (next > current) {
        sizes[n] = (uintptr_t)next - (uintptr_t)current;
      } else {
        // A frame-size of 0 is used to indicate unknown frame size.
        sizes[n] = 0;
      }
#endif
      n++;
    }
    current = next;
  }

  // It's possible the second-last stack frame can't return
  // (that is, it's __libc_start_main), in which case
  // the CRT startup code will have set its LR to 'NULL'.
  if (n > 0 && result[n-1] == NULL)
    n--;

  return n;
}
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Produces a stack trace for Windows. Normally, one could use +// stacktrace_x86-inl.h or stacktrace_x86_64-inl.h -- and indeed, that +// should work for binaries compiled using MSVC in "debug" mode. +// However, in "release" mode, Windows uses frame-pointer +// optimization, which makes getting a stack trace very difficult. +// +// There are several approaches one can take. One is to use Windows +// intrinsics like StackWalk64. These can work, but have restrictions +// on how successful they can be. Another attempt is to write a +// version of stacktrace_x86-inl.h that has heuristic support for +// dealing with FPO, similar to what WinDbg does (see +// http://www.nynaeve.net/?p=97). +// +// The solution we've ended up doing is to call the undocumented +// windows function RtlCaptureStackBackTrace, which probably doesn't +// work with FPO but at least is fast, and doesn't require a symbol +// server. +// +// This code is inspired by a patch from David Vitek: +// http://code.google.com/p/gperftools/issues/detail?id=83 + +#ifndef BASE_STACKTRACE_WIN32_INL_H_ +#define BASE_STACKTRACE_WIN32_INL_H_ +// Note: this file is included into stacktrace.cc more than once. 
// Signature of ntdll's RtlCaptureStackBackTrace (undocumented at the
// time this code was written -- see the rationale in the header
// comment of this file).
typedef USHORT NTAPI RtlCaptureStackBackTrace_Function(
    IN ULONG frames_to_skip,
    IN ULONG frames_to_capture,
    OUT PVOID *backtrace,
    OUT PULONG backtrace_hash);

// Load the function we need at static init time, where we don't have
// to worry about someone else holding the loader's lock.
static RtlCaptureStackBackTrace_Function* const RtlCaptureStackBackTrace_fn =
   (RtlCaptureStackBackTrace_Function*)
    GetProcAddress(GetModuleHandleA("ntdll.dll"), "RtlCaptureStackBackTrace");

// Captures up to max_depth return addresses into 'result', skipping
// 'skip_count' caller-requested frames plus 3 frames of this library's
// own wrapper machinery.  Returns the number of frames captured, or 0
// when the ntdll entry point could not be resolved.
static int GetStackTrace_win32(void** result, int max_depth,
                               int skip_count) {
  if (!RtlCaptureStackBackTrace_fn) {
    // TODO(csilvers): should we log an error here?
    return 0;     // can't find a stacktrace with no function to call
  }
  return (int)RtlCaptureStackBackTrace_fn(skip_count + 3, max_depth,
                                          result, 0);
}

// Shared failure helper for the unimplemented flavors below: fires an
// assert in debug builds, returns 0 otherwise.
static int not_implemented(void) {
  assert(0 == "Not yet implemented");
  return 0;
}

// Frame sizes are not provided by RtlCaptureStackBackTrace, so the
// GetStackFrames* flavors are unimplemented on Windows.
static int GetStackFrames_win32(void** /* pcs */,
                                int* /* sizes */,
                                int /* max_depth */,
                                int /* skip_count */) {
  return not_implemented();
}

static int GetStackFramesWithContext_win32(void** result, int* sizes, int max_depth,
                                           int skip_count, const void *uc) {
  return not_implemented();
}

static int GetStackTraceWithContext_win32(void** result, int max_depth,
                                          int skip_count, const void *uc) {
  return not_implemented();
}
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Produce stack trace + +#ifndef BASE_STACKTRACE_X86_INL_H_ +#define BASE_STACKTRACE_X86_INL_H_ +// Note: this file is included into stacktrace.cc more than once. 
+// Anything that should only be defined once should be here: + +#include "config.h" +#include <stdlib.h> // for NULL +#include <assert.h> +#if defined(HAVE_SYS_UCONTEXT_H) +#include <sys/ucontext.h> +#elif defined(HAVE_UCONTEXT_H) +#include <ucontext.h> // for ucontext_t +#elif defined(HAVE_CYGWIN_SIGNAL_H) +// cygwin/signal.h has a buglet where it uses pthread_attr_t without +// #including <pthread.h> itself. So we have to do it. +# ifdef HAVE_PTHREAD +# include <pthread.h> +# endif +#include <cygwin/signal.h> +typedef ucontext ucontext_t; +#endif +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> // for msync +#include "base/vdso_support.h" +#endif + +#include "gperftools/stacktrace.h" + +#if defined(__linux__) && defined(__i386__) && defined(__ELF__) && defined(HAVE_MMAP) +// Count "push %reg" instructions in VDSO __kernel_vsyscall(), +// preceding "syscall" or "sysenter". +// If __kernel_vsyscall uses frame pointer, answer 0. +// +// kMaxBytes tells how many instruction bytes of __kernel_vsyscall +// to analyze before giving up. Up to kMaxBytes+1 bytes of +// instructions could be accessed. +// +// Here are known __kernel_vsyscall instruction sequences: +// +// SYSENTER (linux-2.6.26/arch/x86/vdso/vdso32/sysenter.S). +// Used on Intel. +// 0xffffe400 <__kernel_vsyscall+0>: push %ecx +// 0xffffe401 <__kernel_vsyscall+1>: push %edx +// 0xffffe402 <__kernel_vsyscall+2>: push %ebp +// 0xffffe403 <__kernel_vsyscall+3>: mov %esp,%ebp +// 0xffffe405 <__kernel_vsyscall+5>: sysenter +// +// SYSCALL (see linux-2.6.26/arch/x86/vdso/vdso32/syscall.S). +// Used on AMD. 
// kMaxBytes tells how many instruction bytes of __kernel_vsyscall
// to analyze before giving up.  Up to kMaxBytes+1 bytes of
// instructions could be accessed.
static const int kMaxBytes = 10;

// We use assert()s instead of DCHECK()s -- this is too low level
// for DCHECK().

// Count the "push %reg" instructions at the start of
// __kernel_vsyscall(), preceding its "sysenter" or "syscall".
// Returns 0 when the VDSO keeps a frame pointer ("mov %esp,%ebp") or
// uses the plain "int $0x80" flavor, since no pushes need to be
// compensated for in those cases.
static int CountPushInstructions(const unsigned char *const addr) {
  int pushes = 0;
  int i = 0;
  while (i < kMaxBytes) {
    const unsigned char op = addr[i];
    if (op == 0x89) {
      // "mov reg,reg"
      if (addr[i + 1] == 0xE5) {
        // Found "mov %esp,%ebp": frame pointer in use, nothing to count.
        return 0;
      }
      i += 2;  // skip opcode plus register-encoding byte
      continue;
    }
    if (op == 0x0F && (addr[i + 1] == 0x34 || addr[i + 1] == 0x05)) {
      // Found "sysenter" or "syscall".
      return pushes;
    }
    if ((op & 0xF0) == 0x50) {
      // Found "push %reg".
      ++pushes;
      ++i;
      continue;
    }
    if (op == 0xCD && addr[i + 1] == 0x80) {
      // Found "int $0x80" -- no pushes should precede it.
      assert(pushes == 0);
      return 0;
    }
    // Unexpected instruction.
    assert(0 == "unexpected instruction in __kernel_vsyscall");
    return 0;
  }
  // Unexpected: didn't find SYSENTER or SYSCALL in
  // [__kernel_vsyscall, __kernel_vsyscall + kMaxBytes) interval.
  assert(0 == "did not find SYSENTER or SYSCALL in __kernel_vsyscall");
  return 0;
}
// Given a pointer to a stack frame, locate and return the calling
// stackframe, or return NULL if no stackframe can be found. Perform sanity
// checks (the strictness of which is controlled by the boolean parameter
// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
// When WITH_CONTEXT and 'uc' are supplied, additionally repairs the frame
// chain across the i386 VDSO's frame-pointer-less __kernel_vsyscall by
// recovering %esp from the saved signal context.
template<bool STRICT_UNWINDING, bool WITH_CONTEXT>
static void **NextStackFrame(void **old_sp, const void *uc) {
  void **new_sp = (void **) *old_sp;

#if defined(__linux__) && defined(__i386__) && defined(HAVE_VDSO_SUPPORT)
  if (WITH_CONTEXT && uc != NULL) {
    // How many "push %reg" instructions are there at __kernel_vsyscall?
    // This is constant for a given kernel and processor, so compute
    // it only once.
    static int num_push_instructions = -1;  // Sentinel: not computed yet.
    // Initialize with sentinel value: __kernel_rt_sigreturn can not possibly
    // be there.
    static const unsigned char *kernel_rt_sigreturn_address = NULL;
    static const unsigned char *kernel_vsyscall_address = NULL;
    if (num_push_instructions == -1) {
      base::VDSOSupport vdso;
      if (vdso.IsPresent()) {
        base::VDSOSupport::SymbolInfo rt_sigreturn_symbol_info;
        base::VDSOSupport::SymbolInfo vsyscall_symbol_info;
        if (!vdso.LookupSymbol("__kernel_rt_sigreturn", "LINUX_2.5",
                               STT_FUNC, &rt_sigreturn_symbol_info) ||
            !vdso.LookupSymbol("__kernel_vsyscall", "LINUX_2.5",
                               STT_FUNC, &vsyscall_symbol_info) ||
            rt_sigreturn_symbol_info.address == NULL ||
            vsyscall_symbol_info.address == NULL) {
          // Unexpected: 32-bit VDSO is present, yet one of the expected
          // symbols is missing or NULL.
          assert(0 == "VDSO is present, but doesn't have expected symbols");
          num_push_instructions = 0;
        } else {
          kernel_rt_sigreturn_address =
              reinterpret_cast<const unsigned char *>(
                  rt_sigreturn_symbol_info.address);
          kernel_vsyscall_address =
              reinterpret_cast<const unsigned char *>(
                  vsyscall_symbol_info.address);
          num_push_instructions =
              CountPushInstructions(kernel_vsyscall_address);
        }
      } else {
        num_push_instructions = 0;
      }
    }
    if (num_push_instructions != 0 && kernel_rt_sigreturn_address != NULL &&
        old_sp[1] == kernel_rt_sigreturn_address) {
      const ucontext_t *ucv = static_cast<const ucontext_t *>(uc);
      // This kernel does not use frame pointer in its VDSO code,
      // and so %ebp is not suitable for unwinding.
      void **const reg_ebp =
          reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_EBP]);
      const unsigned char *const reg_eip =
          reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]);
      if (new_sp == reg_ebp &&
          kernel_vsyscall_address <= reg_eip &&
          reg_eip - kernel_vsyscall_address < kMaxBytes) {
        // We "stepped up" to __kernel_vsyscall, but %ebp is not usable.
        // Restore from 'ucv' instead.
        void **const reg_esp =
            reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_ESP]);
        // Check that alleged %esp is not NULL and is reasonably aligned.
        if (reg_esp &&
            ((uintptr_t)reg_esp & (sizeof(reg_esp) - 1)) == 0) {
          // Check that alleged %esp is actually readable. This is to prevent
          // "double fault" in case we hit the first fault due to e.g. stack
          // corruption.
          //
          // page_size is linker-initialized to avoid async-unsafe locking
          // that GCC would otherwise insert (__cxa_guard_acquire etc).
          static int page_size;
          if (page_size == 0) {
            // First time through.
            page_size = getpagesize();
          }
          void *const reg_esp_aligned =
              reinterpret_cast<void *>(
                  (uintptr_t)(reg_esp + num_push_instructions - 1) &
                  ~(page_size - 1));
          if (msync(reg_esp_aligned, page_size, MS_ASYNC) == 0) {
            // Alleged %esp is readable, use it for further unwinding.
            new_sp = reinterpret_cast<void **>(
                reg_esp[num_push_instructions - 1]);
          }
        }
      }
    }
  }
#endif

  // Check that the transition from frame pointer old_sp to frame
  // pointer new_sp isn't clearly bogus.
  if (STRICT_UNWINDING) {
    // With the stack growing downwards, older stack frame must be
    // at a greater address than the current one.
    if (new_sp <= old_sp) return NULL;
    // Assume stack frames larger than 100,000 bytes are bogus.
    if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
  } else {
    // In the non-strict mode, allow discontiguous stack frames.
    // (alternate-signal-stacks for example).
    if (new_sp == old_sp) return NULL;
    if (new_sp > old_sp) {
      // And allow frames up to about 1MB.
      const uintptr_t delta = (uintptr_t)new_sp - (uintptr_t)old_sp;
      const uintptr_t acceptable_delta = 1000000;
      if (delta > acceptable_delta) {
        return NULL;
      }
    }
  }
  // A frame pointer must be at least pointer-aligned.
  if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
#ifdef __i386__
  // On 64-bit machines, the stack pointer can be very close to
  // 0xffffffff, so we explicitly check for a pointer into the
  // last two pages in the address space.
  if ((uintptr_t)new_sp >= 0xffffe000) return NULL;
#endif
#ifdef HAVE_MMAP
  if (!STRICT_UNWINDING) {
    // Lax sanity checks cause a crash on AMD-based machines with
    // VDSO-enabled kernels.
    // Make an extra sanity check to ensure new_sp is readable.
    // Note: NextStackFrame<false>() is only called while the program
    // is already on its last leg, so it's ok to be slow here.
    static int page_size = getpagesize();
    void *new_sp_aligned = (void *)((uintptr_t)new_sp & ~(page_size - 1));
    if (msync(new_sp_aligned, page_size, MS_ASYNC) == -1)
      return NULL;
  }
#endif
  return new_sp;
}
+ // An alternative way to achieve this + // would be (before this __asm__ instruction) to call Noop() defined as + // static void Noop() __attribute__ ((noinline)); // prevent inlining + // static void Noop() { asm(""); } // prevent optimizing-away + __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); + // Arguments are passed in registers on x86-64, so we can't just + // offset from &result + sp = (void **) rbp; +#else +# error Using stacktrace_x86-inl.h on a non x86 architecture! +#endif + + skip_count++; // skip parent's frame due to indirection in stacktrace.cc + + int n = 0; + while (sp && n < max_depth) { + if (*(sp+1) == reinterpret_cast<void *>(0)) { + // In 64-bit code, we often see a frame that + // points to itself and has a return address of 0. + break; + } +#if !IS_WITH_CONTEXT + const void *const ucp = NULL; +#endif + void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp); + if (skip_count > 0) { + skip_count--; + } else { + result[n] = *(sp+1); +#if IS_STACK_FRAMES + if (next_sp > sp) { + sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } +#endif + n++; + } + sp = next_sp; + } + return n; +} diff --git a/src/third_party/gperftools-2.7/src/static_vars.cc b/src/third_party/gperftools-2.7/src/static_vars.cc new file mode 100644 index 00000000000..3743d1a35e7 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/static_vars.cc @@ -0,0 +1,146 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Ken Ashcraft <opensource@google.com> + +#include <config.h> +#include "static_vars.h" +#include <stddef.h> // for NULL +#include <new> // for operator new +#ifdef HAVE_PTHREAD +#include <pthread.h> // for pthread_atfork +#endif +#include "internal_logging.h" // for CHECK_CONDITION +#include "common.h" +#include "sampler.h" // for Sampler +#include "getenv_safe.h" // TCMallocGetenvSafe +#include "base/googleinit.h" +#include "maybe_threads.h" + +namespace tcmalloc { + +#if defined(HAVE_FORK) && defined(HAVE_PTHREAD) +// These following two functions are registered via pthread_atfork to make +// sure the central_cache locks remain in a consisten state in the forked +// version of the thread. 
+ +void CentralCacheLockAll() +{ + Static::pageheap_lock()->Lock(); + for (int i = 0; i < Static::num_size_classes(); ++i) + Static::central_cache()[i].Lock(); +} + +void CentralCacheUnlockAll() +{ + for (int i = 0; i < Static::num_size_classes(); ++i) + Static::central_cache()[i].Unlock(); + Static::pageheap_lock()->Unlock(); +} +#endif + +bool Static::inited_; +SpinLock Static::pageheap_lock_(SpinLock::LINKER_INITIALIZED); +SizeMap Static::sizemap_; +CentralFreeListPadded Static::central_cache_[kClassSizesMax]; +PageHeapAllocator<Span> Static::span_allocator_; +PageHeapAllocator<StackTrace> Static::stacktrace_allocator_; +Span Static::sampled_objects_; +PageHeapAllocator<StackTraceTable::Bucket> Static::bucket_allocator_; +StackTrace* Static::growth_stacks_ = NULL; +Static::PageHeapStorage Static::pageheap_; + +void Static::InitStaticVars() { + sizemap_.Init(); + span_allocator_.Init(); + span_allocator_.New(); // Reduce cache conflicts + span_allocator_.New(); // Reduce cache conflicts + stacktrace_allocator_.Init(); + bucket_allocator_.Init(); + // Do a bit of sanitizing: make sure central_cache is aligned properly + CHECK_CONDITION((sizeof(central_cache_[0]) % 64) == 0); + for (int i = 0; i < num_size_classes(); ++i) { + central_cache_[i].Init(i); + } + + new (&pageheap_.memory) PageHeap; + + bool aggressive_decommit = + tcmalloc::commandlineflags::StringToBool( + TCMallocGetenvSafe("TCMALLOC_AGGRESSIVE_DECOMMIT"), false); + + pageheap()->SetAggressiveDecommit(aggressive_decommit); + + inited_ = true; + + DLL_Init(&sampled_objects_); +} + +void Static::InitLateMaybeRecursive() { +#if defined(HAVE_FORK) && defined(HAVE_PTHREAD) \ + && !defined(__APPLE__) && !defined(TCMALLOC_NO_ATFORK) + // OSX has it's own way of handling atfork in malloc (see + // libc_override_osx.h). + // + // For other OSes we do pthread_atfork even if standard seemingly + // discourages pthread_atfork, asking apps to do only + // async-signal-safe calls between fork and exec. 
+ // + // We're deliberately attempting to register atfork handlers as part + // of malloc initialization. So very early. This ensures that our + // handler is called last and that means fork will try to grab + // tcmalloc locks last avoiding possible issues with many other + // locks that are held around calls to malloc. I.e. if we don't do + // that, fork() grabbing malloc lock before such other lock would be + // prone to deadlock, if some other thread holds other lock and + // calls malloc. + // + // We still leave some way of disabling it via + // -DTCMALLOC_NO_ATFORK. It looks like on glibc even with fully + // static binaries malloc is really initialized very early. But I + // can see how combination of static linking and other libc-s could + // be less fortunate and allow some early app constructors to run + // before malloc is ever called. + + perftools_pthread_atfork( + CentralCacheLockAll, // parent calls before fork + CentralCacheUnlockAll, // parent calls after fork + CentralCacheUnlockAll); // child calls after fork +#endif + +#ifndef NDEBUG + // pthread_atfork above may malloc sometimes. Lets ensure we test + // that malloc works from here. + free(malloc(1)); +#endif +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.7/src/static_vars.h b/src/third_party/gperftools-2.7/src/static_vars.h new file mode 100644 index 00000000000..3eeae0f1e09 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/static_vars.h @@ -0,0 +1,130 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Ken Ashcraft <opensource@google.com> +// +// Static variables shared by multiple classes. + +#ifndef TCMALLOC_STATIC_VARS_H_ +#define TCMALLOC_STATIC_VARS_H_ + +#include <config.h> +#include "base/basictypes.h" +#include "base/spinlock.h" +#include "central_freelist.h" +#include "common.h" +#include "page_heap.h" +#include "page_heap_allocator.h" +#include "span.h" +#include "stack_trace_table.h" + +namespace tcmalloc { + +class Static { + public: + // Linker initialized, so this lock can be accessed at any time. + static SpinLock* pageheap_lock() { return &pageheap_lock_; } + + // Must be called before calling any of the accessors below. 
+ static void InitStaticVars(); + static void InitLateMaybeRecursive(); + + // Central cache -- an array of free-lists, one per size-class. + // We have a separate lock per free-list to reduce contention. + static CentralFreeListPadded* central_cache() { return central_cache_; } + + static SizeMap* sizemap() { return &sizemap_; } + + static unsigned num_size_classes() { return sizemap_.num_size_classes; } + + ////////////////////////////////////////////////////////////////////// + // In addition to the explicit initialization comment, the variables below + // must be protected by pageheap_lock. + + // Page-level allocator. + static PageHeap* pageheap() { return reinterpret_cast<PageHeap *>(&pageheap_.memory); } + + static PageHeapAllocator<Span>* span_allocator() { return &span_allocator_; } + + static PageHeapAllocator<StackTrace>* stacktrace_allocator() { + return &stacktrace_allocator_; + } + + static StackTrace* growth_stacks() { return growth_stacks_; } + static void set_growth_stacks(StackTrace* s) { growth_stacks_ = s; } + + // State kept for sampled allocations (/pprof/heap support) + static Span* sampled_objects() { return &sampled_objects_; } + static PageHeapAllocator<StackTraceTable::Bucket>* bucket_allocator() { + return &bucket_allocator_; + } + + // Check if InitStaticVars() has been run. + static bool IsInited() { return inited_; } + + private: + // some unit tests depend on this and link to static vars + // imperfectly. Thus we keep those unhidden for now. Thankfully + // they're not performance-critical. + /* ATTRIBUTE_HIDDEN */ static bool inited_; + /* ATTRIBUTE_HIDDEN */ static SpinLock pageheap_lock_; + + // These static variables require explicit initialization. We cannot + // count on their constructors to do any initialization because other + // static variables may try to allocate memory before these variables + // can run their constructors. 
+ + ATTRIBUTE_HIDDEN static SizeMap sizemap_; + ATTRIBUTE_HIDDEN static CentralFreeListPadded central_cache_[kClassSizesMax]; + ATTRIBUTE_HIDDEN static PageHeapAllocator<Span> span_allocator_; + ATTRIBUTE_HIDDEN static PageHeapAllocator<StackTrace> stacktrace_allocator_; + ATTRIBUTE_HIDDEN static Span sampled_objects_; + ATTRIBUTE_HIDDEN static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_; + + // Linked list of stack traces recorded every time we allocated memory + // from the system. Useful for finding allocation sites that cause + // increase in the footprint of the system. The linked list pointer + // is stored in trace->stack[kMaxStackDepth-1]. + ATTRIBUTE_HIDDEN static StackTrace* growth_stacks_; + + // PageHeap uses a constructor for initialization. Like the members above, + // we can't depend on initialization order, so pageheap is new'd + // into this buffer. + union PageHeapStorage { + char memory[sizeof(PageHeap)]; + uintptr_t extra; // To force alignment + }; + ATTRIBUTE_HIDDEN static PageHeapStorage pageheap_; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_STATIC_VARS_H_ diff --git a/src/third_party/gperftools-2.7/src/symbolize.cc b/src/third_party/gperftools-2.7/src/symbolize.cc new file mode 100755 index 00000000000..88609ff403e --- /dev/null +++ b/src/third_party/gperftools-2.7/src/symbolize.cc @@ -0,0 +1,296 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// This forks out to pprof to do the actual symbolizing. We might +// be better off writing our own in C++. 
+ +#include "config.h" +#include "symbolize.h" +#include <stdlib.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for write() +#endif +#ifdef HAVE_SYS_SOCKET_H +#include <sys/socket.h> // for socketpair() -- needed by Symbolize +#endif +#ifdef HAVE_SYS_WAIT_H +#include <sys/wait.h> // for wait() -- needed by Symbolize +#endif +#ifdef HAVE_POLL_H +#include <poll.h> +#endif +#ifdef __MACH__ +#include <mach-o/dyld.h> // for GetProgramInvocationName() +#include <limits.h> // for PATH_MAX +#endif +#if defined(__CYGWIN__) || defined(__CYGWIN32__) +#include <io.h> // for get_osfhandle() +#endif +#include <string> +#include "base/commandlineflags.h" +#include "base/logging.h" +#include "base/sysinfo.h" +#if defined(__FreeBSD__) +#include <sys/sysctl.h> +#endif + +using std::string; +using tcmalloc::DumpProcSelfMaps; // from sysinfo.h + + +DEFINE_string(symbolize_pprof, + EnvToString("PPROF_PATH", "pprof"), + "Path to pprof to call for reporting function names."); + +// heap_profile_table_pprof may be referenced after destructors are +// called (since that's when leak-checking is done), so we make +// a more-permanent copy that won't ever get destroyed. +static string* g_pprof_path = new string(FLAGS_symbolize_pprof); + +// Returns NULL if we're on an OS where we can't get the invocation name. +// Using a static var is ok because we're not called from a thread. +static const char* GetProgramInvocationName() { +#if defined(HAVE_PROGRAM_INVOCATION_NAME) +#ifdef __UCLIBC__ + extern const char* program_invocation_name; // uclibc provides this +#else + extern char* program_invocation_name; // gcc provides this +#endif + return program_invocation_name; +#elif defined(__MACH__) + // We don't want to allocate memory for this since we may be + // calculating it when memory is corrupted. 
+ static char program_invocation_name[PATH_MAX]; + if (program_invocation_name[0] == '\0') { // first time calculating + uint32_t length = sizeof(program_invocation_name); + if (_NSGetExecutablePath(program_invocation_name, &length)) + return NULL; + } + return program_invocation_name; +#elif defined(__FreeBSD__) + static char program_invocation_name[PATH_MAX]; + size_t len = sizeof(program_invocation_name); + static const int name[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 }; + if (!sysctl(name, 4, program_invocation_name, &len, NULL, 0)) + return program_invocation_name; + return NULL; +#else + return NULL; // figure out a way to get argv[0] +#endif +} + +// Prints an error message when you can't run Symbolize(). +static void PrintError(const char* reason) { + RAW_LOG(ERROR, + "*** WARNING: Cannot convert addresses to symbols in output below.\n" + "*** Reason: %s\n" + "*** If you cannot fix this, try running pprof directly.\n", + reason); +} + +void SymbolTable::Add(const void* addr) { + symbolization_table_[addr] = ""; +} + +const char* SymbolTable::GetSymbol(const void* addr) { + return symbolization_table_[addr]; +} + +// Updates symbolization_table with the pointers to symbol names corresponding +// to its keys. The symbol names are stored in out, which is allocated and +// freed by the caller of this routine. +// Note that the forking/etc is not thread-safe or re-entrant. That's +// ok for the purpose we need -- reporting leaks detected by heap-checker +// -- but be careful if you decide to use this routine for other purposes. +// Returns number of symbols read on error. If can't symbolize, returns 0 +// and emits an error message about why. 
+int SymbolTable::Symbolize() { +#if !defined(HAVE_UNISTD_H) || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H) + PrintError("Perftools does not know how to call a sub-process on this O/S"); + return 0; +#else + const char* argv0 = GetProgramInvocationName(); + if (argv0 == NULL) { // can't call symbolize if we can't figure out our name + PrintError("Cannot figure out the name of this executable (argv0)"); + return 0; + } + if (access(g_pprof_path->c_str(), R_OK) != 0) { + PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)"); + return 0; + } + + // All this work is to do two-way communication. ugh. + int *child_in = NULL; // file descriptors + int *child_out = NULL; // for now, we don't worry about child_err + int child_fds[5][2]; // socketpair may be called up to five times below + + // The client program may close its stdin and/or stdout and/or stderr + // thus allowing socketpair to reuse file descriptors 0, 1 or 2. + // In this case the communication between the forked processes may be broken + // if either the parent or the child tries to close or duplicate these + // descriptors. The loop below produces two pairs of file descriptors, each + // greater than 2 (stderr). 
+ for (int i = 0; i < 5; i++) { + if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) { + for (int j = 0; j < i; j++) { + close(child_fds[j][0]); + close(child_fds[j][1]); + PrintError("Cannot create a socket pair"); + } + return 0; + } else { + if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) { + if (child_in == NULL) { + child_in = child_fds[i]; + } else { + child_out = child_fds[i]; + for (int j = 0; j < i; j++) { + if (child_fds[j] == child_in) continue; + close(child_fds[j][0]); + close(child_fds[j][1]); + } + break; + } + } + } + } + + switch (fork()) { + case -1: { // error + close(child_in[0]); + close(child_in[1]); + close(child_out[0]); + close(child_out[1]); + PrintError("Unknown error calling fork()"); + return 0; + } + case 0: { // child + close(child_in[1]); // child uses the 0's, parent uses the 1's + close(child_out[1]); // child uses the 0's, parent uses the 1's + close(0); + close(1); + if (dup2(child_in[0], 0) == -1) _exit(1); + if (dup2(child_out[0], 1) == -1) _exit(2); + // Unset vars that might cause trouble when we fork + unsetenv("CPUPROFILE"); + unsetenv("HEAPPROFILE"); + unsetenv("HEAPCHECK"); + unsetenv("PERFTOOLS_VERBOSE"); + execlp(g_pprof_path->c_str(), g_pprof_path->c_str(), + "--symbols", argv0, NULL); + _exit(3); // if execvp fails, it's bad news for us + } + default: { // parent + close(child_in[0]); // child uses the 0's, parent uses the 1's + close(child_out[0]); // child uses the 0's, parent uses the 1's +#ifdef HAVE_POLL_H + // Waiting for 1ms seems to give the OS time to notice any errors. + poll(0, 0, 1); + // For maximum safety, we check to make sure the execlp + // succeeded before trying to write. (Otherwise we'll get a + // SIGPIPE.) For systems without poll.h, we'll just skip this + // check, and trust that the user set PPROF_PATH correctly! 
+ struct pollfd pfd = { child_in[1], POLLOUT, 0 }; + if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) || + (pfd.revents & (POLLHUP|POLLERR))) { + PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)"); + return 0; + } +#endif +#if defined(__CYGWIN__) || defined(__CYGWIN32__) + // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd. Convert. + const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]); + DumpProcSelfMaps(symbols_handle); +#else + DumpProcSelfMaps(child_in[1]); // what pprof expects on stdin +#endif + + // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each + // address to feed to pprof. + const int kOutBufSize = 24 * symbolization_table_.size(); + char *pprof_buffer = new char[kOutBufSize]; + int written = 0; + for (SymbolMap::const_iterator iter = symbolization_table_.begin(); + iter != symbolization_table_.end(); ++iter) { + written += snprintf(pprof_buffer + written, kOutBufSize - written, + // pprof expects format to be 0xXXXXXX + "0x%" PRIxPTR "\n", reinterpret_cast<uintptr_t>(iter->first)); + } + write(child_in[1], pprof_buffer, strlen(pprof_buffer)); + close(child_in[1]); // that's all we need to write + delete[] pprof_buffer; + + const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size(); + int total_bytes_read = 0; + delete[] symbol_buffer_; + symbol_buffer_ = new char[kSymbolBufferSize]; + memset(symbol_buffer_, '\0', kSymbolBufferSize); + while (1) { + int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read, + kSymbolBufferSize - total_bytes_read); + if (bytes_read < 0) { + close(child_out[1]); + PrintError("Cannot read data from pprof"); + return 0; + } else if (bytes_read == 0) { + close(child_out[1]); + wait(NULL); + break; + } else { + total_bytes_read += bytes_read; + } + } + // We have successfully read the output of pprof into out. Make sure + // the last symbol is full (we can tell because it ends with a \n). 
+ if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n') + return 0; + // make the symbolization_table_ values point to the output vector + SymbolMap::iterator fill = symbolization_table_.begin(); + int num_symbols = 0; + const char *current_name = symbol_buffer_; + for (int i = 0; i < total_bytes_read; i++) { + if (symbol_buffer_[i] == '\n') { + fill->second = current_name; + symbol_buffer_[i] = '\0'; + current_name = symbol_buffer_ + i + 1; + fill++; + num_symbols++; + } + } + return num_symbols; + } + } + PrintError("Unkown error (should never occur!)"); + return 0; // shouldn't be reachable +#endif +} diff --git a/src/third_party/gperftools-2.7/src/symbolize.h b/src/third_party/gperftools-2.7/src/symbolize.h new file mode 100644 index 00000000000..728d073308a --- /dev/null +++ b/src/third_party/gperftools-2.7/src/symbolize.h @@ -0,0 +1,84 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein + +#ifndef TCMALLOC_SYMBOLIZE_H_ +#define TCMALLOC_SYMBOLIZE_H_ + +#include "config.h" +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif +#include <stddef.h> // for NULL +#include <map> + +using std::map; + +// SymbolTable encapsulates the address operations necessary for stack trace +// symbolization. A common use-case is to Add() the addresses from one or +// several stack traces to a table, call Symbolize() once and use GetSymbol() +// to get the symbol names for pretty-printing the stack traces. +class SymbolTable { + public: + SymbolTable() + : symbol_buffer_(NULL) {} + ~SymbolTable() { + delete[] symbol_buffer_; + } + + // Adds an address to the table. This may overwrite a currently known symbol + // name, so Add() should not generally be called after Symbolize(). + void Add(const void* addr); + + // Returns the symbol name for addr, if the given address was added before + // the last successful call to Symbolize(). Otherwise may return an empty + // c-string. + const char* GetSymbol(const void* addr); + + // Obtains the symbol names for the addresses stored in the table and returns + // the number of addresses actually symbolized. + int Symbolize(); + + private: + typedef map<const void*, const char*> SymbolMap; + + // An average size of memory allocated for a stack trace symbol. + static const int kSymbolSize = 1024; + + // Map from addresses to symbol names. 
+ SymbolMap symbolization_table_; + + // Pointer to the buffer that stores the symbol names. + char *symbol_buffer_; +}; + +#endif // TCMALLOC_SYMBOLIZE_H_ diff --git a/src/third_party/gperftools-2.7/src/system-alloc.cc b/src/third_party/gperftools-2.7/src/system-alloc.cc new file mode 100755 index 00000000000..292e482aef6 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/system-alloc.cc @@ -0,0 +1,555 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat + +#include <config.h> +#include <errno.h> // for EAGAIN, errno +#include <fcntl.h> // for open, O_RDWR +#include <stddef.h> // for size_t, NULL, ptrdiff_t +#if defined HAVE_STDINT_H +#include <stdint.h> // for uintptr_t, intptr_t +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> // for munmap, mmap, MADV_DONTNEED, etc +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for sbrk, getpagesize, off_t +#endif +#include <new> // for operator new +#include <gperftools/malloc_extension.h> +#include "base/basictypes.h" +#include "base/commandlineflags.h" +#include "base/spinlock.h" // for SpinLockHolder, SpinLock, etc +#include "common.h" +#include "internal_logging.h" + +// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old +// form of the name instead. +#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +// Linux added support for MADV_FREE in 4.5 but we aren't ready to use it +// yet. Among other things, using compile-time detection leads to poor +// results when compiling on a system with MADV_FREE and running on a +// system without it. See https://github.com/gperftools/gperftools/issues/780. 
+#if defined(__linux__) && defined(MADV_FREE) && !defined(TCMALLOC_USE_MADV_FREE) +# undef MADV_FREE +#endif + +// MADV_FREE is specifically designed for use by malloc(), but only +// FreeBSD supports it; in linux we fall back to the somewhat inferior +// MADV_DONTNEED. +#if !defined(MADV_FREE) && defined(MADV_DONTNEED) +# define MADV_FREE MADV_DONTNEED +#endif + +// Solaris has a bug where it doesn't declare madvise() for C++. +// http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0 +#if defined(__sun) && defined(__SVR4) +# include <sys/types.h> // for caddr_t + extern "C" { extern int madvise(caddr_t, size_t, int); } +#endif + +// Set kDebugMode mode so that we can have use C++ conditionals +// instead of preprocessor conditionals. +#ifdef NDEBUG +static const bool kDebugMode = false; +#else +static const bool kDebugMode = true; +#endif + +// TODO(sanjay): Move the code below into the tcmalloc namespace +using tcmalloc::kLog; +using tcmalloc::Log; + +// Check that no bit is set at position ADDRESS_BITS or higher. +static bool CheckAddressBits(uintptr_t ptr) { + bool always_ok = (kAddressBits == 8 * sizeof(void*)); + // this is a bit insane but otherwise we get compiler warning about + // shifting right by word size even if this code is dead :( + int shift_bits = always_ok ? 0 : kAddressBits; + return always_ok || ((ptr >> shift_bits) == 0); +} + +COMPILE_ASSERT(kAddressBits <= 8 * sizeof(void*), + address_bits_larger_than_pointer_size); + +static SpinLock spinlock(SpinLock::LINKER_INITIALIZED); + +#if defined(HAVE_MMAP) || defined(MADV_FREE) +// Page size is initialized on demand (only needed for mmap-based allocators) +static size_t pagesize = 0; +#endif + +// The current system allocator +SysAllocator* tcmalloc_sys_alloc = NULL; + +// Number of bytes taken from system. +size_t TCMalloc_SystemTaken = 0; + +// Configuration parameters. 
+DEFINE_int32(malloc_devmem_start, + EnvToInt("TCMALLOC_DEVMEM_START", 0), + "Physical memory starting location in MB for /dev/mem allocation." + " Setting this to 0 disables /dev/mem allocation"); +DEFINE_int32(malloc_devmem_limit, + EnvToInt("TCMALLOC_DEVMEM_LIMIT", 0), + "Physical memory limit location in MB for /dev/mem allocation." + " Setting this to 0 means no limit."); +DEFINE_bool(malloc_skip_sbrk, + EnvToBool("TCMALLOC_SKIP_SBRK", false), + "Whether sbrk can be used to obtain memory."); +DEFINE_bool(malloc_skip_mmap, + EnvToBool("TCMALLOC_SKIP_MMAP", false), + "Whether mmap can be used to obtain memory."); +DEFINE_bool(malloc_disable_memory_release, + EnvToBool("TCMALLOC_DISABLE_MEMORY_RELEASE", false), + "Whether MADV_FREE/MADV_DONTNEED should be used" + " to return unused memory to the system."); + +// static allocators +class SbrkSysAllocator : public SysAllocator { +public: + SbrkSysAllocator() : SysAllocator() { + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); +}; +static union { + char buf[sizeof(SbrkSysAllocator)]; + void *ptr; +} sbrk_space; + +class MmapSysAllocator : public SysAllocator { +public: + MmapSysAllocator() : SysAllocator() { + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); +}; +static union { + char buf[sizeof(MmapSysAllocator)]; + void *ptr; +} mmap_space; + +class DevMemSysAllocator : public SysAllocator { +public: + DevMemSysAllocator() : SysAllocator() { + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); +}; + +class DefaultSysAllocator : public SysAllocator { + public: + DefaultSysAllocator() : SysAllocator() { + for (int i = 0; i < kMaxAllocators; i++) { + failed_[i] = true; + allocs_[i] = NULL; + names_[i] = NULL; + } + } + void SetChildAllocator(SysAllocator* alloc, unsigned int index, + const char* name) { + if (index < kMaxAllocators && alloc != NULL) { + allocs_[index] = alloc; + failed_[index] = false; + names_[index] = name; + } + } + void* Alloc(size_t 
size, size_t *actual_size, size_t alignment); + + private: + static const int kMaxAllocators = 2; + bool failed_[kMaxAllocators]; + SysAllocator* allocs_[kMaxAllocators]; + const char* names_[kMaxAllocators]; +}; +static union { + char buf[sizeof(DefaultSysAllocator)]; + void *ptr; +} default_space; +static const char sbrk_name[] = "SbrkSysAllocator"; +static const char mmap_name[] = "MmapSysAllocator"; + + +void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { +#if !defined(HAVE_SBRK) || defined(__UCLIBC__) + return NULL; +#else + // Check if we should use sbrk allocation. + // FLAGS_malloc_skip_sbrk starts out as false (its uninitialized + // state) and eventually gets initialized to the specified value. Note + // that this code runs for a while before the flags are initialized. + // That means that even if this flag is set to true, some (initial) + // memory will be allocated with sbrk before the flag takes effect. + if (FLAGS_malloc_skip_sbrk) { + return NULL; + } + + // sbrk will release memory if passed a negative number, so we do + // a strict check here + if (static_cast<ptrdiff_t>(size + alignment) < 0) return NULL; + + // This doesn't overflow because TCMalloc_SystemAlloc has already + // tested for overflow at the alignment boundary. + size = ((size + alignment - 1) / alignment) * alignment; + + // "actual_size" indicates that the bytes from the returned pointer + // p up to and including (p + actual_size - 1) have been allocated. + if (actual_size) { + *actual_size = size; + } + + // Check that we we're not asking for so much more memory that we'd + // wrap around the end of the virtual address space. 
(This seems + // like something sbrk() should check for us, and indeed opensolaris + // does, but glibc does not: + // http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/lib/libc/port/sys/sbrk.c?a=true + // http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/libc/misc/sbrk.c?rev=1.1.2.1&content-type=text/plain&cvsroot=glibc + // Without this check, sbrk may succeed when it ought to fail.) + if (reinterpret_cast<intptr_t>(sbrk(0)) + size < size) { + return NULL; + } + + void* result = sbrk(size); + if (result == reinterpret_cast<void*>(-1)) { + return NULL; + } + + // Is it aligned? + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + if ((ptr & (alignment-1)) == 0) return result; + + // Try to get more memory for alignment + size_t extra = alignment - (ptr & (alignment-1)); + void* r2 = sbrk(extra); + if (reinterpret_cast<uintptr_t>(r2) == (ptr + size)) { + // Contiguous with previous result + return reinterpret_cast<void*>(ptr + extra); + } + + // Give up and ask for "size + alignment - 1" bytes so + // that we can find an aligned region within it. + result = sbrk(size + alignment - 1); + if (result == reinterpret_cast<void*>(-1)) { + return NULL; + } + ptr = reinterpret_cast<uintptr_t>(result); + if ((ptr & (alignment-1)) != 0) { + ptr += alignment - (ptr & (alignment-1)); + } + return reinterpret_cast<void*>(ptr); +#endif // HAVE_SBRK +} + +void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { +#ifndef HAVE_MMAP + return NULL; +#else + // Check if we should use mmap allocation. + // FLAGS_malloc_skip_mmap starts out as false (its uninitialized + // state) and eventually gets initialized to the specified value. Note + // that this code runs for a while before the flags are initialized. + // Chances are we never get here before the flags are initialized since + // sbrk is used until the heap is exhausted (before mmap is used). 
+ if (FLAGS_malloc_skip_mmap) { + return NULL; + } + + // Enforce page alignment + if (pagesize == 0) pagesize = getpagesize(); + if (alignment < pagesize) alignment = pagesize; + size_t aligned_size = ((size + alignment - 1) / alignment) * alignment; + if (aligned_size < size) { + return NULL; + } + size = aligned_size; + + // "actual_size" indicates that the bytes from the returned pointer + // p up to and including (p + actual_size - 1) have been allocated. + if (actual_size) { + *actual_size = size; + } + + // Ask for extra memory if alignment > pagesize + size_t extra = 0; + if (alignment > pagesize) { + extra = alignment - pagesize; + } + + // Note: size + extra does not overflow since: + // size + alignment < (1<<NBITS). + // and extra <= alignment + // therefore size + extra < (1<<NBITS) + void* result = mmap(NULL, size + extra, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + if (result == reinterpret_cast<void*>(MAP_FAILED)) { + return NULL; + } + + // Adjust the return memory so it is aligned + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + size_t adjust = 0; + if ((ptr & (alignment - 1)) != 0) { + adjust = alignment - (ptr & (alignment - 1)); + } + + // Return the unused memory to the system + if (adjust > 0) { + munmap(reinterpret_cast<void*>(ptr), adjust); + } + if (adjust < extra) { + munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust); + } + + ptr += adjust; + return reinterpret_cast<void*>(ptr); +#endif // HAVE_MMAP +} + +void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { +#ifndef HAVE_MMAP + return NULL; +#else + static bool initialized = false; + static off_t physmem_base; // next physical memory address to allocate + static off_t physmem_limit; // maximum physical address allowed + static int physmem_fd; // file descriptor for /dev/mem + + // Check if we should use /dev/mem allocation. 
Note that it may take + // a while to get this flag initialized, so meanwhile we fall back to + // the next allocator. (It looks like 7MB gets allocated before + // this flag gets initialized -khr.) + if (FLAGS_malloc_devmem_start == 0) { + // NOTE: not a devmem_failure - we'd like TCMalloc_SystemAlloc to + // try us again next time. + return NULL; + } + + if (!initialized) { + physmem_fd = open("/dev/mem", O_RDWR); + if (physmem_fd < 0) { + return NULL; + } + physmem_base = FLAGS_malloc_devmem_start*1024LL*1024LL; + physmem_limit = FLAGS_malloc_devmem_limit*1024LL*1024LL; + initialized = true; + } + + // Enforce page alignment + if (pagesize == 0) pagesize = getpagesize(); + if (alignment < pagesize) alignment = pagesize; + size_t aligned_size = ((size + alignment - 1) / alignment) * alignment; + if (aligned_size < size) { + return NULL; + } + size = aligned_size; + + // "actual_size" indicates that the bytes from the returned pointer + // p up to and including (p + actual_size - 1) have been allocated. + if (actual_size) { + *actual_size = size; + } + + // Ask for extra memory if alignment > pagesize + size_t extra = 0; + if (alignment > pagesize) { + extra = alignment - pagesize; + } + + // check to see if we have any memory left + if (physmem_limit != 0 && + ((size + extra) > (physmem_limit - physmem_base))) { + return NULL; + } + + // Note: size + extra does not overflow since: + // size + alignment < (1<<NBITS). 
+ // and extra <= alignment + // therefore size + extra < (1<<NBITS) + void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ, + MAP_SHARED, physmem_fd, physmem_base); + if (result == reinterpret_cast<void*>(MAP_FAILED)) { + return NULL; + } + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + + // Adjust the return memory so it is aligned + size_t adjust = 0; + if ((ptr & (alignment - 1)) != 0) { + adjust = alignment - (ptr & (alignment - 1)); + } + + // Return the unused virtual memory to the system + if (adjust > 0) { + munmap(reinterpret_cast<void*>(ptr), adjust); + } + if (adjust < extra) { + munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust); + } + + ptr += adjust; + physmem_base += adjust + size; + + return reinterpret_cast<void*>(ptr); +#endif // HAVE_MMAP +} + +void* DefaultSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { + for (int i = 0; i < kMaxAllocators; i++) { + if (!failed_[i] && allocs_[i] != NULL) { + void* result = allocs_[i]->Alloc(size, actual_size, alignment); + if (result != NULL) { + return result; + } + failed_[i] = true; + } + } + // After both failed, reset "failed_" to false so that a single failed + // allocation won't make the allocator never work again. + for (int i = 0; i < kMaxAllocators; i++) { + failed_[i] = false; + } + return NULL; +} + +ATTRIBUTE_WEAK ATTRIBUTE_NOINLINE +SysAllocator *tc_get_sysalloc_override(SysAllocator *def) +{ + return def; +} + +static bool system_alloc_inited = false; +void InitSystemAllocators(void) { + MmapSysAllocator *mmap = new (mmap_space.buf) MmapSysAllocator(); + SbrkSysAllocator *sbrk = new (sbrk_space.buf) SbrkSysAllocator(); + + // In 64-bit debug mode, place the mmap allocator first since it + // allocates pointers that do not fit in 32 bits and therefore gives + // us better testing of code's 64-bit correctness. It also leads to + // less false negatives in heap-checking code. 
(Numbers are less + // likely to look like pointers and therefore the conservative gc in + // the heap-checker is less likely to misinterpret a number as a + // pointer). + DefaultSysAllocator *sdef = new (default_space.buf) DefaultSysAllocator(); + if (kDebugMode && sizeof(void*) > 4) { + sdef->SetChildAllocator(mmap, 0, mmap_name); + sdef->SetChildAllocator(sbrk, 1, sbrk_name); + } else { + sdef->SetChildAllocator(sbrk, 0, sbrk_name); + sdef->SetChildAllocator(mmap, 1, mmap_name); + } + + tcmalloc_sys_alloc = tc_get_sysalloc_override(sdef); +} + +void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, + size_t alignment) { + // Discard requests that overflow + if (size + alignment < size) return NULL; + + SpinLockHolder lock_holder(&spinlock); + + if (!system_alloc_inited) { + InitSystemAllocators(); + system_alloc_inited = true; + } + + // Enforce minimum alignment + if (alignment < sizeof(MemoryAligner)) alignment = sizeof(MemoryAligner); + + size_t actual_size_storage; + if (actual_size == NULL) { + actual_size = &actual_size_storage; + } + + void* result = tcmalloc_sys_alloc->Alloc(size, actual_size, alignment); + if (result != NULL) { + CHECK_CONDITION( + CheckAddressBits(reinterpret_cast<uintptr_t>(result) + *actual_size - 1)); + TCMalloc_SystemTaken += *actual_size; + } + return result; +} + +bool TCMalloc_SystemRelease(void* start, size_t length) { +#ifdef MADV_FREE + if (FLAGS_malloc_devmem_start) { + // It's not safe to use MADV_FREE/MADV_DONTNEED if we've been + // mapping /dev/mem for heap memory. 
+ return false; + } + if (FLAGS_malloc_disable_memory_release) return false; + if (pagesize == 0) pagesize = getpagesize(); + const size_t pagemask = pagesize - 1; + + size_t new_start = reinterpret_cast<size_t>(start); + size_t end = new_start + length; + size_t new_end = end; + + // Round up the starting address and round down the ending address + // to be page aligned: + new_start = (new_start + pagesize - 1) & ~pagemask; + new_end = new_end & ~pagemask; + + ASSERT((new_start & pagemask) == 0); + ASSERT((new_end & pagemask) == 0); + ASSERT(new_start >= reinterpret_cast<size_t>(start)); + ASSERT(new_end <= end); + + if (new_end > new_start) { + int result; + do { + result = madvise(reinterpret_cast<char*>(new_start), + new_end - new_start, MADV_FREE); + } while (result == -1 && errno == EAGAIN); + + return result != -1; + } +#endif + return false; +} + +void TCMalloc_SystemCommit(void* start, size_t length) { + // Nothing to do here. TCMalloc_SystemRelease does not alter pages + // such that they need to be re-committed before they can be used by the + // application. +} diff --git a/src/third_party/gperftools-2.7/src/system-alloc.h b/src/third_party/gperftools-2.7/src/system-alloc.h new file mode 100644 index 00000000000..655d470804d --- /dev/null +++ b/src/third_party/gperftools-2.7/src/system-alloc.h @@ -0,0 +1,92 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Routine that uses sbrk/mmap to allocate memory from the system. +// Useful for implementing malloc. + +#ifndef TCMALLOC_SYSTEM_ALLOC_H_ +#define TCMALLOC_SYSTEM_ALLOC_H_ + +#include <config.h> +#include <stddef.h> // for size_t + +class SysAllocator; + +// REQUIRES: "alignment" is a power of two or "0" to indicate default alignment +// +// Allocate and return "N" bytes of zeroed memory. +// +// If actual_bytes is NULL then the returned memory is exactly the +// requested size. If actual bytes is non-NULL then the allocator +// may optionally return more bytes than asked for (i.e. return an +// entire "huge" page if a huge page allocator is in use). +// +// The returned pointer is a multiple of "alignment" if non-zero. The +// returned pointer will always be aligned suitably for holding a +// void*, double, or size_t. In addition, if this platform defines +// CACHELINE_ALIGNED, the return pointer will always be cacheline +// aligned. 
+// +// Returns NULL when out of memory. +extern PERFTOOLS_DLL_DECL +void* TCMalloc_SystemAlloc(size_t bytes, size_t *actual_bytes, + size_t alignment = 0); + +// This call is a hint to the operating system that the pages +// contained in the specified range of memory will not be used for a +// while, and can be released for use by other processes or the OS. +// Pages which are released in this way may be destroyed (zeroed) by +// the OS. The benefit of this function is that it frees memory for +// use by the system, the cost is that the pages are faulted back into +// the address space next time they are touched, which can impact +// performance. (Only pages fully covered by the memory region will +// be released, partial pages will not.) +// +// Returns false if release failed or not supported. +extern PERFTOOLS_DLL_DECL +bool TCMalloc_SystemRelease(void* start, size_t length); + +// Called to ressurect memory which has been previously released +// to the system via TCMalloc_SystemRelease. An attempt to +// commit a page that is already committed does not cause this +// function to fail. +extern PERFTOOLS_DLL_DECL +void TCMalloc_SystemCommit(void* start, size_t length); + +// The current system allocator. +extern PERFTOOLS_DLL_DECL SysAllocator* tcmalloc_sys_alloc; + +// Number of bytes taken from system. +extern PERFTOOLS_DLL_DECL size_t TCMalloc_SystemTaken; + +#endif /* TCMALLOC_SYSTEM_ALLOC_H_ */ diff --git a/src/third_party/gperftools-2.7/src/tcmalloc.cc b/src/third_party/gperftools-2.7/src/tcmalloc.cc new file mode 100644 index 00000000000..7b18ddbcb71 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/tcmalloc.cc @@ -0,0 +1,2198 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// A malloc that uses a per-thread cache to satisfy small malloc requests. +// (The time for malloc/free of a small object drops from 300 ns to 50 ns.) +// +// See docs/tcmalloc.html for a high-level +// description of how this malloc works. +// +// SYNCHRONIZATION +// 1. The thread-specific lists are accessed without acquiring any locks. +// This is safe because each such list is only accessed by one thread. 
+// 2. We have a lock per central free-list, and hold it while manipulating +// the central free list for a particular size. +// 3. The central page allocator is protected by "pageheap_lock". +// 4. The pagemap (which maps from page-number to descriptor), +// can be read without holding any locks, and written while holding +// the "pageheap_lock". +// 5. To improve performance, a subset of the information one can get +// from the pagemap is cached in a data structure, pagemap_cache_, +// that atomically reads and writes its entries. This cache can be +// read and written without locking. +// +// This multi-threaded access to the pagemap is safe for fairly +// subtle reasons. We basically assume that when an object X is +// allocated by thread A and deallocated by thread B, there must +// have been appropriate synchronization in the handoff of object +// X from thread A to thread B. The same logic applies to pagemap_cache_. +// +// THE PAGEID-TO-SIZECLASS CACHE +// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache +// returns 0 for a particular PageID then that means "no information," not that +// the sizeclass is 0. The cache may have stale information for pages that do +// not hold the beginning of any free()'able object. Staleness is eliminated +// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and +// do_memalign() for all other relevant pages. +// +// PAGEMAP +// ------- +// Page map contains a mapping from page id to Span. +// +// If Span s occupies pages [p..q], +// pagemap[p] == s +// pagemap[q] == s +// pagemap[p+1..q-1] are undefined +// pagemap[p-1] and pagemap[q+1] are defined: +// NULL if the corresponding page is not yet in the address space. +// Otherwise it points to a Span. This span may be free +// or allocated. If free, it is in one of pageheap's freelist. 
+// +// TODO: Bias reclamation to larger addresses +// TODO: implement mallinfo/mallopt +// TODO: Better testing +// +// 9/28/2003 (new page-level allocator replaces ptmalloc2): +// * malloc/free of small objects goes from ~300 ns to ~50 ns. +// * allocation of a reasonably complicated struct +// goes from about 1100 ns to about 300 ns. + +#include "config.h" +// At least for gcc on Linux/i386 and Linux/amd64 not adding throw() +// to tc_xxx functions actually ends up generating better code. +#define PERFTOOLS_NOTHROW +#include <gperftools/tcmalloc.h> + +#include <errno.h> // for ENOMEM, EINVAL, errno +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include <stddef.h> // for size_t, NULL +#include <stdlib.h> // for getenv +#include <string.h> // for strcmp, memset, strlen, etc +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for getpagesize, write, etc +#endif +#include <algorithm> // for max, min +#include <limits> // for numeric_limits +#include <new> // for nothrow_t (ptr only), etc +#include <vector> // for vector + +#include <gperftools/malloc_extension.h> +#include <gperftools/malloc_hook.h> // for MallocHook +#include <gperftools/nallocx.h> +#include "base/basictypes.h" // for int64 +#include "base/commandlineflags.h" // for RegisterFlagValidator, etc +#include "base/dynamic_annotations.h" // for RunningOnValgrind +#include "base/spinlock.h" // for SpinLockHolder +#include "central_freelist.h" // for CentralFreeListPadded +#include "common.h" // for StackTrace, kPageShift, etc +#include "internal_logging.h" // for ASSERT, TCMalloc_Printer, etc +#include "linked_list.h" // for SLL_SetNext +#include "malloc_hook-inl.h" // for MallocHook::InvokeNewHook, etc +#include "page_heap.h" // for PageHeap, PageHeap::Stats +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "span.h" // for Span, DLL_Prepend, etc +#include "stack_trace_table.h" // for 
StackTraceTable +#include "static_vars.h" // for Static +#include "system-alloc.h" // for DumpSystemAllocatorStats, etc +#include "tcmalloc_guard.h" // for TCMallocGuard +#include "thread_cache.h" // for ThreadCache + +#include "maybe_emergency_malloc.h" + +#if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS) +# define WIN32_DO_PATCHING 1 +#endif + +// Some windows file somewhere (at least on cygwin) #define's small (!) +#undef small + +using STL_NAMESPACE::max; +using STL_NAMESPACE::min; +using STL_NAMESPACE::numeric_limits; +using STL_NAMESPACE::vector; + +#include "libc_override.h" + +using tcmalloc::AlignmentForSize; +using tcmalloc::kLog; +using tcmalloc::kCrash; +using tcmalloc::kCrashWithStats; +using tcmalloc::Log; +using tcmalloc::PageHeap; +using tcmalloc::PageHeapAllocator; +using tcmalloc::SizeMap; +using tcmalloc::Span; +using tcmalloc::StackTrace; +using tcmalloc::Static; +using tcmalloc::ThreadCache; + +DECLARE_double(tcmalloc_release_rate); + +// Those common architectures are known to be safe w.r.t. aliasing function +// with "extra" unused args to function with fewer arguments (e.g. +// tc_delete_nothrow being aliased to tc_delete). +// +// Benefit of aliasing is relatively moderate. It reduces instruction +// cache pressure a bit (not relevant for largely unused +// tc_delete_nothrow, but is potentially relevant for +// tc_delete_aligned (or sized)). It also used to be the case that gcc +// 5+ optimization for merging identical functions kicked in and +// "screwed" one of the otherwise identical functions with extra +// jump. I am not able to reproduce that anymore. 
+#if !defined(__i386__) && !defined(__x86_64__) && \ + !defined(__ppc__) && !defined(__PPC__) && \ + !defined(__aarch64__) && !defined(__mips__) && !defined(__arm__) +#undef TCMALLOC_NO_ALIASES +#define TCMALLOC_NO_ALIASES +#endif + +#if defined(__GNUC__) && defined(__ELF__) && !defined(TCMALLOC_NO_ALIASES) +#define TC_ALIAS(name) __attribute__((alias(#name))) +#endif + +// For windows, the printf we use to report large allocs is +// potentially dangerous: it could cause a malloc that would cause an +// infinite loop. So by default we set the threshold to a huge number +// on windows, so this bad situation will never trigger. You can +// always set TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD manually if you +// want this functionality. +#ifdef _WIN32 +const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 62; +#else +const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 30; +#endif +DEFINE_int64(tcmalloc_large_alloc_report_threshold, + EnvToInt64("TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD", + kDefaultLargeAllocReportThreshold), + "Allocations larger than this value cause a stack " + "trace to be dumped to stderr. The threshold for " + "dumping stack traces is increased by a factor of 1.125 " + "every time we print a message so that the threshold " + "automatically goes up by a factor of ~1000 every 60 " + "messages. This bounds the amount of extra logging " + "generated by this flag. Default value of this flag " + "is very large and therefore you should see no extra " + "logging unless the flag is overridden. Set to 0 to " + "disable reporting entirely."); + + +// We already declared these functions in tcmalloc.h, but we have to +// declare them again to give them an ATTRIBUTE_SECTION: we want to +// put all callers of MallocHook::Invoke* in this module into +// ATTRIBUTE_SECTION(google_malloc) section, so that +// MallocHook::GetCallerStackTrace can function accurately. 
+#ifndef _WIN32 // windows doesn't have attribute_section, so don't bother +extern "C" { + void* tc_malloc(size_t size) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void tc_free(void* ptr) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void tc_free_sized(void* ptr, size_t size) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_realloc(void* ptr, size_t size) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void tc_cfree(void* ptr) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + + void* tc_memalign(size_t __alignment, size_t __size) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + int tc_posix_memalign(void** ptr, size_t align, size_t size) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_valloc(size_t __size) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_pvalloc(size_t __size) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + + void tc_malloc_stats(void) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + int tc_mallopt(int cmd, int value) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); +#ifdef HAVE_STRUCT_MALLINFO + struct mallinfo tc_mallinfo(void) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); +#endif + + void* tc_new(size_t size) + ATTRIBUTE_SECTION(google_malloc); + void tc_delete(void* p) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void tc_delete_sized(void* p, size_t size) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_newarray(size_t size) + ATTRIBUTE_SECTION(google_malloc); + void tc_deletearray(void* p) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void tc_deletearray_sized(void* p, size_t size) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + + // And the nothrow variants of these: + void* tc_new_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + 
void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + // Surprisingly, standard C++ library implementations use a + // nothrow-delete internally. See, eg: + // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html + void tc_delete_nothrow(void* ptr, const std::nothrow_t&) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + +#if defined(ENABLE_ALIGNED_NEW_DELETE) + + void* tc_new_aligned(size_t size, std::align_val_t al) + ATTRIBUTE_SECTION(google_malloc); + void tc_delete_aligned(void* p, std::align_val_t al) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void tc_delete_sized_aligned(void* p, size_t size, std::align_val_t al) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_newarray_aligned(size_t size, std::align_val_t al) + ATTRIBUTE_SECTION(google_malloc); + void tc_deletearray_aligned(void* p, std::align_val_t al) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void tc_deletearray_sized_aligned(void* p, size_t size, std::align_val_t al) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + + // And the nothrow variants of these: + void* tc_new_aligned_nothrow(size_t size, std::align_val_t al, const std::nothrow_t&) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_newarray_aligned_nothrow(size_t size, std::align_val_t al, const std::nothrow_t&) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void tc_delete_aligned_nothrow(void* ptr, std::align_val_t al, const std::nothrow_t&) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + void tc_deletearray_aligned_nothrow(void* ptr, std::align_val_t al, const std::nothrow_t&) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); + +#endif // defined(ENABLE_ALIGNED_NEW_DELETE) + + // Some non-standard extensions that we support. 
+ + // This is equivalent to + // OS X: malloc_size() + // glibc: malloc_usable_size() + // Windows: _msize() + size_t tc_malloc_size(void* p) PERFTOOLS_NOTHROW + ATTRIBUTE_SECTION(google_malloc); +} // extern "C" +#endif // #ifndef _WIN32 + +// ----------------------- IMPLEMENTATION ------------------------------- + +static int tc_new_mode = 0; // See tc_set_new_mode(). + +// Routines such as free() and realloc() catch some erroneous pointers +// passed to them, and invoke the below when they do. (An erroneous pointer +// won't be caught if it's within a valid span or a stale span for which +// the pagemap cache has a non-zero sizeclass.) This is a cheap (source-editing +// required) kind of exception handling for these routines. +namespace { +ATTRIBUTE_NOINLINE void InvalidFree(void* ptr) { + if (tcmalloc::IsEmergencyPtr(ptr)) { + tcmalloc::EmergencyFree(ptr); + return; + } + Log(kCrash, __FILE__, __LINE__, "Attempt to free invalid pointer", ptr); +} + +size_t InvalidGetSizeForRealloc(const void* old_ptr) { + Log(kCrash, __FILE__, __LINE__, + "Attempt to realloc invalid pointer", old_ptr); + return 0; +} + +size_t InvalidGetAllocatedSize(const void* ptr) { + Log(kCrash, __FILE__, __LINE__, + "Attempt to get the size of an invalid pointer", ptr); + return 0; +} +} // unnamed namespace + +// Extract interesting stats +struct TCMallocStats { + uint64_t thread_bytes; // Bytes in thread caches + uint64_t central_bytes; // Bytes in central cache + uint64_t transfer_bytes; // Bytes in central transfer cache + uint64_t metadata_bytes; // Bytes alloced for metadata + PageHeap::Stats pageheap; // Stats from page heap +}; + +// Get stats into "r". Also, if class_count != NULL, class_count[k] +// will be set to the total number of objects of size class k in the +// central cache, transfer cache, and per-thread caches. If small_spans +// is non-NULL, it is filled. Same for large_spans. 
+static void ExtractStats(TCMallocStats* r, uint64_t* class_count, + PageHeap::SmallSpanStats* small_spans, + PageHeap::LargeSpanStats* large_spans) { + r->central_bytes = 0; + r->transfer_bytes = 0; + for (int cl = 0; cl < Static::num_size_classes(); ++cl) { + const int length = Static::central_cache()[cl].length(); + const int tc_length = Static::central_cache()[cl].tc_length(); + const size_t cache_overhead = Static::central_cache()[cl].OverheadBytes(); + const size_t size = static_cast<uint64_t>( + Static::sizemap()->ByteSizeForClass(cl)); + r->central_bytes += (size * length) + cache_overhead; + r->transfer_bytes += (size * tc_length); + if (class_count) { + // Sum the lengths of all per-class freelists, except the per-thread + // freelists, which get counted when we call GetThreadStats(), below. + class_count[cl] = length + tc_length; + } + + } + + // Add stats from per-thread heaps + r->thread_bytes = 0; + { // scope + SpinLockHolder h(Static::pageheap_lock()); + ThreadCache::GetThreadStats(&r->thread_bytes, class_count); + r->metadata_bytes = tcmalloc::metadata_system_bytes(); + r->pageheap = Static::pageheap()->stats(); + if (small_spans != NULL) { + Static::pageheap()->GetSmallSpanStats(small_spans); + } + if (large_spans != NULL) { + Static::pageheap()->GetLargeSpanStats(large_spans); + } + } +} + +static double PagesToMiB(uint64_t pages) { + return (pages << kPageShift) / 1048576.0; +} + +// WRITE stats to "out" +static void DumpStats(TCMalloc_Printer* out, int level) { + TCMallocStats stats; + uint64_t class_count[kClassSizesMax]; + PageHeap::SmallSpanStats small; + PageHeap::LargeSpanStats large; + if (level >= 2) { + ExtractStats(&stats, class_count, &small, &large); + } else { + ExtractStats(&stats, NULL, NULL, NULL); + } + + static const double MiB = 1048576.0; + + const uint64_t virtual_memory_used = (stats.pageheap.system_bytes + + stats.metadata_bytes); + const uint64_t physical_memory_used = (virtual_memory_used + - 
stats.pageheap.unmapped_bytes); + const uint64_t bytes_in_use_by_app = (physical_memory_used + - stats.metadata_bytes + - stats.pageheap.free_bytes + - stats.central_bytes + - stats.transfer_bytes + - stats.thread_bytes); + +#ifdef TCMALLOC_SMALL_BUT_SLOW + out->printf( + "NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n"); +#endif + out->printf( + "------------------------------------------------\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in thread cache freelists\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n" + "MALLOC: ------------\n" + "MALLOC: = %12" PRIu64 " (%7.1f MiB) Actual memory used (physical + swap)\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes released to OS (aka unmapped)\n" + "MALLOC: ------------\n" + "MALLOC: = %12" PRIu64 " (%7.1f MiB) Virtual address space used\n" + "MALLOC:\n" + "MALLOC: %12" PRIu64 " Spans in use\n" + "MALLOC: %12" PRIu64 " Thread heaps in use\n" + "MALLOC: %12" PRIu64 " Tcmalloc page size\n" + "------------------------------------------------\n" + "Call ReleaseFreeMemory() to release freelist memory to the OS" + " (via madvise()).\n" + "Bytes released to the OS take up virtual address space" + " but no physical memory.\n", + bytes_in_use_by_app, bytes_in_use_by_app / MiB, + stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB, + stats.central_bytes, stats.central_bytes / MiB, + stats.transfer_bytes, stats.transfer_bytes / MiB, + stats.thread_bytes, stats.thread_bytes / MiB, + stats.metadata_bytes, stats.metadata_bytes / MiB, + physical_memory_used, physical_memory_used / MiB, + stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MiB, + virtual_memory_used, virtual_memory_used / MiB, + 
uint64_t(Static::span_allocator()->inuse()), + uint64_t(ThreadCache::HeapsInUse()), + uint64_t(kPageSize)); + + if (level >= 2) { + out->printf("------------------------------------------------\n"); + out->printf("Total size of freelists for per-thread caches,\n"); + out->printf("transfer cache, and central cache, by size class\n"); + out->printf("------------------------------------------------\n"); + uint64_t cumulative = 0; + for (uint32 cl = 0; cl < Static::num_size_classes(); ++cl) { + if (class_count[cl] > 0) { + size_t cl_size = Static::sizemap()->ByteSizeForClass(cl); + uint64_t class_bytes = class_count[cl] * cl_size; + cumulative += class_bytes; + out->printf("class %3d [ %8" PRIuS " bytes ] : " + "%8" PRIu64 " objs; %5.1f MiB; %5.1f cum MiB\n", + cl, cl_size, + class_count[cl], + class_bytes / MiB, + cumulative / MiB); + } + } + + // append page heap info + int nonempty_sizes = 0; + for (int s = 0; s < kMaxPages; s++) { + if (small.normal_length[s] + small.returned_length[s] > 0) { + nonempty_sizes++; + } + } + out->printf("------------------------------------------------\n"); + out->printf("PageHeap: %d sizes; %6.1f MiB free; %6.1f MiB unmapped\n", + nonempty_sizes, stats.pageheap.free_bytes / MiB, + stats.pageheap.unmapped_bytes / MiB); + out->printf("------------------------------------------------\n"); + uint64_t total_normal = 0; + uint64_t total_returned = 0; + for (int s = 1; s <= kMaxPages; s++) { + const int n_length = small.normal_length[s - 1]; + const int r_length = small.returned_length[s - 1]; + if (n_length + r_length > 0) { + uint64_t n_pages = s * n_length; + uint64_t r_pages = s * r_length; + total_normal += n_pages; + total_returned += r_pages; + out->printf("%6u pages * %6u spans ~ %6.1f MiB; %6.1f MiB cum" + "; unmapped: %6.1f MiB; %6.1f MiB cum\n", + s, + (n_length + r_length), + PagesToMiB(n_pages + r_pages), + PagesToMiB(total_normal + total_returned), + PagesToMiB(r_pages), + PagesToMiB(total_returned)); + } + } + + total_normal 
+= large.normal_pages; + total_returned += large.returned_pages; + out->printf(">%-5u large * %6u spans ~ %6.1f MiB; %6.1f MiB cum" + "; unmapped: %6.1f MiB; %6.1f MiB cum\n", + static_cast<unsigned int>(kMaxPages), + static_cast<unsigned int>(large.spans), + PagesToMiB(large.normal_pages + large.returned_pages), + PagesToMiB(total_normal + total_returned), + PagesToMiB(large.returned_pages), + PagesToMiB(total_returned)); + } +} + +static void PrintStats(int level) { + const int kBufferSize = 16 << 10; + char* buffer = new char[kBufferSize]; + TCMalloc_Printer printer(buffer, kBufferSize); + DumpStats(&printer, level); + write(STDERR_FILENO, buffer, strlen(buffer)); + delete[] buffer; +} + +static void** DumpHeapGrowthStackTraces() { + // Count how much space we need + int needed_slots = 0; + { + SpinLockHolder h(Static::pageheap_lock()); + for (StackTrace* t = Static::growth_stacks(); + t != NULL; + t = reinterpret_cast<StackTrace*>( + t->stack[tcmalloc::kMaxStackDepth-1])) { + needed_slots += 3 + t->depth; + } + needed_slots += 100; // Slop in case list grows + needed_slots += needed_slots/8; // An extra 12.5% slop + } + + void** result = new void*[needed_slots]; + if (result == NULL) { + Log(kLog, __FILE__, __LINE__, + "tcmalloc: allocation failed for stack trace slots", + needed_slots * sizeof(*result)); + return NULL; + } + + SpinLockHolder h(Static::pageheap_lock()); + int used_slots = 0; + for (StackTrace* t = Static::growth_stacks(); + t != NULL; + t = reinterpret_cast<StackTrace*>( + t->stack[tcmalloc::kMaxStackDepth-1])) { + ASSERT(used_slots < needed_slots); // Need to leave room for terminator + if (used_slots + 3 + t->depth >= needed_slots) { + // No more room + break; + } + + result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1)); + result[used_slots+1] = reinterpret_cast<void*>(t->size); + result[used_slots+2] = reinterpret_cast<void*>(t->depth); + for (int d = 0; d < t->depth; d++) { + result[used_slots+3+d] = t->stack[d]; + } + 
used_slots += 3 + t->depth; + } + result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0)); + return result; +} + +static void IterateOverRanges(void* arg, MallocExtension::RangeFunction func) { + PageID page = 1; // Some code may assume that page==0 is never used + bool done = false; + while (!done) { + // Accumulate a small number of ranges in a local buffer + static const int kNumRanges = 16; + static base::MallocRange ranges[kNumRanges]; + int n = 0; + { + SpinLockHolder h(Static::pageheap_lock()); + while (n < kNumRanges) { + if (!Static::pageheap()->GetNextRange(page, &ranges[n])) { + done = true; + break; + } else { + uintptr_t limit = ranges[n].address + ranges[n].length; + page = (limit + kPageSize - 1) >> kPageShift; + n++; + } + } + } + + for (int i = 0; i < n; i++) { + (*func)(arg, &ranges[i]); + } + } +} + +// TCMalloc's support for extra malloc interfaces +class TCMallocImplementation : public MallocExtension { + private: + // ReleaseToSystem() might release more than the requested bytes because + // the page heap releases at the span granularity, and spans are of wildly + // different sizes. This member keeps track of the extra bytes bytes + // released so that the app can periodically call ReleaseToSystem() to + // release memory at a constant rate. + // NOTE: Protected by Static::pageheap_lock(). + size_t extra_bytes_released_; + + public: + TCMallocImplementation() + : extra_bytes_released_(0) { + } + + virtual void GetStats(char* buffer, int buffer_length) { + ASSERT(buffer_length > 0); + TCMalloc_Printer printer(buffer, buffer_length); + + // Print level one stats unless lots of space is available + if (buffer_length < 10000) { + DumpStats(&printer, 1); + } else { + DumpStats(&printer, 2); + } + } + + // We may print an extra, tcmalloc-specific warning message here. 
+ virtual void GetHeapSample(MallocExtensionWriter* writer) { + if (FLAGS_tcmalloc_sample_parameter == 0) { + const char* const kWarningMsg = + "%warn\n" + "%warn This heap profile does not have any data in it, because\n" + "%warn the application was run with heap sampling turned off.\n" + "%warn To get useful data from GetHeapSample(), you must\n" + "%warn set the environment variable TCMALLOC_SAMPLE_PARAMETER to\n" + "%warn a positive sampling period, such as 524288.\n" + "%warn\n"; + writer->append(kWarningMsg, strlen(kWarningMsg)); + } + MallocExtension::GetHeapSample(writer); + } + + virtual void** ReadStackTraces(int* sample_period) { + tcmalloc::StackTraceTable table; + { + SpinLockHolder h(Static::pageheap_lock()); + Span* sampled = Static::sampled_objects(); + for (Span* s = sampled->next; s != sampled; s = s->next) { + table.AddTrace(*reinterpret_cast<StackTrace*>(s->objects)); + } + } + *sample_period = ThreadCache::GetCache()->GetSamplePeriod(); + return table.ReadStackTracesAndClear(); // grabs and releases pageheap_lock + } + + virtual void** ReadHeapGrowthStackTraces() { + return DumpHeapGrowthStackTraces(); + } + + virtual size_t GetThreadCacheSize() { + ThreadCache* tc = ThreadCache::GetCacheIfPresent(); + if (!tc) + return 0; + return tc->Size(); + } + + virtual void MarkThreadTemporarilyIdle() { + ThreadCache::BecomeTemporarilyIdle(); + } + + virtual void Ranges(void* arg, RangeFunction func) { + IterateOverRanges(arg, func); + } + + virtual bool GetNumericProperty(const char* name, size_t* value) { + ASSERT(name != NULL); + + if (strcmp(name, "generic.current_allocated_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + *value = stats.pageheap.system_bytes + - stats.thread_bytes + - stats.central_bytes + - stats.transfer_bytes + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes; + return true; + } + + if (strcmp(name, "generic.heap_size") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, 
NULL); + *value = stats.pageheap.system_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.slack_bytes") == 0) { + // Kept for backwards compatibility. Now defined externally as: + // pageheap_free_bytes + pageheap_unmapped_bytes. + SpinLockHolder l(Static::pageheap_lock()); + PageHeap::Stats stats = Static::pageheap()->stats(); + *value = stats.free_bytes + stats.unmapped_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.central_cache_free_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + *value = stats.central_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.transfer_cache_free_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + *value = stats.transfer_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.thread_cache_free_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + *value = stats.thread_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().free_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_unmapped_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().unmapped_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_committed_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().committed_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_scavenge_count") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().scavenge_count; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_commit_count") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().commit_count; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_total_commit_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = 
Static::pageheap()->stats().total_commit_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_decommit_count") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().decommit_count; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_total_decommit_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().total_decommit_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_reserve_count") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().reserve_count; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_total_reserve_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().total_reserve_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = ThreadCache::overall_thread_cache_size(); + return true; + } + + if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + *value = stats.thread_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.aggressive_memory_decommit") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = size_t(Static::pageheap()->GetAggressiveDecommit()); + return true; + } + + return false; + } + + virtual bool SetNumericProperty(const char* name, size_t value) { + ASSERT(name != NULL); + + if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + ThreadCache::set_overall_thread_cache_size(value); + return true; + } + + if (strcmp(name, "tcmalloc.aggressive_memory_decommit") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + Static::pageheap()->SetAggressiveDecommit(value != 0); + return true; + } + + return false; + } + + virtual void MarkThreadIdle() { + ThreadCache::BecomeIdle(); + } + + virtual void 
MarkThreadBusy(); // Implemented below + + virtual SysAllocator* GetSystemAllocator() { + SpinLockHolder h(Static::pageheap_lock()); + return tcmalloc_sys_alloc; + } + + virtual void SetSystemAllocator(SysAllocator* alloc) { + SpinLockHolder h(Static::pageheap_lock()); + tcmalloc_sys_alloc = alloc; + } + + virtual void ReleaseToSystem(size_t num_bytes) { + SpinLockHolder h(Static::pageheap_lock()); + if (num_bytes <= extra_bytes_released_) { + // We released too much on a prior call, so don't release any + // more this time. + extra_bytes_released_ = extra_bytes_released_ - num_bytes; + return; + } + num_bytes = num_bytes - extra_bytes_released_; + // num_bytes might be less than one page. If we pass zero to + // ReleaseAtLeastNPages, it won't do anything, so we release a whole + // page now and let extra_bytes_released_ smooth it out over time. + Length num_pages = max<Length>(num_bytes >> kPageShift, 1); + size_t bytes_released = Static::pageheap()->ReleaseAtLeastNPages( + num_pages) << kPageShift; + if (bytes_released > num_bytes) { + extra_bytes_released_ = bytes_released - num_bytes; + } else { + // The PageHeap wasn't able to release num_bytes. Don't try to + // compensate with a big release next time. Specifically, + // ReleaseFreeMemory() calls ReleaseToSystem(LONG_MAX). + extra_bytes_released_ = 0; + } + } + + virtual void SetMemoryReleaseRate(double rate) { + FLAGS_tcmalloc_release_rate = rate; + } + + virtual double GetMemoryReleaseRate() { + return FLAGS_tcmalloc_release_rate; + } + virtual size_t GetEstimatedAllocatedSize(size_t size); + + // This just calls GetSizeWithCallback, but because that's in an + // unnamed namespace, we need to move the definition below it in the + // file. + virtual size_t GetAllocatedSize(const void* ptr); + + // This duplicates some of the logic in GetSizeWithCallback, but is + // faster. This is important on OS X, where this function is called + // on every allocation operation. 
+ virtual Ownership GetOwnership(const void* ptr) { + const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + // The rest of tcmalloc assumes that all allocated pointers use at + // most kAddressBits bits. If ptr doesn't, then it definitely + // wasn't alloacted by tcmalloc. + if ((p >> (kAddressBits - kPageShift)) > 0) { + return kNotOwned; + } + uint32 cl; + if (Static::pageheap()->TryGetSizeClass(p, &cl)) { + return kOwned; + } + const Span *span = Static::pageheap()->GetDescriptor(p); + return span ? kOwned : kNotOwned; + } + + virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) { + static const char* kCentralCacheType = "tcmalloc.central"; + static const char* kTransferCacheType = "tcmalloc.transfer"; + static const char* kThreadCacheType = "tcmalloc.thread"; + static const char* kPageHeapType = "tcmalloc.page"; + static const char* kPageHeapUnmappedType = "tcmalloc.page_unmapped"; + static const char* kLargeSpanType = "tcmalloc.large"; + static const char* kLargeUnmappedSpanType = "tcmalloc.large_unmapped"; + + v->clear(); + + // central class information + int64 prev_class_size = 0; + for (int cl = 1; cl < Static::num_size_classes(); ++cl) { + size_t class_size = Static::sizemap()->ByteSizeForClass(cl); + MallocExtension::FreeListInfo i; + i.min_object_size = prev_class_size + 1; + i.max_object_size = class_size; + i.total_bytes_free = + Static::central_cache()[cl].length() * class_size; + i.type = kCentralCacheType; + v->push_back(i); + + // transfer cache + i.total_bytes_free = + Static::central_cache()[cl].tc_length() * class_size; + i.type = kTransferCacheType; + v->push_back(i); + + prev_class_size = Static::sizemap()->ByteSizeForClass(cl); + } + + // Add stats from per-thread heaps + uint64_t class_count[kClassSizesMax]; + memset(class_count, 0, sizeof(class_count)); + { + SpinLockHolder h(Static::pageheap_lock()); + uint64_t thread_bytes = 0; + ThreadCache::GetThreadStats(&thread_bytes, class_count); + } + + 
prev_class_size = 0; + for (int cl = 1; cl < Static::num_size_classes(); ++cl) { + MallocExtension::FreeListInfo i; + i.min_object_size = prev_class_size + 1; + i.max_object_size = Static::sizemap()->ByteSizeForClass(cl); + i.total_bytes_free = + class_count[cl] * Static::sizemap()->ByteSizeForClass(cl); + i.type = kThreadCacheType; + v->push_back(i); + + prev_class_size = Static::sizemap()->ByteSizeForClass(cl); + } + + // append page heap info + PageHeap::SmallSpanStats small; + PageHeap::LargeSpanStats large; + { + SpinLockHolder h(Static::pageheap_lock()); + Static::pageheap()->GetSmallSpanStats(&small); + Static::pageheap()->GetLargeSpanStats(&large); + } + + // large spans: mapped + MallocExtension::FreeListInfo span_info; + span_info.type = kLargeSpanType; + span_info.max_object_size = (numeric_limits<size_t>::max)(); + span_info.min_object_size = kMaxPages << kPageShift; + span_info.total_bytes_free = large.normal_pages << kPageShift; + v->push_back(span_info); + + // large spans: unmapped + span_info.type = kLargeUnmappedSpanType; + span_info.total_bytes_free = large.returned_pages << kPageShift; + v->push_back(span_info); + + // small spans + for (int s = 1; s <= kMaxPages; s++) { + MallocExtension::FreeListInfo i; + i.max_object_size = (s << kPageShift); + i.min_object_size = ((s - 1) << kPageShift); + + i.type = kPageHeapType; + i.total_bytes_free = (s << kPageShift) * small.normal_length[s - 1]; + v->push_back(i); + + i.type = kPageHeapUnmappedType; + i.total_bytes_free = (s << kPageShift) * small.returned_length[s - 1]; + v->push_back(i); + } + } +}; + +static inline ATTRIBUTE_ALWAYS_INLINE +size_t align_size_up(size_t size, size_t align) { + ASSERT(align <= kPageSize); + size_t new_size = (size + align - 1) & ~(align - 1); + if (PREDICT_FALSE(new_size == 0)) { + // Note, new_size == 0 catches both integer overflow and size + // being 0. 
+ if (size == 0) { + new_size = align; + } else { + new_size = size; + } + } + return new_size; +} + +// Puts in *cl size class that is suitable for allocation of size bytes with +// align alignment. Returns true if such size class exists and false otherwise. +static bool size_class_with_alignment(size_t size, size_t align, uint32_t* cl) { + if (PREDICT_FALSE(align > kPageSize)) { + return false; + } + size = align_size_up(size, align); + if (PREDICT_FALSE(!Static::sizemap()->GetSizeClass(size, cl))) { + return false; + } + ASSERT((Static::sizemap()->class_to_size(*cl) & (align - 1)) == 0); + return true; +} + +// nallocx slow path. Moved to a separate function because +// ThreadCache::InitModule is not inlined which would cause nallocx to +// become non-leaf function with stack frame and stack spills. +static ATTRIBUTE_NOINLINE size_t nallocx_slow(size_t size, int flags) { + if (PREDICT_FALSE(!Static::IsInited())) ThreadCache::InitModule(); + + size_t align = static_cast<size_t>(1ull << (flags & 0x3f)); + uint32 cl; + bool ok = size_class_with_alignment(size, align, &cl); + if (ok) { + return Static::sizemap()->ByteSizeForClass(cl); + } else { + return tcmalloc::pages(size) << kPageShift; + } +} + +// The nallocx function allocates no memory, but it performs the same size +// computation as the malloc function, and returns the real size of the +// allocation that would result from the equivalent malloc function call. 
+// nallocx is a malloc extension originally implemented by jemalloc: +// http://www.unix.com/man-page/freebsd/3/nallocx/ +extern "C" PERFTOOLS_DLL_DECL +size_t tc_nallocx(size_t size, int flags) { + if (PREDICT_FALSE(flags != 0)) { + return nallocx_slow(size, flags); + } + uint32 cl; + // size class 0 is only possible if malloc is not yet initialized + if (Static::sizemap()->GetSizeClass(size, &cl) && cl != 0) { + return Static::sizemap()->ByteSizeForClass(cl); + } else { + return nallocx_slow(size, 0); + } +} + +extern "C" PERFTOOLS_DLL_DECL +size_t nallocx(size_t size, int flags) +#ifdef TC_ALIAS + TC_ALIAS(tc_nallocx); +#else +{ + return nallocx_slow(size, flags); +} +#endif + + +size_t TCMallocImplementation::GetEstimatedAllocatedSize(size_t size) { + return tc_nallocx(size, 0); +} + +// The constructor allocates an object to ensure that initialization +// runs before main(), and therefore we do not have a chance to become +// multi-threaded before initialization. We also create the TSD key +// here. Presumably by the time this constructor runs, glibc is in +// good enough shape to handle pthread_key_create(). +// +// The constructor also takes the opportunity to tell STL to use +// tcmalloc. We want to do this early, before construct time, so +// all user STL allocations go through tcmalloc (which works really +// well for STL). +// +// The destructor prints stats when the program exits. +static int tcmallocguard_refcount = 0; // no lock needed: runs before main() +TCMallocGuard::TCMallocGuard() { + if (tcmallocguard_refcount++ == 0) { + ReplaceSystemAlloc(); // defined in libc_override_*.h + tc_free(tc_malloc(1)); + ThreadCache::InitTSD(); + tc_free(tc_malloc(1)); + // Either we, or debugallocation.cc, or valgrind will control memory + // management. We register our extension if we're the winner. +#ifdef TCMALLOC_USING_DEBUGALLOCATION + // Let debugallocation register its extension. 
+#else + if (RunningOnValgrind()) { + // Let Valgrind uses its own malloc (so don't register our extension). + } else { + MallocExtension::Register(new TCMallocImplementation); + } +#endif + } +} + +TCMallocGuard::~TCMallocGuard() { + if (--tcmallocguard_refcount == 0) { + const char* env = NULL; + if (!RunningOnValgrind()) { + // Valgrind uses it's own malloc so we cannot do MALLOCSTATS + env = getenv("MALLOCSTATS"); + } + if (env != NULL) { + int level = atoi(env); + if (level < 1) level = 1; + PrintStats(level); + } + } +} +#ifndef WIN32_OVERRIDE_ALLOCATORS +static TCMallocGuard module_enter_exit_hook; +#endif + +//------------------------------------------------------------------- +// Helpers for the exported routines below +//------------------------------------------------------------------- + +static inline bool CheckCachedSizeClass(void *ptr) { + PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + uint32 cached_value; + if (!Static::pageheap()->TryGetSizeClass(p, &cached_value)) { + return true; + } + return cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass; +} + +static inline ATTRIBUTE_ALWAYS_INLINE void* CheckedMallocResult(void *result) { + ASSERT(result == NULL || CheckCachedSizeClass(result)); + return result; +} + +static inline ATTRIBUTE_ALWAYS_INLINE void* SpanToMallocResult(Span *span) { + Static::pageheap()->InvalidateCachedSizeClass(span->start); + return + CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift)); +} + +static void* DoSampledAllocation(size_t size) { +#ifndef NO_TCMALLOC_SAMPLES + // Grab the stack trace outside the heap lock + StackTrace tmp; + tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1); + tmp.size = size; + + SpinLockHolder h(Static::pageheap_lock()); + // Allocate span + Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 
1 : size));
+  if (PREDICT_FALSE(span == NULL)) {
+    return NULL;
+  }
+
+  // Allocate stack trace
+  StackTrace *stack = Static::stacktrace_allocator()->New();
+  if (PREDICT_FALSE(stack == NULL)) {
+    // Sampling failed because of lack of memory
+    return span;
+  }
+  *stack = tmp;
+  span->sample = 1;
+  span->objects = stack;
+  tcmalloc::DLL_Prepend(Static::sampled_objects(), span);
+
+  return SpanToMallocResult(span);
+#else
+  abort();
+#endif
+}
+
+namespace {
+
+typedef void* (*malloc_fn)(void *arg);
+
+SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED);
+
+// Common out-of-memory handling for malloc and operator new.  Sets
+// errno, and — for operator new or when tc_new_mode is set — runs the
+// std::new_handler / retry loop; returns NULL, a successfully retried
+// allocation, or (non-nothrow paths) propagates std::bad_alloc.
+void* handle_oom(malloc_fn retry_fn,
+                 void* retry_arg,
+                 bool from_operator,
+                 bool nothrow) {
+  // we hit out of memory condition, usually if it happens we've
+  // called sbrk or mmap and failed, and thus errno is set. But there
+  // is support for setting up custom system allocator or setting up
+  // page heap size limit, in which cases errno may remain
+  // untouched.
+  //
+  // So we set errno here. C++ operator new doesn't require ENOMEM to
+  // be set, but doesn't forbid it too (and often C++ oom does happen
+  // with ENOMEM set).
+  errno = ENOMEM;
+  if (!from_operator && !tc_new_mode) {
+    // we're out of memory in C library function (malloc etc) and no
+    // "new mode" forced on us. Just return NULL
+    return NULL;
+  }
+  // we're OOM in operator new or "new mode" is set. We might have to
+  // call new_handler and maybe retry allocation.
+
+  for (;;) {
+    // Get the current new handler. NB: this function is not
+    // thread-safe. We make a feeble stab at making it so here, but
+    // this lock only protects against tcmalloc interfering with
+    // itself, not with other libraries calling set_new_handler.
+    std::new_handler nh;
+    {
+      SpinLockHolder h(&set_new_handler_lock);
+      nh = std::set_new_handler(0);
+      (void) std::set_new_handler(nh);
+    }
+#if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS)
+    if (!nh) {
+      return NULL;
+    }
+    // Since exceptions are disabled, we don't really know if new_handler
+    // failed.  Assume it will abort if it fails.
+    (*nh)();
+#else
+    // If no new_handler is established, the allocation failed.
+    if (!nh) {
+      if (nothrow) {
+        return NULL;
+      }
+      throw std::bad_alloc();
+    }
+    // Otherwise, try the new_handler.  If it returns, retry the
+    // allocation.  If it throws std::bad_alloc, fail the allocation.
+    // if it throws something else, don't interfere.
+    try {
+      (*nh)();
+    } catch (const std::bad_alloc&) {
+      if (!nothrow) throw;
+      return NULL;
+    }
+#endif  // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS)
+
+    // we get here if new_handler returns successfully. So we retry
+    // allocation.
+    void* rv = retry_fn(retry_arg);
+    if (rv != NULL) {
+      return rv;
+    }
+
+    // if allocation failed again we go to next loop iteration
+  }
+}
+
+// Copy of FLAGS_tcmalloc_large_alloc_report_threshold with
+// automatic increases factored in.
+static int64_t large_alloc_threshold =
+  (kPageSize > FLAGS_tcmalloc_large_alloc_report_threshold
+   ?
kPageSize : FLAGS_tcmalloc_large_alloc_report_threshold); + +static void ReportLargeAlloc(Length num_pages, void* result) { + StackTrace stack; + stack.depth = GetStackTrace(stack.stack, tcmalloc::kMaxStackDepth, 1); + + static const int N = 1000; + char buffer[N]; + TCMalloc_Printer printer(buffer, N); + printer.printf("tcmalloc: large alloc %" PRIu64 " bytes == %p @ ", + static_cast<uint64>(num_pages) << kPageShift, + result); + for (int i = 0; i < stack.depth; i++) { + printer.printf(" %p", stack.stack[i]); + } + printer.printf("\n"); + write(STDERR_FILENO, buffer, strlen(buffer)); +} + +// Must be called with the page lock held. +inline bool should_report_large(Length num_pages) { + const int64 threshold = large_alloc_threshold; + if (threshold > 0 && num_pages >= (threshold >> kPageShift)) { + // Increase the threshold by 1/8 every time we generate a report. + // We cap the threshold at 8GiB to avoid overflow problems. + large_alloc_threshold = (threshold + threshold/8 < 8ll<<30 + ? threshold + threshold/8 : 8ll<<30); + return true; + } + return false; +} + +// Helper for do_malloc(). +static void* do_malloc_pages(ThreadCache* heap, size_t size) { + void* result; + bool report_large; + + Length num_pages = tcmalloc::pages(size); + + // NOTE: we're passing original size here as opposed to rounded-up + // size as we do in do_malloc_small. The difference is small here + // (at most 4k out of at least 256k). And not rounding up saves us + // from possibility of overflow, which rounding up could produce. + // + // See https://github.com/gperftools/gperftools/issues/723 + if (heap->SampleAllocation(size)) { + result = DoSampledAllocation(size); + + SpinLockHolder h(Static::pageheap_lock()); + report_large = should_report_large(num_pages); + } else { + SpinLockHolder h(Static::pageheap_lock()); + Span* span = Static::pageheap()->New(num_pages); + result = (PREDICT_FALSE(span == NULL) ? 
NULL : SpanToMallocResult(span)); + report_large = should_report_large(num_pages); + } + + if (report_large) { + ReportLargeAlloc(num_pages, result); + } + return result; +} + +static void *nop_oom_handler(size_t size) { + return NULL; +} + +ATTRIBUTE_ALWAYS_INLINE inline void* do_malloc(size_t size) { + if (PREDICT_FALSE(ThreadCache::IsUseEmergencyMalloc())) { + return tcmalloc::EmergencyMalloc(size); + } + + // note: it will force initialization of malloc if necessary + ThreadCache* cache = ThreadCache::GetCache(); + uint32 cl; + + ASSERT(Static::IsInited()); + ASSERT(cache != NULL); + + if (PREDICT_FALSE(!Static::sizemap()->GetSizeClass(size, &cl))) { + return do_malloc_pages(cache, size); + } + + size_t allocated_size = Static::sizemap()->class_to_size(cl); + if (PREDICT_FALSE(cache->SampleAllocation(allocated_size))) { + return DoSampledAllocation(size); + } + + // The common case, and also the simplest. This just pops the + // size-appropriate freelist, after replenishing it if it's empty. + return CheckedMallocResult(cache->Allocate(allocated_size, cl, nop_oom_handler)); +} + +static void *retry_malloc(void* size) { + return do_malloc(reinterpret_cast<size_t>(size)); +} + +ATTRIBUTE_ALWAYS_INLINE inline void* do_malloc_or_cpp_alloc(size_t size) { + void *rv = do_malloc(size); + if (PREDICT_TRUE(rv != NULL)) { + return rv; + } + return handle_oom(retry_malloc, reinterpret_cast<void *>(size), + false, true); +} + +ATTRIBUTE_ALWAYS_INLINE inline void* do_calloc(size_t n, size_t elem_size) { + // Overflow check + const size_t size = n * elem_size; + if (elem_size != 0 && size / elem_size != n) return NULL; + + void* result = do_malloc_or_cpp_alloc(size); + if (result != NULL) { + memset(result, 0, size); + } + return result; +} + +// If ptr is NULL, do nothing. Otherwise invoke the given function. 
+inline void free_null_or_invalid(void* ptr, void (*invalid_free_fn)(void*)) {
+  if (ptr != NULL) {
+    (*invalid_free_fn)(ptr);
+  }
+}
+
+// Releases a whole-span (large or sampled) allocation back to the page
+// heap, freeing the attached sample stack trace if present.
+static ATTRIBUTE_NOINLINE void do_free_pages(Span* span, void* ptr) {
+  SpinLockHolder h(Static::pageheap_lock());
+  if (span->sample) {
+    StackTrace* st = reinterpret_cast<StackTrace*>(span->objects);
+    tcmalloc::DLL_Remove(span);
+    Static::stacktrace_allocator()->Delete(st);
+    span->objects = NULL;
+  }
+  Static::pageheap()->Delete(span);
+}
+
+// Helper for the object deletion (free, delete, etc.).  Inputs:
+//   ptr is object to be freed
+//   invalid_free_fn is a function that gets invoked on certain "bad frees"
+//
+// We can usually detect the case where ptr is not pointing to a page that
+// tcmalloc is using, and in those cases we invoke invalid_free_fn.
+ATTRIBUTE_ALWAYS_INLINE inline
+void do_free_with_callback(void* ptr,
+                           void (*invalid_free_fn)(void*),
+                           bool use_hint, size_t size_hint) {
+  ThreadCache* heap = ThreadCache::GetCacheIfPresent();
+
+  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+  uint32 cl;
+
+#ifndef NO_TCMALLOC_SAMPLES
+  // we only pass size hint when ptr is not page aligned. Which
+  // implies that it must be a very small object.
+  ASSERT(!use_hint || size_hint < kPageSize);
+#endif
+
+  if (!use_hint || PREDICT_FALSE(!Static::sizemap()->GetSizeClass(size_hint, &cl))) {
+    // if we're in sized delete, but size is too large, no need to
+    // probe size cache
+    bool cache_hit = !use_hint && Static::pageheap()->TryGetSizeClass(p, &cl);
+    if (PREDICT_FALSE(!cache_hit)) {
+      Span* span = Static::pageheap()->GetDescriptor(p);
+      if (PREDICT_FALSE(!span)) {
+        // span can be NULL because the pointer passed in is NULL or invalid
+        // (not something returned by malloc or friends), or because the
+        // pointer was allocated with some other allocator besides
+        // tcmalloc.  The latter can happen if tcmalloc is linked in via
+        // a dynamic library, but is not listed last on the link line.
+        // In that case, libraries after it on the link line will
+        // allocate with libc malloc, but free with tcmalloc's free.
+        free_null_or_invalid(ptr, invalid_free_fn);
+        return;
+      }
+      cl = span->sizeclass;
+      if (PREDICT_FALSE(cl == 0)) {
+        // sizeclass 0 means a whole-span (large) allocation.
+        ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
+        ASSERT(span != NULL && span->start == p);
+        do_free_pages(span, ptr);
+        return;
+      }
+      if (!use_hint) {
+        Static::pageheap()->SetCachedSizeClass(p, cl);
+      }
+    }
+  }
+
+  if (PREDICT_TRUE(heap != NULL)) {
+    ASSERT(Static::IsInited());
+    // If we've hit an initialized thread cache, we're done.
+    heap->Deallocate(ptr, cl);
+    return;
+  }
+
+  if (PREDICT_FALSE(!Static::IsInited())) {
+    // if free was called very early we could have missed the case
+    // of invalid or nullptr free. I.e. because probing size classes
+    // cache could return bogus result (cl = 0 as of this
+    // writing). But since there is no way we could be dealing with
+    // ptr we've allocated, since successful malloc implies IsInited,
+    // we can just call "invalid free" handling code.
+    free_null_or_invalid(ptr, invalid_free_fn);
+    return;
+  }
+
+  // Otherwise, delete directly into central cache
+  tcmalloc::SLL_SetNext(ptr, NULL);
+  Static::central_cache()[cl].InsertRange(ptr, ptr, 1);
+}
+
+// The default "do_free" that uses the default callback.
+ATTRIBUTE_ALWAYS_INLINE inline void do_free(void* ptr) {
+  return do_free_with_callback(ptr, &InvalidFree, false, 0);
+}
+
+// NOTE: some logic here is duplicated in GetOwnership (above), for
+// speed.  If you change this function, look at that one too.
+inline size_t GetSizeWithCallback(const void* ptr, + size_t (*invalid_getsize_fn)(const void*)) { + if (ptr == NULL) + return 0; + const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + uint32 cl; + if (Static::pageheap()->TryGetSizeClass(p, &cl)) { + return Static::sizemap()->ByteSizeForClass(cl); + } + + const Span *span = Static::pageheap()->GetDescriptor(p); + if (PREDICT_FALSE(span == NULL)) { // means we do not own this memory + return (*invalid_getsize_fn)(ptr); + } + + if (span->sizeclass != 0) { + return Static::sizemap()->ByteSizeForClass(span->sizeclass); + } + + if (span->sample) { + size_t orig_size = reinterpret_cast<StackTrace*>(span->objects)->size; + return tc_nallocx(orig_size, 0); + } + + return span->length << kPageShift; +} + +// This lets you call back to a given function pointer if ptr is invalid. +// It is used primarily by windows code which wants a specialized callback. +ATTRIBUTE_ALWAYS_INLINE inline void* do_realloc_with_callback( + void* old_ptr, size_t new_size, + void (*invalid_free_fn)(void*), + size_t (*invalid_get_size_fn)(const void*)) { + // Get the size of the old entry + const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn); + + // Reallocate if the new size is larger than the old size, + // or if the new size is significantly smaller than the old size. + // We do hysteresis to avoid resizing ping-pongs: + // . If we need to grow, grow to max(new_size, old_size * 1.X) + // . Don't shrink unless new_size < old_size * 0.Y + // X and Y trade-off time for wasted space. For now we do 1.25 and 0.5. + const size_t min_growth = min(old_size / 4, + (std::numeric_limits<size_t>::max)() - old_size); // Avoid overflow. + const size_t lower_bound_to_grow = old_size + min_growth; + const size_t upper_bound_to_shrink = old_size / 2ul; + if ((new_size > old_size) || (new_size < upper_bound_to_shrink)) { + // Need to reallocate. 
+ void* new_ptr = NULL; + + if (new_size > old_size && new_size < lower_bound_to_grow) { + new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow); + } + if (new_ptr == NULL) { + // Either new_size is not a tiny increment, or last do_malloc failed. + new_ptr = do_malloc_or_cpp_alloc(new_size); + } + if (PREDICT_FALSE(new_ptr == NULL)) { + return NULL; + } + MallocHook::InvokeNewHook(new_ptr, new_size); + memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); + MallocHook::InvokeDeleteHook(old_ptr); + // We could use a variant of do_free() that leverages the fact + // that we already know the sizeclass of old_ptr. The benefit + // would be small, so don't bother. + do_free_with_callback(old_ptr, invalid_free_fn, false, 0); + return new_ptr; + } else { + // We still need to call hooks to report the updated size: + MallocHook::InvokeDeleteHook(old_ptr); + MallocHook::InvokeNewHook(old_ptr, new_size); + return old_ptr; + } +} + +ATTRIBUTE_ALWAYS_INLINE inline void* do_realloc(void* old_ptr, size_t new_size) { + return do_realloc_with_callback(old_ptr, new_size, + &InvalidFree, &InvalidGetSizeForRealloc); +} + +static ATTRIBUTE_ALWAYS_INLINE inline +void* do_memalign_pages(size_t align, size_t size) { + ASSERT((align & (align - 1)) == 0); + ASSERT(align > kPageSize); + if (size + align < size) return NULL; // Overflow + + if (PREDICT_FALSE(Static::pageheap() == NULL)) ThreadCache::InitModule(); + + // Allocate at least one byte to avoid boundary conditions below + if (size == 0) size = 1; + + // We will allocate directly from the page heap + SpinLockHolder h(Static::pageheap_lock()); + + // Allocate extra pages and carve off an aligned portion + const Length alloc = tcmalloc::pages(size + align); + Span* span = Static::pageheap()->New(alloc); + if (PREDICT_FALSE(span == NULL)) return NULL; + + // Skip starting portion so that we end up aligned + Length skip = 0; + while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) { + skip++; + } + 
ASSERT(skip < alloc); + if (skip > 0) { + Span* rest = Static::pageheap()->Split(span, skip); + Static::pageheap()->Delete(span); + span = rest; + } + + // Skip trailing portion that we do not need to return + const Length needed = tcmalloc::pages(size); + ASSERT(span->length >= needed); + if (span->length > needed) { + Span* trailer = Static::pageheap()->Split(span, needed); + Static::pageheap()->Delete(trailer); + } + return SpanToMallocResult(span); +} + +// Helpers for use by exported routines below: + +inline void do_malloc_stats() { + PrintStats(1); +} + +inline int do_mallopt(int cmd, int value) { + return 1; // Indicates error +} + +#ifdef HAVE_STRUCT_MALLINFO +inline struct mallinfo do_mallinfo() { + TCMallocStats stats; + ExtractStats(&stats, NULL, NULL, NULL); + + // Just some of the fields are filled in. + struct mallinfo info; + memset(&info, 0, sizeof(info)); + + // Unfortunately, the struct contains "int" field, so some of the + // size values will be truncated. + info.arena = static_cast<int>(stats.pageheap.system_bytes); + info.fsmblks = static_cast<int>(stats.thread_bytes + + stats.central_bytes + + stats.transfer_bytes); + info.fordblks = static_cast<int>(stats.pageheap.free_bytes + + stats.pageheap.unmapped_bytes); + info.uordblks = static_cast<int>(stats.pageheap.system_bytes + - stats.thread_bytes + - stats.central_bytes + - stats.transfer_bytes + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes); + + return info; +} +#endif // HAVE_STRUCT_MALLINFO + +} // end unnamed namespace + +// As promised, the definition of this function, declared above. 
+// Returns the number of bytes actually reserved for ptr (which may be
+// larger than what was requested), or 0 for NULL.  ptr must be owned by
+// tcmalloc (asserted in debug builds).
+size_t TCMallocImplementation::GetAllocatedSize(const void* ptr) {
+  if (ptr == NULL)
+    return 0;
+  ASSERT(TCMallocImplementation::GetOwnership(ptr)
+         != TCMallocImplementation::kNotOwned);
+  return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize);
+}
+
+void TCMallocImplementation::MarkThreadBusy() {
+  // Allocate to force the creation of a thread cache, but avoid
+  // invoking any hooks.
+  do_free(do_malloc(0));
+}
+
+//-------------------------------------------------------------------
+// Exported routines
+//-------------------------------------------------------------------
+
+// Reports the compiled-in gperftools version; each out-parameter is
+// optional (may be NULL).  Returns the full version string.
+extern "C" PERFTOOLS_DLL_DECL const char* tc_version(
+    int* major, int* minor, const char** patch) PERFTOOLS_NOTHROW {
+  if (major) *major = TC_VERSION_MAJOR;
+  if (minor) *minor = TC_VERSION_MINOR;
+  if (patch) *patch = TC_VERSION_PATCH;
+  return TC_VERSION_STRING;
+}
+
+// This function behaves similarly to MSVC's _set_new_mode.
+// If flag is 0 (default), calls to malloc will behave normally.
+// If flag is 1, calls to malloc will behave like calls to new,
+// and the std_new_handler will be invoked on failure.
+// Returns the previous mode.
+extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_NOTHROW {
+  int old_mode = tc_new_mode;
+  tc_new_mode = flag;
+  return old_mode;
+}
+
+extern "C" PERFTOOLS_DLL_DECL int tc_query_new_mode() PERFTOOLS_NOTHROW {
+  return tc_new_mode;
+}
+
+#ifndef TCMALLOC_USING_DEBUGALLOCATION  // debugallocation.cc defines its own
+
+// CAVEAT: The code structure below ensures that MallocHook methods are always
+//         called from the stack frame of the invoked allocation function.
+//         heap-checker.cc depends on this to start a stack trace from
+//         the call to the (de)allocation function.
+ +namespace tcmalloc { + + +static ATTRIBUTE_SECTION(google_malloc) +void invoke_hooks_and_free(void *ptr) { + MallocHook::InvokeDeleteHook(ptr); + do_free(ptr); +} + +ATTRIBUTE_SECTION(google_malloc) +void* cpp_throw_oom(size_t size) { + return handle_oom(retry_malloc, reinterpret_cast<void *>(size), + true, false); +} + +ATTRIBUTE_SECTION(google_malloc) +void* cpp_nothrow_oom(size_t size) { + return handle_oom(retry_malloc, reinterpret_cast<void *>(size), + true, true); +} + +ATTRIBUTE_SECTION(google_malloc) +void* malloc_oom(size_t size) { + return handle_oom(retry_malloc, reinterpret_cast<void *>(size), + false, true); +} + +// tcmalloc::allocate_full_XXX is called by fast-path malloc when some +// complex handling is needed (such as fetching object from central +// freelist or malloc sampling). It contains all 'operator new' logic, +// as opposed to malloc_fast_path which only deals with important +// subset of cases. +// +// Note that this is under tcmalloc namespace so that pprof +// can automatically filter it out of growthz/heapz profiles. +// +// We have slightly fancy setup because we need to call hooks from +// function in 'google_malloc' section and we cannot place template +// into this section. Thus 3 separate functions 'built' by macros. +// +// Also note that we're carefully orchestrating for +// MallocHook::GetCallerStackTrace to work even if compiler isn't +// optimizing tail calls (e.g. -O0 is given). We still require +// ATTRIBUTE_ALWAYS_INLINE to work for that case, but it was seen to +// work for -O0 -fno-inline across both GCC and clang. I.e. in this +// case we'll get stack frame for tc_new, followed by stack frame for +// allocate_full_cpp_throw_oom, followed by hooks machinery and user +// code's stack frames. So GetCallerStackTrace will find 2 +// subsequent stack frames in google_malloc section and correctly +// 'cut' stack trace just before tc_new. 
+template <void* OOMHandler(size_t)> +ATTRIBUTE_ALWAYS_INLINE inline +static void* do_allocate_full(size_t size) { + void* p = do_malloc(size); + if (PREDICT_FALSE(p == NULL)) { + p = OOMHandler(size); + } + MallocHook::InvokeNewHook(p, size); + return CheckedMallocResult(p); +} + +#define AF(oom) \ + ATTRIBUTE_SECTION(google_malloc) \ + void* allocate_full_##oom(size_t size) { \ + return do_allocate_full<oom>(size); \ + } + +AF(cpp_throw_oom) +AF(cpp_nothrow_oom) +AF(malloc_oom) + +#undef AF + +template <void* OOMHandler(size_t)> +static ATTRIBUTE_ALWAYS_INLINE inline void* dispatch_allocate_full(size_t size) { + if (OOMHandler == cpp_throw_oom) { + return allocate_full_cpp_throw_oom(size); + } + if (OOMHandler == cpp_nothrow_oom) { + return allocate_full_cpp_nothrow_oom(size); + } + ASSERT(OOMHandler == malloc_oom); + return allocate_full_malloc_oom(size); +} + +struct retry_memalign_data { + size_t align; + size_t size; +}; + +static void *retry_do_memalign(void *arg) { + retry_memalign_data *data = static_cast<retry_memalign_data *>(arg); + return do_memalign_pages(data->align, data->size); +} + +static ATTRIBUTE_SECTION(google_malloc) +void* memalign_pages(size_t align, size_t size, + bool from_operator, bool nothrow) { + void *rv = do_memalign_pages(align, size); + if (PREDICT_FALSE(rv == NULL)) { + retry_memalign_data data; + data.align = align; + data.size = size; + rv = handle_oom(retry_do_memalign, &data, + from_operator, nothrow); + } + MallocHook::InvokeNewHook(rv, size); + return CheckedMallocResult(rv); +} + +} // namespace tcmalloc + +// This is quick, fast-path-only implementation of malloc/new. It is +// designed to only have support for fast-path. It checks if more +// complex handling is needed (such as a pageheap allocation or +// sampling) and only performs allocation if none of those uncommon +// conditions hold. When we have one of those odd cases it simply +// tail-calls to one of tcmalloc::allocate_full_XXX defined above. 
+// +// Such approach was found to be quite effective. Generated code for +// tc_{new,malloc} either succeeds quickly or tail-calls to +// allocate_full. Terseness of the source and lack of +// non-tail calls enables compiler to produce better code. Also +// produced code is short enough to enable effort-less human +// comprehension. Which itself led to elimination of various checks +// that were not necessary for fast-path. +template <void* OOMHandler(size_t)> +ATTRIBUTE_ALWAYS_INLINE inline +static void * malloc_fast_path(size_t size) { + if (PREDICT_FALSE(!base::internal::new_hooks_.empty())) { + return tcmalloc::dispatch_allocate_full<OOMHandler>(size); + } + + ThreadCache *cache = ThreadCache::GetFastPathCache(); + + if (PREDICT_FALSE(cache == NULL)) { + return tcmalloc::dispatch_allocate_full<OOMHandler>(size); + } + + uint32 cl; + if (PREDICT_FALSE(!Static::sizemap()->GetSizeClass(size, &cl))) { + return tcmalloc::dispatch_allocate_full<OOMHandler>(size); + } + + size_t allocated_size = Static::sizemap()->ByteSizeForClass(cl); + + if (PREDICT_FALSE(!cache->TryRecordAllocationFast(allocated_size))) { + return tcmalloc::dispatch_allocate_full<OOMHandler>(size); + } + + return CheckedMallocResult(cache->Allocate(allocated_size, cl, OOMHandler)); +} + +template <void* OOMHandler(size_t)> +ATTRIBUTE_ALWAYS_INLINE inline +static void* memalign_fast_path(size_t align, size_t size) { + if (PREDICT_FALSE(align > kPageSize)) { + if (OOMHandler == tcmalloc::cpp_throw_oom) { + return tcmalloc::memalign_pages(align, size, true, false); + } else if (OOMHandler == tcmalloc::cpp_nothrow_oom) { + return tcmalloc::memalign_pages(align, size, true, true); + } else { + ASSERT(OOMHandler == tcmalloc::malloc_oom); + return tcmalloc::memalign_pages(align, size, false, true); + } + } + + // Everything with alignment <= kPageSize we can easily delegate to + // regular malloc + + return malloc_fast_path<OOMHandler>(align_size_up(size, align)); +} + +extern "C" PERFTOOLS_DLL_DECL 
CACHELINE_ALIGNED_FN +void* tc_malloc(size_t size) PERFTOOLS_NOTHROW { + return malloc_fast_path<tcmalloc::malloc_oom>(size); +} + +static ATTRIBUTE_ALWAYS_INLINE inline +void free_fast_path(void *ptr) { + if (PREDICT_FALSE(!base::internal::delete_hooks_.empty())) { + tcmalloc::invoke_hooks_and_free(ptr); + return; + } + do_free(ptr); +} + +extern "C" PERFTOOLS_DLL_DECL CACHELINE_ALIGNED_FN +void tc_free(void* ptr) PERFTOOLS_NOTHROW { + free_fast_path(ptr); +} + +extern "C" PERFTOOLS_DLL_DECL CACHELINE_ALIGNED_FN +void tc_free_sized(void *ptr, size_t size) PERFTOOLS_NOTHROW { + if (PREDICT_FALSE(!base::internal::delete_hooks_.empty())) { + tcmalloc::invoke_hooks_and_free(ptr); + return; + } +#ifndef NO_TCMALLOC_SAMPLES + // if ptr is kPageSize-aligned, then it could be sampled allocation, + // thus we don't trust hint and just do plain free. It also handles + // nullptr for us. + if (PREDICT_FALSE((reinterpret_cast<uintptr_t>(ptr) & (kPageSize-1)) == 0)) { + tc_free(ptr); + return; + } +#else + if (!ptr) { + return; + } +#endif + do_free_with_callback(ptr, &InvalidFree, true, size); +} + +#ifdef TC_ALIAS + +extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized(void *p, size_t size) PERFTOOLS_NOTHROW + TC_ALIAS(tc_free_sized); +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized(void *p, size_t size) PERFTOOLS_NOTHROW + TC_ALIAS(tc_free_sized); + +#else + +extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized(void *p, size_t size) PERFTOOLS_NOTHROW { + tc_free_sized(p, size); +} +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized(void *p, size_t size) PERFTOOLS_NOTHROW { + tc_free_sized(p, size); +} + +#endif + +extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t n, + size_t elem_size) PERFTOOLS_NOTHROW { + if (ThreadCache::IsUseEmergencyMalloc()) { + return tcmalloc::EmergencyCalloc(n, elem_size); + } + void* result = do_calloc(n, elem_size); + MallocHook::InvokeNewHook(result, n * elem_size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void 
tc_cfree(void* ptr) PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_free); +#else +{ + free_fast_path(ptr); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* old_ptr, + size_t new_size) PERFTOOLS_NOTHROW { + if (old_ptr == NULL) { + void* result = do_malloc_or_cpp_alloc(new_size); + MallocHook::InvokeNewHook(result, new_size); + return result; + } + if (new_size == 0) { + MallocHook::InvokeDeleteHook(old_ptr); + do_free(old_ptr); + return NULL; + } + if (PREDICT_FALSE(tcmalloc::IsEmergencyPtr(old_ptr))) { + return tcmalloc::EmergencyRealloc(old_ptr, new_size); + } + return do_realloc(old_ptr, new_size); +} + +extern "C" PERFTOOLS_DLL_DECL CACHELINE_ALIGNED_FN +void* tc_new(size_t size) { + return malloc_fast_path<tcmalloc::cpp_throw_oom>(size); +} + +extern "C" PERFTOOLS_DLL_DECL CACHELINE_ALIGNED_FN +void* tc_new_nothrow(size_t size, const std::nothrow_t&) PERFTOOLS_NOTHROW { + return malloc_fast_path<tcmalloc::cpp_nothrow_oom>(size); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_free); +#else +{ + free_fast_path(p); +} +#endif + +// Standard C++ library implementations define and use this +// (via ::operator delete(ptr, nothrow)). +// But it's really the same as normal delete, so we just do the same thing. 
+extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_free); +#else +{ + if (PREDICT_FALSE(!base::internal::delete_hooks_.empty())) { + tcmalloc::invoke_hooks_and_free(p); + return; + } + do_free(p); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) +#ifdef TC_ALIAS +TC_ALIAS(tc_new); +#else +{ + return malloc_fast_path<tcmalloc::cpp_throw_oom>(size); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) + PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_new_nothrow); +#else +{ + return malloc_fast_path<tcmalloc::cpp_nothrow_oom>(size); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_free); +#else +{ + free_fast_path(p); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_delete_nothrow); +#else +{ + free_fast_path(p); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL CACHELINE_ALIGNED_FN +void* tc_memalign(size_t align, size_t size) PERFTOOLS_NOTHROW { + return memalign_fast_path<tcmalloc::malloc_oom>(align, size); +} + +extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign( + void** result_ptr, size_t align, size_t size) PERFTOOLS_NOTHROW { + if (((align % sizeof(void*)) != 0) || + ((align & (align - 1)) != 0) || + (align == 0)) { + return EINVAL; + } + + void* result = tc_memalign(align, size); + if (PREDICT_FALSE(result == NULL)) { + return ENOMEM; + } else { + *result_ptr = result; + return 0; + } +} + +#if defined(ENABLE_ALIGNED_NEW_DELETE) + +extern "C" PERFTOOLS_DLL_DECL void* tc_new_aligned(size_t size, std::align_val_t align) { + return memalign_fast_path<tcmalloc::cpp_throw_oom>(static_cast<size_t>(align), size); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_new_aligned_nothrow(size_t size, std::align_val_t align, const std::nothrow_t&) 
PERFTOOLS_NOTHROW { + return memalign_fast_path<tcmalloc::cpp_nothrow_oom>(static_cast<size_t>(align), size); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_delete_aligned(void* p, std::align_val_t) PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_delete); +#else +{ + free_fast_path(p); +} +#endif + +// There is no easy way to obtain the actual size used by do_memalign to allocate aligned storage, so for now +// just ignore the size. It might get useful in the future. +extern "C" PERFTOOLS_DLL_DECL void tc_delete_sized_aligned(void* p, size_t size, std::align_val_t align) PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_delete); +#else +{ + free_fast_path(p); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void tc_delete_aligned_nothrow(void* p, std::align_val_t, const std::nothrow_t&) PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_delete); +#else +{ + free_fast_path(p); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_aligned(size_t size, std::align_val_t align) +#ifdef TC_ALIAS +TC_ALIAS(tc_new_aligned); +#else +{ + return memalign_fast_path<tcmalloc::cpp_throw_oom>(static_cast<size_t>(align), size); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_aligned_nothrow(size_t size, std::align_val_t align, const std::nothrow_t& nt) PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_new_aligned_nothrow); +#else +{ + return memalign_fast_path<tcmalloc::cpp_nothrow_oom>(static_cast<size_t>(align), size); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_aligned(void* p, std::align_val_t) PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_delete_aligned); +#else +{ + free_fast_path(p); +} +#endif + +// There is no easy way to obtain the actual size used by do_memalign to allocate aligned storage, so for now +// just ignore the size. It might get useful in the future. 
+extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_sized_aligned(void* p, size_t size, std::align_val_t align) PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_delete_sized_aligned); +#else +{ + free_fast_path(p); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_aligned_nothrow(void* p, std::align_val_t, const std::nothrow_t&) PERFTOOLS_NOTHROW +#ifdef TC_ALIAS +TC_ALIAS(tc_delete_aligned_nothrow); +#else +{ + free_fast_path(p); +} +#endif + +#endif // defined(ENABLE_ALIGNED_NEW_DELETE) + +static size_t pagesize = 0; + +extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) PERFTOOLS_NOTHROW { + // Allocate page-aligned object of length >= size bytes + if (pagesize == 0) pagesize = getpagesize(); + return tc_memalign(pagesize, size); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) PERFTOOLS_NOTHROW { + // Round up size to a multiple of pagesize + if (pagesize == 0) pagesize = getpagesize(); + if (size == 0) { // pvalloc(0) should allocate one page, according to + size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html + } + size = (size + pagesize - 1) & ~(pagesize - 1); + return tc_memalign(pagesize, size); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_NOTHROW { + do_malloc_stats(); +} + +extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_NOTHROW { + return do_mallopt(cmd, value); +} + +#ifdef HAVE_STRUCT_MALLINFO +extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) PERFTOOLS_NOTHROW { + return do_mallinfo(); +} +#endif + +extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_NOTHROW { + return MallocExtension::instance()->GetAllocatedSize(ptr); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_NOTHROW { + void* result = do_malloc(size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +#endif // TCMALLOC_USING_DEBUGALLOCATION diff --git 
a/src/third_party/gperftools-2.7/src/tcmalloc.h b/src/third_party/gperftools-2.7/src/tcmalloc.h new file mode 100644 index 00000000000..25cf982e21f --- /dev/null +++ b/src/third_party/gperftools-2.7/src/tcmalloc.h @@ -0,0 +1,70 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Craig Silverstein <opensource@google.com> +// +// Some obscure memory-allocation routines may not be declared on all +// systems. In those cases, we'll just declare them ourselves. +// This file is meant to be used only internally, for unittests. + +#include <config.h> + +#ifndef _XOPEN_SOURCE +# define _XOPEN_SOURCE 600 // for posix_memalign +#endif +#include <stdlib.h> // for posix_memalign +// FreeBSD has malloc.h, but complains if you use it +#if defined(HAVE_MALLOC_H) && !defined(__FreeBSD__) +#include <malloc.h> // for memalign, valloc, pvalloc +#endif + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +#if !HAVE_CFREE_SYMBOL +extern "C" void cfree(void* ptr) __THROW; +#endif +#if !HAVE_DECL_POSIX_MEMALIGN +extern "C" int posix_memalign(void** ptr, size_t align, size_t size) __THROW; +#endif +#if !HAVE_DECL_MEMALIGN +extern "C" void* memalign(size_t __alignment, size_t __size) __THROW; +#endif +#if !HAVE_DECL_VALLOC +extern "C" void* valloc(size_t __size) __THROW; +#endif +#if !HAVE_DECL_PVALLOC +extern "C" void* pvalloc(size_t __size) __THROW; +#endif diff --git a/src/third_party/gperftools-2.7/src/tcmalloc_guard.h b/src/third_party/gperftools-2.7/src/tcmalloc_guard.h new file mode 100644 index 00000000000..84952bac2ea --- /dev/null +++ b/src/third_party/gperftools-2.7/src/tcmalloc_guard.h @@ -0,0 +1,49 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// We expose the TCMallocGuard class -- which initializes the tcmalloc +// allocator -- so classes that need to be sure tcmalloc is loaded +// before they do stuff -- notably heap-profiler -- can. To use this +// create a static TCMallocGuard instance at the top of a file where +// you need tcmalloc to be initialized before global constructors run. 
+ +#ifndef TCMALLOC_TCMALLOC_GUARD_H_ +#define TCMALLOC_TCMALLOC_GUARD_H_ + +class TCMallocGuard { + public: + TCMallocGuard(); + ~TCMallocGuard(); +}; + +#endif // TCMALLOC_TCMALLOC_GUARD_H_ diff --git a/src/third_party/gperftools-2.7/src/third_party/valgrind.h b/src/third_party/gperftools-2.7/src/third_party/valgrind.h new file mode 100644 index 00000000000..577c59ab0cd --- /dev/null +++ b/src/third_party/gperftools-2.7/src/third_party/valgrind.h @@ -0,0 +1,3924 @@ +/* -*- c -*- + ---------------------------------------------------------------- + + Notice that the following BSD-style license applies to this one + file (valgrind.h) only. The rest of Valgrind is licensed under the + terms of the GNU General Public License, version 2, unless + otherwise indicated. See the COPYING file in the source + distribution for details. + + ---------------------------------------------------------------- + + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2008 Julian Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (valgrind.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + + +/* This file is for inclusion into client (your!) code. + + You can use these macros to manipulate and query Valgrind's + execution inside your own programs. + + The resulting executables will still run without Valgrind, just a + little bit more slowly than they otherwise would, but otherwise + unchanged. When not running on valgrind, each client request + consumes very few (eg. 7) instructions, so the resulting performance + loss is negligible unless you plan to execute client requests + millions of times per second. Nevertheless, if that is still a + problem, you can compile with the NVALGRIND symbol defined (gcc + -DNVALGRIND) so that client requests are not even compiled in. */ + +#ifndef __VALGRIND_H +#define __VALGRIND_H + +#include <stdarg.h> + +/* Nb: this file might be included in a file compiled with -ansi. 
So + we can't use C++ style "//" comments nor the "asm" keyword (instead + use "__asm__"). */ + +/* Derive some tags indicating what the target platform is. Note + that in this file we're using the compiler's CPP symbols for + identifying architectures, which are different to the ones we use + within the rest of Valgrind. Note, __powerpc__ is active for both + 32 and 64-bit PPC, whereas __powerpc64__ is only active for the + latter (on Linux, that is). */ +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64_linux +#undef PLAT_ppc32_aix5 +#undef PLAT_ppc64_aix5 + +#if !defined(_AIX) && defined(__i386__) +# define PLAT_x86_linux 1 +#elif !defined(_AIX) && defined(__x86_64__) +# define PLAT_amd64_linux 1 +#elif !defined(_AIX) && defined(__powerpc__) && !defined(__powerpc64__) +# define PLAT_ppc32_linux 1 +#elif !defined(_AIX) && defined(__powerpc__) && defined(__powerpc64__) +# define PLAT_ppc64_linux 1 +#elif defined(_AIX) && defined(__64BIT__) +# define PLAT_ppc64_aix5 1 +#elif defined(_AIX) && !defined(__64BIT__) +# define PLAT_ppc32_aix5 1 +#endif + + +/* If we're not compiling for our target platform, don't generate + any inline asms. */ +#if !defined(PLAT_x86_linux) && !defined(PLAT_amd64_linux) \ + && !defined(PLAT_ppc32_linux) && !defined(PLAT_ppc64_linux) \ + && !defined(PLAT_ppc32_aix5) && !defined(PLAT_ppc64_aix5) +# if !defined(NVALGRIND) +# define NVALGRIND 1 +# endif +#endif + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. There is nothing */ +/* in here of use to end-users -- skip to the next section. 
*/ +/* ------------------------------------------------------------------ */ + +#if defined(NVALGRIND) + +/* Define NVALGRIND to completely remove the Valgrind magic sequence + from the compiled code (analogous to NDEBUG's effects on + assert()) */ +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { \ + (_zzq_rlval) = (_zzq_default); \ + } + +#else /* ! NVALGRIND */ + +/* The following defines the magic code sequences which the JITter + spots and handles magically. Don't look too closely at them as + they will rot your brain. + + The assembly code sequences for all architectures is in this one + file. This is because this file must be stand-alone, and we don't + want to have multiple files. + + For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default + value gets put in the return slot, so that everything works when + this is executed not under Valgrind. Args are passed in a memory + block, and so there's no intrinsic limit to the number that could + be passed, but it's currently five. + + The macro args are: + _zzq_rlval result lvalue + _zzq_default default value (result returned when running on real CPU) + _zzq_request request code + _zzq_arg1..5 request params + + The other two macros are used to support function wrapping, and are + a lot simpler. VALGRIND_GET_NR_CONTEXT returns the value of the + guest's NRADDR pseudo-register and whatever other information is + needed to safely run the call original from the wrapper: on + ppc64-linux, the R2 value at the divert point is also needed. This + information is abstracted into a user-visible type, OrigFn. + + VALGRIND_CALL_NOREDIR_* behaves the same as the following on the + guest, but guarantees that the branch instruction will not be + redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64: + branch-and-link-to-r11. 
VALGRIND_CALL_NOREDIR is just text, not a + complete inline asm, since it needs to be combined with more magic + inline asm stuff to be useful. +*/ + +/* ------------------------- x86-linux ------------------------- */ + +#if defined(PLAT_x86_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "roll $3, %%edi ; roll $13, %%edi\n\t" \ + "roll $29, %%edi ; roll $19, %%edi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EDX = client_request ( %EAX ) */ \ + "xchgl %%ebx,%%ebx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EAX = guest_NRADDR */ \ + "xchgl %%ecx,%%ecx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_EAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%EAX */ \ + "xchgl %%edx,%%edx\n\t" +#endif /* PLAT_x86_linux */ + +/* ------------------------ amd64-linux ------------------------ */ + +#if defined(PLAT_amd64_linux) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \ + "rolq $61, %%rdi ; rolq $51, %%rdi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { volatile unsigned long long int _zzq_args[6]; \ + volatile unsigned long long int _zzq_result; \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RDX = client_request ( %RAX ) */ \ + "xchgq %%rbx,%%rbx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RAX = guest_NRADDR */ \ + "xchgq %%rcx,%%rcx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_RAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%RAX */ \ + "xchgq %%rdx,%%rdx\n\t" +#endif /* PLAT_amd64_linux */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned int _zzq_args[6]; \ + unsigned int _zzq_result; \ + unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64_linux) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? */ + unsigned long long int r2; /* what tocptr do we need? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned long long int _zzq_args[6]; \ + register unsigned long long int _zzq_result __asm__("r3"); \ + register unsigned long long int* _zzq_ptr __asm__("r4"); \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1" \ + : "=r" (_zzq_result) \ + : "0" (_zzq_default), "r" (_zzq_ptr) \ + : "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned long long int __addr __asm__("r3"); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2" \ + : "=r" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4" \ + : "=r" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc64_linux */ + +/* ------------------------ ppc32-aix5 ------------------------- */ + +#if defined(PLAT_ppc32_aix5) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + unsigned int r2; /* what tocptr do we need? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned int _zzq_args[7]; \ + register unsigned int _zzq_result; \ + register unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_args[6] = (unsigned int)(_zzq_default); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 4,%1\n\t" \ + "lwz 3, 24(4)\n\t" \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" \ + : "=b" (_zzq_result) \ + : "b" (_zzq_ptr) \ + : "r3", "r4", "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc32_aix5 */ + +/* ------------------------ ppc64-aix5 ------------------------- */ + +#if defined(PLAT_ppc64_aix5) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? 
*/ + unsigned long long int r2; /* what tocptr do we need? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned long long int _zzq_args[7]; \ + register unsigned long long int _zzq_result; \ + register unsigned long long int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int long long)(_zzq_request); \ + _zzq_args[1] = (unsigned int long long)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int long long)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int long long)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int long long)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int long long)(_zzq_arg5); \ + _zzq_args[6] = (unsigned int long long)(_zzq_default); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 4,%1\n\t" \ + "ld 3, 48(4)\n\t" \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" \ + : "=b" (_zzq_result) \ + : "b" (_zzq_ptr) \ + : "r3", "r4", "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc64_aix5 */ + +/* Insert assembly code for other platforms here... 
*/ + +#endif /* NVALGRIND */ + + +/* ------------------------------------------------------------------ */ +/* PLATFORM SPECIFICS for FUNCTION WRAPPING. This is all very */ +/* ugly. It's the least-worst tradeoff I can think of. */ +/* ------------------------------------------------------------------ */ + +/* This section defines magic (a.k.a appalling-hack) macros for doing + guaranteed-no-redirection macros, so as to get from function + wrappers to the functions they are wrapping. The whole point is to + construct standard call sequences, but to do the call itself with a + special no-redirect call pseudo-instruction that the JIT + understands and handles specially. This section is long and + repetitious, and I can't see a way to make it shorter. + + The naming scheme is as follows: + + CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc} + + 'W' stands for "word" and 'v' for "void". Hence there are + different macros for calling arity 0, 1, 2, 3, 4, etc, functions, + and for each, the possibility of returning a word-typed result, or + no result. +*/ + +/* Use these to write the name of your wrapper. NOTE: duplicates + VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */ + +#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \ + _vgwZU_##soname##_##fnname + +#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \ + _vgwZZ_##soname##_##fnname + +/* Use this macro from within a wrapper function to collect the + context (address and possibly other info) of the original function. + Once you have that you can then use it in one of the CALL_FN_ + macros. The type of the argument _lval is OrigFn. */ +#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval) + +/* Derivatives of the main macros below, for calling functions + returning void. 
*/ + +#define CALL_FN_v_v(fnptr) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_v(_junk,fnptr); } while (0) + +#define CALL_FN_v_W(fnptr, arg1) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_W(_junk,fnptr,arg1); } while (0) + +#define CALL_FN_v_WW(fnptr, arg1,arg2) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0) + +#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0) + +/* ------------------------- x86-linux ------------------------- */ + +#if defined(PLAT_x86_linux) + +/* These regs are trashed by the hidden call. No need to mention eax + as gcc can already see that, plus causes gcc to bomb. */ +#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx" + +/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned + long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $4, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = 
(orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $8, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $12, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $16, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = 
(__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $20, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $24, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ 
+ _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $28, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $32, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + 
volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $36, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + 
VALGRIND_CALL_NOREDIR_EAX \ + "addl $40, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $44, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = 
(unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "pushl 48(%%eax)\n\t" \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $48, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_x86_linux */ + +/* ------------------------ amd64-linux ------------------------ */ + +#if defined(PLAT_amd64_linux) + +/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", \ + "rdi", "r8", "r9", "r10", "r11" + +/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned + long) == 8. */ + +/* NB 9 Sept 07. There is a nasty kludge here in all these CALL_FN_ + macros. In order not to trash the stack redzone, we need to drop + %rsp by 128 before the hidden call, and restore afterwards. The + nastyness is that it is only by luck that the stack still appears + to be unwindable during the hidden call - since then the behaviour + of any routine using this macro does not match what the CFI data + says. Sigh. + + Why is this important? Imagine that a wrapper has a stack + allocated local, and passes to the hidden call, a pointer to it. + Because gcc does not know about the hidden call, it may allocate + that local in the redzone. 
Unfortunately the hidden call may then + trash it before it comes to use it. So we must step clear of the + redzone, for the duration of the hidden call, to make it safe. + + Probably the same problem afflicts the other redzone-style ABIs too + (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is + self describing (none of this CFI nonsense) so at least messing + with the stack pointer doesn't give a danger of non-unwindable + stack. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), 
%%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile 
unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" /* restore %rsp (red zone) only AFTER the hidden call, as in the sibling macros */ \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned
long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $8, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $16, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, 
arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $24, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" 
\ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $32, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $40, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig 
= (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 96(%%rax)\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $48, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_amd64_linux */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +/* This is useful for finding out about the on-stack stuff: + + extern int f9 ( int,int,int,int,int,int,int,int,int ); + extern int f10 ( int,int,int,int,int,int,int,int,int,int ); + extern int f11 ( int,int,int,int,int,int,int,int,int,int,int ); + extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int ); + + int g9 ( void ) { + return f9(11,22,33,44,55,66,77,88,99); + } + int g10 ( void ) { + return f10(11,22,33,44,55,66,77,88,99,110); + } + int g11 ( void ) { + return f11(11,22,33,44,55,66,77,88,99,110,121); + } + int g12 ( void ) { + 
return f12(11,22,33,44,55,66,77,88,99,110,121,132); + } +*/ + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* These CALL_FN_ macros assume that on ppc32-linux, + sizeof(unsigned long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + 
VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned 
long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = 
(unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned 
long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* 
target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; 
\ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + _argvec[12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,20(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64_linux) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned + long) == 8. 
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ 
volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = 
(unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* 
_argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 
*/ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long 
_argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned 
long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = 
(unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ 
+ "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64_linux */ + +/* ------------------------ ppc32-aix5 ------------------------- */ + +#if defined(PLAT_ppc32_aix5) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Expand the stack frame, copying enough info that unwinding + still works. Trashes r3. */ + +#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ + "addi 1,1,-" #_n_fr "\n\t" \ + "lwz 3," #_n_fr "(1)\n\t" \ + "stw 3,0(1)\n\t" + +#define VG_CONTRACT_FRAME_BY(_n_fr) \ + "addi 1,1," #_n_fr "\n\t" + +/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned + long) == 4. 
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned 
long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile 
unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 
11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + 
_argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 
6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(64) \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 
2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(64) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(64) \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(64) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", 
__CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(72) \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,64(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(72) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) 
+ +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(72) \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,68(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,64(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(72) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = 
(__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_aix5 */ + +/* ------------------------ ppc64-aix5 ------------------------- */ + +#if defined(PLAT_ppc64_aix5) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Expand the stack frame, copying enough info that unwinding + still works. Trashes r3. */ + +#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ + "addi 1,1,-" #_n_fr "\n\t" \ + "ld 3," #_n_fr "(1)\n\t" \ + "std 3,0(1)\n\t" + +#define VG_CONTRACT_FRAME_BY(_n_fr) \ + "addi 1,1," #_n_fr "\n\t" + +/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned + long) == 8. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned 
long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = 
(unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile 
unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" 
/* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); 
\ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned 
long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(128) \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(128) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's 
tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(128) \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(128) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(144) \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 
3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(144) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(144) \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" 
\ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(144) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64_aix5 */ + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ +/* */ +/* ------------------------------------------------------------------ */ + +/* Some request codes. There are many more of these, but most are not + exposed to end-user view. These are the public ones, all of the + form 0x1000 + small_number. + + Core ones are in the range 0x00000000--0x0000ffff. The non-public + ones start at 0x2000. +*/ + +/* These macros are used by tools -- they must be public, but don't + embed them into other programs. */ +#define VG_USERREQ_TOOL_BASE(a,b) \ + ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16)) +#define VG_IS_TOOL_USERREQ(a, b, v) \ + (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000)) + +/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! + This enum comprises an ABI exported by Valgrind to programs + which use client requests. DO NOT CHANGE THE ORDER OF THESE + ENTRIES, NOR DELETE ANY -- add new ones at the end. 
*/ +typedef + enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001, + VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, + + /* These allow any function to be called from the simulated + CPU but run on the real CPU. Nb: the first arg passed to + the function is always the ThreadId of the running + thread! So CLIENT_CALL0 actually requires a 1 arg + function, etc. */ + VG_USERREQ__CLIENT_CALL0 = 0x1101, + VG_USERREQ__CLIENT_CALL1 = 0x1102, + VG_USERREQ__CLIENT_CALL2 = 0x1103, + VG_USERREQ__CLIENT_CALL3 = 0x1104, + + /* Can be useful in regression testing suites -- eg. can + send Valgrind's output to /dev/null and still count + errors. */ + VG_USERREQ__COUNT_ERRORS = 0x1201, + + /* These are useful and can be interpreted by any tool that + tracks malloc() et al, by using vg_replace_malloc.c. */ + VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301, + VG_USERREQ__FREELIKE_BLOCK = 0x1302, + /* Memory pool support. */ + VG_USERREQ__CREATE_MEMPOOL = 0x1303, + VG_USERREQ__DESTROY_MEMPOOL = 0x1304, + VG_USERREQ__MEMPOOL_ALLOC = 0x1305, + VG_USERREQ__MEMPOOL_FREE = 0x1306, + VG_USERREQ__MEMPOOL_TRIM = 0x1307, + VG_USERREQ__MOVE_MEMPOOL = 0x1308, + VG_USERREQ__MEMPOOL_CHANGE = 0x1309, + VG_USERREQ__MEMPOOL_EXISTS = 0x130a, + + /* Allow printfs to valgrind log. */ + VG_USERREQ__PRINTF = 0x1401, + VG_USERREQ__PRINTF_BACKTRACE = 0x1402, + + /* Stack support. */ + VG_USERREQ__STACK_REGISTER = 0x1501, + VG_USERREQ__STACK_DEREGISTER = 0x1502, + VG_USERREQ__STACK_CHANGE = 0x1503 + } Vg_ClientRequest; + +#if !defined(__GNUC__) +# define __extension__ /* */ +#endif + +/* Returns the number of Valgrinds this code is running under. That + is, 0 if running natively, 1 if running under Valgrind, 2 if + running under Valgrind which is running under another Valgrind, + etc. 
*/ +#define RUNNING_ON_VALGRIND __extension__ \ + ({unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */, \ + VG_USERREQ__RUNNING_ON_VALGRIND, \ + 0, 0, 0, 0, 0); \ + _qzz_res; \ + }) + + +/* Discard translation of code in the range [_qzz_addr .. _qzz_addr + + _qzz_len - 1]. Useful if you are debugging a JITter or some such, + since it provides a way to make sure valgrind will retranslate the + invalidated area. Returns no value. */ +#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__DISCARD_TRANSLATIONS, \ + _qzz_addr, _qzz_len, 0, 0, 0); \ + } + + +/* These requests are for getting Valgrind itself to print something. + Possibly with a backtrace. This is a really ugly hack. */ + +#if defined(NVALGRIND) + +# define VALGRIND_PRINTF(...) +# define VALGRIND_PRINTF_BACKTRACE(...) + +#else /* NVALGRIND */ + +/* Modern GCC will optimize the static routine out if unused, + and unused attribute will shut down warnings about it. */ +static int VALGRIND_PRINTF(const char *format, ...) + __attribute__((format(__printf__, 1, 2), __unused__)); +static int +VALGRIND_PRINTF(const char *format, ...) +{ + unsigned long _qzz_res; + va_list vargs; + va_start(vargs, format); + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF, + (unsigned long)format, (unsigned long)vargs, + 0, 0, 0); + va_end(vargs); + return (int)_qzz_res; +} + +static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...) + __attribute__((format(__printf__, 1, 2), __unused__)); +static int +VALGRIND_PRINTF_BACKTRACE(const char *format, ...) 
+{ + unsigned long _qzz_res; + va_list vargs; + va_start(vargs, format); + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF_BACKTRACE, + (unsigned long)format, (unsigned long)vargs, + 0, 0, 0); + va_end(vargs); + return (int)_qzz_res; +} + +#endif /* NVALGRIND */ + + +/* These requests allow control to move from the simulated CPU to the + real CPU, calling an arbitary function. + + Note that the current ThreadId is inserted as the first argument. + So this call: + + VALGRIND_NON_SIMD_CALL2(f, arg1, arg2) + + requires f to have this signature: + + Word f(Word tid, Word arg1, Word arg2) + + where "Word" is a word-sized type. + + Note that these client requests are not entirely reliable. For example, + if you call a function with them that subsequently calls printf(), + there's a high chance Valgrind will crash. Generally, your prospects of + these working are made higher if the called function does not refer to + any global variables, and does not refer to any libc or other functions + (printf et al). Any kind of entanglement with libc or dynamic linking is + likely to have a bad outcome, for tricky reasons which we've grappled + with a lot in the past. 
+*/ +#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL0, \ + _qyy_fn, \ + 0, 0, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL1, \ + _qyy_fn, \ + _qyy_arg1, 0, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL2, \ + _qyy_fn, \ + _qyy_arg1, _qyy_arg2, 0, 0); \ + _qyy_res; \ + }) + +#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \ + __extension__ \ + ({unsigned long _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__CLIENT_CALL3, \ + _qyy_fn, \ + _qyy_arg1, _qyy_arg2, \ + _qyy_arg3, 0); \ + _qyy_res; \ + }) + + +/* Counts the number of errors that have been recorded by a tool. Nb: + the tool must record the errors with VG_(maybe_record_error)() or + VG_(unique_error)() for them to be counted. */ +#define VALGRIND_COUNT_ERRORS \ + __extension__ \ + ({unsigned int _qyy_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \ + VG_USERREQ__COUNT_ERRORS, \ + 0, 0, 0, 0, 0); \ + _qyy_res; \ + }) + +/* Mark a block of memory as having been allocated by a malloc()-like + function. `addr' is the start of the usable block (ie. after any + redzone) `rzB' is redzone size if the allocator can apply redzones; + use '0' if not. Adding redzones makes it more likely Valgrind will spot + block overruns. `is_zeroed' indicates if the memory is zeroed, as it is + for calloc(). Put it immediately after the point where a block is + allocated. 
+ + If you're using Memcheck: If you're allocating memory via superblocks, + and then handing out small chunks of each superblock, if you don't have + redzones on your small blocks, it's worth marking the superblock with + VALGRIND_MAKE_MEM_NOACCESS when it's created, so that block overruns are + detected. But if you can put redzones on, it's probably better to not do + this, so that messages for small overruns are described in terms of the + small block rather than the superblock (but if you have a big overrun + that skips over a redzone, you could miss an error this way). See + memcheck/tests/custom_alloc.c for an example. + + WARNING: if your allocator uses malloc() or 'new' to allocate + superblocks, rather than mmap() or brk(), this will not work properly -- + you'll likely get assertion failures during leak detection. This is + because Valgrind doesn't like seeing overlapping heap blocks. Sorry. + + Nb: block must be freed via a free()-like function specified + with VALGRIND_FREELIKE_BLOCK or mismatch errors will occur. */ +#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MALLOCLIKE_BLOCK, \ + addr, sizeB, rzB, is_zeroed, 0); \ + } + +/* Mark a block of memory as having been freed by a free()-like function. + `rzB' is redzone size; it must match that given to + VALGRIND_MALLOCLIKE_BLOCK. Memory not freed will be detected by the leak + checker. Put it immediately after the point where the block is freed. */ +#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__FREELIKE_BLOCK, \ + addr, rzB, 0, 0, 0); \ + } + +/* Create a memory pool. */ +#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__CREATE_MEMPOOL, \ + pool, rzB, is_zeroed, 0, 0); \ + } + +/* Destroy a memory pool. 
*/ +#define VALGRIND_DESTROY_MEMPOOL(pool) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__DESTROY_MEMPOOL, \ + pool, 0, 0, 0, 0); \ + } + +/* Associate a piece of memory with a memory pool. */ +#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_ALLOC, \ + pool, addr, size, 0, 0); \ + } + +/* Disassociate a piece of memory from a memory pool. */ +#define VALGRIND_MEMPOOL_FREE(pool, addr) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_FREE, \ + pool, addr, 0, 0, 0); \ + } + +/* Disassociate any pieces outside a particular range. */ +#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_TRIM, \ + pool, addr, size, 0, 0); \ + } + +/* Resize and/or move a piece associated with a memory pool. */ +#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MOVE_MEMPOOL, \ + poolA, poolB, 0, 0, 0); \ + } + +/* Resize and/or move a piece associated with a memory pool. */ +#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_CHANGE, \ + pool, addrA, addrB, size, 0); \ + } + +/* Return 1 if a mempool exists, else 0. */ +#define VALGRIND_MEMPOOL_EXISTS(pool) \ + ({unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_EXISTS, \ + pool, 0, 0, 0, 0); \ + _qzz_res; \ + }) + +/* Mark a piece of memory as being a stack. Returns a stack id. 
*/ +#define VALGRIND_STACK_REGISTER(start, end) \ + ({unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__STACK_REGISTER, \ + start, end, 0, 0, 0); \ + _qzz_res; \ + }) + +/* Unmark the piece of memory associated with a stack id as being a + stack. */ +#define VALGRIND_STACK_DEREGISTER(id) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__STACK_DEREGISTER, \ + id, 0, 0, 0, 0); \ + } + +/* Change the start and end address of the stack id. */ +#define VALGRIND_STACK_CHANGE(id, start, end) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__STACK_CHANGE, \ + id, start, end, 0, 0); \ + } + + +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64_linux +#undef PLAT_ppc32_aix5 +#undef PLAT_ppc64_aix5 + +#endif /* __VALGRIND_H */ diff --git a/src/third_party/gperftools-2.7/src/thread_cache.cc b/src/third_party/gperftools-2.7/src/thread_cache.cc new file mode 100644 index 00000000000..6d2f8321723 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/thread_cache.cc @@ -0,0 +1,529 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Ken Ashcraft <opensource@google.com> + +#include <config.h> +#include "thread_cache.h" +#include <errno.h> +#include <string.h> // for memcpy +#include <algorithm> // for max, min +#include "base/commandlineflags.h" // for SpinLockHolder +#include "base/spinlock.h" // for SpinLockHolder +#include "getenv_safe.h" // for TCMallocGetenvSafe +#include "central_freelist.h" // for CentralFreeListPadded +#include "maybe_threads.h" + +using std::min; +using std::max; + +// Note: this is initialized manually in InitModule to ensure that +// it's configured at right time +// +// DEFINE_int64(tcmalloc_max_total_thread_cache_bytes, +// EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES", +// kDefaultOverallThreadCacheSize), +// "Bound on the total amount of bytes allocated to " +// "thread caches. This bound is not strict, so it is possible " +// "for the cache to go over this bound in certain circumstances. 
" +// "Maximum value of this flag is capped to 1 GB."); + + +namespace tcmalloc { + +static bool phinited = false; + +volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize; +size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize; +ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize; +PageHeapAllocator<ThreadCache> threadcache_allocator; +ThreadCache* ThreadCache::thread_heaps_ = NULL; +int ThreadCache::thread_heap_count_ = 0; +ThreadCache* ThreadCache::next_memory_steal_ = NULL; +#ifdef HAVE_TLS +__thread ThreadCache::ThreadLocalData ThreadCache::threadlocal_data_ + ATTR_INITIAL_EXEC CACHELINE_ALIGNED; +#endif +bool ThreadCache::tsd_inited_ = false; +pthread_key_t ThreadCache::heap_key_; + +void ThreadCache::Init(pthread_t tid) { + size_ = 0; + + max_size_ = 0; + IncreaseCacheLimitLocked(); + if (max_size_ == 0) { + // There isn't enough memory to go around. Just give the minimum to + // this thread. + SetMaxSize(kMinThreadCacheSize); + + // Take unclaimed_cache_space_ negative. + unclaimed_cache_space_ -= kMinThreadCacheSize; + ASSERT(unclaimed_cache_space_ < 0); + } + + next_ = NULL; + prev_ = NULL; + tid_ = tid; + in_setspecific_ = false; + for (uint32 cl = 0; cl < Static::num_size_classes(); ++cl) { + list_[cl].Init(Static::sizemap()->class_to_size(cl)); + } + + uint32_t sampler_seed; + memcpy(&sampler_seed, &tid, sizeof(sampler_seed)); + sampler_.Init(sampler_seed); +} + +void ThreadCache::Cleanup() { + // Put unused memory back into central cache + for (uint32 cl = 0; cl < Static::num_size_classes(); ++cl) { + if (list_[cl].length() > 0) { + ReleaseToCentralCache(&list_[cl], cl, list_[cl].length()); + } + } +} + +// Remove some objects of class "cl" from central cache and add to thread heap. +// On success, return the first object for immediate use; otherwise return NULL. 
+void* ThreadCache::FetchFromCentralCache(uint32 cl, int32_t byte_size, + void *(*oom_handler)(size_t size)) { + FreeList* list = &list_[cl]; + ASSERT(list->empty()); + const int batch_size = Static::sizemap()->num_objects_to_move(cl); + + const int num_to_move = min<int>(list->max_length(), batch_size); + void *start, *end; + int fetch_count = Static::central_cache()[cl].RemoveRange( + &start, &end, num_to_move); + + if (fetch_count == 0) { + ASSERT(start == NULL); + return oom_handler(byte_size); + } + ASSERT(start != NULL); + + if (--fetch_count >= 0) { + size_ += byte_size * fetch_count; + list->PushRange(fetch_count, SLL_Next(start), end); + } + + // Increase max length slowly up to batch_size. After that, + // increase by batch_size in one shot so that the length is a + // multiple of batch_size. + if (list->max_length() < batch_size) { + list->set_max_length(list->max_length() + 1); + } else { + // Don't let the list get too long. In 32 bit builds, the length + // is represented by a 16 bit int, so we need to watch out for + // integer overflow. + int new_length = min<int>(list->max_length() + batch_size, + kMaxDynamicFreeListLength); + // The list's max_length must always be a multiple of batch_size, + // and kMaxDynamicFreeListLength is not necessarily a multiple + // of batch_size. + new_length -= new_length % batch_size; + ASSERT(new_length % batch_size == 0); + list->set_max_length(new_length); + } + return start; +} + +void ThreadCache::ListTooLong(FreeList* list, uint32 cl) { + size_ += list->object_size(); + + const int batch_size = Static::sizemap()->num_objects_to_move(cl); + ReleaseToCentralCache(list, cl, batch_size); + + // If the list is too long, we need to transfer some number of + // objects to the central cache. Ideally, we would transfer + // num_objects_to_move, so the code below tries to make max_length + // converge on num_objects_to_move. + + if (list->max_length() < batch_size) { + // Slow start the max_length so we don't overreserve. 
+ list->set_max_length(list->max_length() + 1); + } else if (list->max_length() > batch_size) { + // If we consistently go over max_length, shrink max_length. If we don't + // shrink it, some amount of memory will always stay in this freelist. + list->set_length_overages(list->length_overages() + 1); + if (list->length_overages() > kMaxOverages) { + ASSERT(list->max_length() > batch_size); + list->set_max_length(list->max_length() - batch_size); + list->set_length_overages(0); + } + } + + if (PREDICT_FALSE(size_ > max_size_)) { + Scavenge(); + } +} + +// Remove some objects of class "cl" from thread heap and add to central cache +void ThreadCache::ReleaseToCentralCache(FreeList* src, uint32 cl, int N) { + ASSERT(src == &list_[cl]); + if (N > src->length()) N = src->length(); + size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl); + + // We return prepackaged chains of the correct size to the central cache. + // TODO: Use the same format internally in the thread caches? + int batch_size = Static::sizemap()->num_objects_to_move(cl); + while (N > batch_size) { + void *tail, *head; + src->PopRange(batch_size, &head, &tail); + Static::central_cache()[cl].InsertRange(head, tail, batch_size); + N -= batch_size; + } + void *tail, *head; + src->PopRange(N, &head, &tail); + Static::central_cache()[cl].InsertRange(head, tail, N); + size_ -= delta_bytes; +} + +// Release idle memory to the central cache +void ThreadCache::Scavenge() { + // If the low-water mark for the free list is L, it means we would + // not have had to allocate anything from the central cache even if + // we had reduced the free list size by L. We aim to get closer to + // that situation by dropping L/2 nodes from the free list. This + // may not release much memory, but if so we will call scavenge again + // pretty soon and the low-water marks will be high on that call. 
+ for (int cl = 0; cl < Static::num_size_classes(); cl++) { + FreeList* list = &list_[cl]; + const int lowmark = list->lowwatermark(); + if (lowmark > 0) { + const int drop = (lowmark > 1) ? lowmark/2 : 1; + ReleaseToCentralCache(list, cl, drop); + + // Shrink the max length if it isn't used. Only shrink down to + // batch_size -- if the thread was active enough to get the max_length + // above batch_size, it will likely be that active again. If + // max_length shinks below batch_size, the thread will have to + // go through the slow-start behavior again. The slow-start is useful + // mainly for threads that stay relatively idle for their entire + // lifetime. + const int batch_size = Static::sizemap()->num_objects_to_move(cl); + if (list->max_length() > batch_size) { + list->set_max_length( + max<int>(list->max_length() - batch_size, batch_size)); + } + } + list->clear_lowwatermark(); + } + + IncreaseCacheLimit(); +} + +void ThreadCache::IncreaseCacheLimit() { + SpinLockHolder h(Static::pageheap_lock()); + IncreaseCacheLimitLocked(); +} + +void ThreadCache::IncreaseCacheLimitLocked() { + if (unclaimed_cache_space_ > 0) { + // Possibly make unclaimed_cache_space_ negative. + unclaimed_cache_space_ -= kStealAmount; + SetMaxSize(max_size_ + kStealAmount); + return; + } + // Don't hold pageheap_lock too long. Try to steal from 10 other + // threads before giving up. The i < 10 condition also prevents an + // infinite loop in case none of the existing thread heaps are + // suitable places to steal from. + for (int i = 0; i < 10; + ++i, next_memory_steal_ = next_memory_steal_->next_) { + // Reached the end of the linked list. Start at the beginning. 
+ if (next_memory_steal_ == NULL) { + ASSERT(thread_heaps_ != NULL); + next_memory_steal_ = thread_heaps_; + } + if (next_memory_steal_ == this || + next_memory_steal_->max_size_ <= kMinThreadCacheSize) { + continue; + } + next_memory_steal_->SetMaxSize(next_memory_steal_->max_size_ - kStealAmount); + SetMaxSize(max_size_ + kStealAmount); + + next_memory_steal_ = next_memory_steal_->next_; + return; + } +} + +int ThreadCache::GetSamplePeriod() { + return sampler_.GetSamplePeriod(); +} + +void ThreadCache::InitModule() { + { + SpinLockHolder h(Static::pageheap_lock()); + if (phinited) { + return; + } + const char *tcb = TCMallocGetenvSafe("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES"); + if (tcb) { + set_overall_thread_cache_size(strtoll(tcb, NULL, 10)); + } + Static::InitStaticVars(); + threadcache_allocator.Init(); + phinited = 1; + } + + // We do "late" part of initialization without holding lock since + // there is chance it'll recurse into malloc + Static::InitLateMaybeRecursive(); +} + +void ThreadCache::InitTSD() { + ASSERT(!tsd_inited_); + perftools_pthread_key_create(&heap_key_, DestroyThreadCache); + tsd_inited_ = true; + +#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY + // We may have used a fake pthread_t for the main thread. Fix it. + pthread_t zero; + memset(&zero, 0, sizeof(zero)); + SpinLockHolder h(Static::pageheap_lock()); + for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { + if (h->tid_ == zero) { + h->tid_ = pthread_self(); + } + } +#endif +} + +ThreadCache* ThreadCache::CreateCacheIfNecessary() { + if (!tsd_inited_) { +#ifndef NDEBUG + // tests that freeing nullptr very early is working + free(NULL); +#endif + + InitModule(); + } + + // Initialize per-thread data if necessary + ThreadCache* heap = NULL; + + bool seach_condition = true; +#ifdef HAVE_TLS + static __thread ThreadCache** current_heap_ptr; + if (tsd_inited_) { + // In most common case we're avoiding expensive linear search + // through all heaps (see below). 
Working TLS enables faster + // protection from malloc recursion in pthread_setspecific + seach_condition = false; + + if (current_heap_ptr != NULL) { + // we're being recursively called by pthread_setspecific below. + return *current_heap_ptr; + } + current_heap_ptr = &heap; + } +#endif + + { + SpinLockHolder h(Static::pageheap_lock()); + // On some old glibc's, and on freebsd's libc (as of freebsd 8.1), + // calling pthread routines (even pthread_self) too early could + // cause a segfault. Since we can call pthreads quite early, we + // have to protect against that in such situations by making a + // 'fake' pthread. This is not ideal since it doesn't work well + // when linking tcmalloc statically with apps that create threads + // before main, so we only do it if we have to. +#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY + pthread_t me; + if (!tsd_inited_) { + memset(&me, 0, sizeof(me)); + } else { + me = pthread_self(); + } +#else + const pthread_t me = pthread_self(); +#endif + + // This may be a recursive malloc call from pthread_setspecific() + // In that case, the heap for this thread has already been created + // and added to the linked list. So we search for that first. + if (seach_condition) { + for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { + if (h->tid_ == me) { + heap = h; + break; + } + } + } + + if (heap == NULL) heap = NewHeap(me); + } + + // We call pthread_setspecific() outside the lock because it may + // call malloc() recursively. We check for the recursive call using + // the "in_setspecific_" flag so that we can avoid calling + // pthread_setspecific() if we are already inside pthread_setspecific(). 
+ if (!heap->in_setspecific_ && tsd_inited_) { + heap->in_setspecific_ = true; + perftools_pthread_setspecific(heap_key_, heap); +#ifdef HAVE_TLS + // Also keep a copy in __thread for faster retrieval + threadlocal_data_.heap = heap; + threadlocal_data_.fast_path_heap = heap; +#endif + heap->in_setspecific_ = false; + } +#ifdef HAVE_TLS + current_heap_ptr = NULL; +#endif + return heap; +} + +ThreadCache* ThreadCache::NewHeap(pthread_t tid) { + // Create the heap and add it to the linked list + ThreadCache *heap = threadcache_allocator.New(); + heap->Init(tid); + heap->next_ = thread_heaps_; + heap->prev_ = NULL; + if (thread_heaps_ != NULL) { + thread_heaps_->prev_ = heap; + } else { + // This is the only thread heap at the momment. + ASSERT(next_memory_steal_ == NULL); + next_memory_steal_ = heap; + } + thread_heaps_ = heap; + thread_heap_count_++; + return heap; +} + +void ThreadCache::BecomeIdle() { + if (!tsd_inited_) return; // No caches yet + ThreadCache* heap = GetThreadHeap(); + if (heap == NULL) return; // No thread cache to remove + if (heap->in_setspecific_) return; // Do not disturb the active caller + + heap->in_setspecific_ = true; + perftools_pthread_setspecific(heap_key_, NULL); +#ifdef HAVE_TLS + // Also update the copy in __thread + threadlocal_data_.heap = NULL; + threadlocal_data_.fast_path_heap = NULL; +#endif + heap->in_setspecific_ = false; + if (GetThreadHeap() == heap) { + // Somehow heap got reinstated by a recursive call to malloc + // from pthread_setspecific. We give up in this case. + return; + } + + // We can now get rid of the heap + DeleteCache(heap); +} + +void ThreadCache::BecomeTemporarilyIdle() { + ThreadCache* heap = GetCacheIfPresent(); + if (heap) + heap->Cleanup(); +} + +void ThreadCache::DestroyThreadCache(void* ptr) { + // Note that "ptr" cannot be NULL since pthread promises not + // to invoke the destructor on NULL values, but for safety, + // we check anyway. 
+ if (ptr == NULL) return; +#ifdef HAVE_TLS + // Prevent fast path of GetThreadHeap() from returning heap. + threadlocal_data_.heap = NULL; + threadlocal_data_.fast_path_heap = NULL; +#endif + DeleteCache(reinterpret_cast<ThreadCache*>(ptr)); +} + +void ThreadCache::DeleteCache(ThreadCache* heap) { + // Remove all memory from heap + heap->Cleanup(); + + // Remove from linked list + SpinLockHolder h(Static::pageheap_lock()); + if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_; + if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_; + if (thread_heaps_ == heap) thread_heaps_ = heap->next_; + thread_heap_count_--; + + if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_; + if (next_memory_steal_ == NULL) next_memory_steal_ = thread_heaps_; + unclaimed_cache_space_ += heap->max_size_; + + threadcache_allocator.Delete(heap); +} + +void ThreadCache::RecomputePerThreadCacheSize() { + // Divide available space across threads + int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1; + size_t space = overall_thread_cache_size_ / n; + + // Limit to allowed range + if (space < kMinThreadCacheSize) space = kMinThreadCacheSize; + if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize; + + double ratio = space / max<double>(1, per_thread_cache_size_); + size_t claimed = 0; + for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { + // Increasing the total cache size should not circumvent the + // slow-start growth of max_size_. 
+ if (ratio < 1.0) { + h->SetMaxSize(h->max_size_ * ratio); + } + claimed += h->max_size_; + } + unclaimed_cache_space_ = overall_thread_cache_size_ - claimed; + per_thread_cache_size_ = space; +} + +void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) { + for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { + *total_bytes += h->Size(); + if (class_count) { + for (int cl = 0; cl < Static::num_size_classes(); ++cl) { + class_count[cl] += h->freelist_length(cl); + } + } + } +} + +void ThreadCache::set_overall_thread_cache_size(size_t new_size) { + // Clip the value to a reasonable range + if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize; + if (new_size > (1<<30)) new_size = (1<<30); // Limit to 1GB + overall_thread_cache_size_ = new_size; + + RecomputePerThreadCacheSize(); +} + +} // namespace tcmalloc diff --git a/src/third_party/gperftools-2.7/src/thread_cache.h b/src/third_party/gperftools-2.7/src/thread_cache.h new file mode 100644 index 00000000000..f8be15267a4 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/thread_cache.h @@ -0,0 +1,510 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#ifndef TCMALLOC_THREAD_CACHE_H_ +#define TCMALLOC_THREAD_CACHE_H_ + +#include <config.h> +#ifdef HAVE_PTHREAD +#include <pthread.h> // for pthread_t, pthread_key_t +#endif +#include <stddef.h> // for size_t, NULL +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uint32_t, uint64_t +#endif +#include <sys/types.h> // for ssize_t +#include "base/commandlineflags.h" +#include "common.h" +#include "linked_list.h" +#include "maybe_threads.h" +#include "page_heap_allocator.h" +#include "sampler.h" +#include "static_vars.h" + +#include "common.h" // for SizeMap, kMaxSize, etc +#include "internal_logging.h" // for ASSERT, etc +#include "linked_list.h" // for SLL_Pop, SLL_PopRange, etc +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "sampler.h" // for Sampler +#include "static_vars.h" // for Static + +DECLARE_int64(tcmalloc_sample_parameter); + +namespace tcmalloc { + +//------------------------------------------------------------------- +// Data kept per thread +//------------------------------------------------------------------- + +class ThreadCache { + public: +#ifdef HAVE_TLS + enum { have_tls = 
true }; +#else + enum { have_tls = false }; +#endif + + void Init(pthread_t tid); + void Cleanup(); + + // Accessors (mostly just for printing stats) + int freelist_length(uint32 cl) const { return list_[cl].length(); } + + // Total byte size in cache + size_t Size() const { return size_; } + + // Allocate an object of the given size and class. The size given + // must be the same as the size of the class in the size map. + void* Allocate(size_t size, uint32 cl, void *(*oom_handler)(size_t size)); + void Deallocate(void* ptr, uint32 size_class); + + void Scavenge(); + + int GetSamplePeriod(); + + // Record allocation of "k" bytes. Return true iff allocation + // should be sampled + bool SampleAllocation(size_t k); + + bool TryRecordAllocationFast(size_t k); + + static void InitModule(); + static void InitTSD(); + static ThreadCache* GetThreadHeap(); + static ThreadCache* GetCache(); + static ThreadCache* GetCacheIfPresent(); + static ThreadCache* GetFastPathCache(); + static ThreadCache* GetCacheWhichMustBePresent(); + static ThreadCache* CreateCacheIfNecessary(); + static void BecomeIdle(); + static void BecomeTemporarilyIdle(); + static void SetUseEmergencyMalloc(); + static void ResetUseEmergencyMalloc(); + static bool IsUseEmergencyMalloc(); + + // Return the number of thread heaps in use. + static inline int HeapsInUse(); + + // Adds to *total_bytes the total number of bytes used by all thread heaps. + // Also, if class_count is not NULL, it must be an array of size kNumClasses, + // and this function will increment each element of class_count by the number + // of items in all thread-local freelists of the corresponding size class. + // REQUIRES: Static::pageheap_lock is held. + static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count); + + // Sets the total thread cache size to new_size, recomputing the + // individual thread cache sizes as necessary. + // REQUIRES: Static::pageheap lock is held. 
+ static void set_overall_thread_cache_size(size_t new_size); + static size_t overall_thread_cache_size() { + return overall_thread_cache_size_; + } + + private: + class FreeList { + private: + void* list_; // Linked list of nodes + +#ifdef _LP64 + // On 64-bit hardware, manipulating 16-bit values may be slightly slow. + uint32_t length_; // Current length. + uint32_t lowater_; // Low water mark for list length. + uint32_t max_length_; // Dynamic max list length based on usage. + // Tracks the number of times a deallocation has caused + // length_ > max_length_. After the kMaxOverages'th time, max_length_ + // shrinks and length_overages_ is reset to zero. + uint32_t length_overages_; +#else + // If we aren't using 64-bit pointers then pack these into less space. + uint16_t length_; + uint16_t lowater_; + uint16_t max_length_; + uint16_t length_overages_; +#endif + + int32_t size_; + + public: + void Init(size_t size) { + list_ = NULL; + length_ = 0; + lowater_ = 0; + max_length_ = 1; + length_overages_ = 0; + size_ = size; + } + + // Return current length of list + size_t length() const { + return length_; + } + + int32_t object_size() const { + return size_; + } + + // Return the maximum length of the list. + size_t max_length() const { + return max_length_; + } + + // Set the maximum length of the list. If 'new_max' > length(), the + // client is responsible for removing objects from the list. + void set_max_length(size_t new_max) { + max_length_ = new_max; + } + + // Return the number of times that length() has gone over max_length(). + size_t length_overages() const { + return length_overages_; + } + + void set_length_overages(size_t new_count) { + length_overages_ = new_count; + } + + // Is list empty? 
+ bool empty() const { + return list_ == NULL; + } + + // Low-water mark management + int lowwatermark() const { return lowater_; } + void clear_lowwatermark() { lowater_ = length_; } + + uint32_t Push(void* ptr) { + uint32_t length = length_ + 1; + SLL_Push(&list_, ptr); + length_ = length; + return length; + } + + void* Pop() { + ASSERT(list_ != NULL); + length_--; + if (length_ < lowater_) lowater_ = length_; + return SLL_Pop(&list_); + } + + bool TryPop(void **rv) { + if (SLL_TryPop(&list_, rv)) { + length_--; + if (PREDICT_FALSE(length_ < lowater_)) lowater_ = length_; + return true; + } + return false; + } + + void* Next() { + return SLL_Next(&list_); + } + + void PushRange(int N, void *start, void *end) { + SLL_PushRange(&list_, start, end); + length_ += N; + } + + void PopRange(int N, void **start, void **end) { + SLL_PopRange(&list_, N, start, end); + ASSERT(length_ >= N); + length_ -= N; + if (length_ < lowater_) lowater_ = length_; + } + }; + + // Gets and returns an object from the central cache, and, if possible, + // also adds some objects of that size class to this thread cache. + void* FetchFromCentralCache(uint32 cl, int32_t byte_size, + void *(*oom_handler)(size_t size)); + + void ListTooLong(void* ptr, uint32 cl); + + // Releases some number of items from src. Adjusts the list's max_length + // to eventually converge on num_objects_to_move(cl). + void ListTooLong(FreeList* src, uint32 cl); + + // Releases N items from this thread cache. + void ReleaseToCentralCache(FreeList* src, uint32 cl, int N); + + void SetMaxSize(int32 new_max_size); + + // Increase max_size_ by reducing unclaimed_cache_space_ or by + // reducing the max_size_ of some other thread. In both cases, + // the delta is kStealAmount. + void IncreaseCacheLimit(); + // Same as above but requires Static::pageheap_lock() is held. 
+ void IncreaseCacheLimitLocked(); + + // If TLS is available, we also store a copy of the per-thread object + // in a __thread variable since __thread variables are faster to read + // than pthread_getspecific(). We still need pthread_setspecific() + // because __thread variables provide no way to run cleanup code when + // a thread is destroyed. + // We also give a hint to the compiler to use the "initial exec" TLS + // model. This is faster than the default TLS model, at the cost that + // you cannot dlopen this library. (To see the difference, look at + // the CPU use of __tls_get_addr with and without this attribute.) + // Since we don't really use dlopen in google code -- and using dlopen + // on a malloc replacement is asking for trouble in any case -- that's + // a good tradeoff for us. +#ifdef HAVE_TLS + struct ThreadLocalData { + ThreadCache* fast_path_heap; + ThreadCache* heap; + bool use_emergency_malloc; + }; + static __thread ThreadLocalData threadlocal_data_ + CACHELINE_ALIGNED ATTR_INITIAL_EXEC; + +#endif + + // Thread-specific key. Initialization here is somewhat tricky + // because some Linux startup code invokes malloc() before it + // is in a good enough state to handle pthread_keycreate(). + // Therefore, we use TSD keys only after tsd_inited is set to true. + // Until then, we use a slow path to get the heap object. + static ATTRIBUTE_HIDDEN bool tsd_inited_; + static pthread_key_t heap_key_; + + // Linked list of heap objects. Protected by Static::pageheap_lock. + static ThreadCache* thread_heaps_; + static int thread_heap_count_; + + // A pointer to one of the objects in thread_heaps_. Represents + // the next ThreadCache from which a thread over its max_size_ should + // steal memory limit. Round-robin through all of the objects in + // thread_heaps_. Protected by Static::pageheap_lock. + static ThreadCache* next_memory_steal_; + + // Overall thread cache size. Protected by Static::pageheap_lock. 
+ static size_t overall_thread_cache_size_; + + // Global per-thread cache size. Writes are protected by + // Static::pageheap_lock. Reads are done without any locking, which should be + // fine as long as size_t can be written atomically and we don't place + // invariants between this variable and other pieces of state. + static volatile size_t per_thread_cache_size_; + + // Represents overall_thread_cache_size_ minus the sum of max_size_ + // across all ThreadCaches. Protected by Static::pageheap_lock. + static ssize_t unclaimed_cache_space_; + + // This class is laid out with the most frequently used fields + // first so that hot elements are placed on the same cache line. + + FreeList list_[kClassSizesMax]; // Array indexed by size-class + + int32 size_; // Combined size of data + int32 max_size_; // size_ > max_size_ --> Scavenge() + + // We sample allocations, biased by the size of the allocation + Sampler sampler_; // A sampler + + pthread_t tid_; // Which thread owns it + bool in_setspecific_; // In call to pthread_setspecific? + + // Allocate a new heap. REQUIRES: Static::pageheap_lock is held. + static ThreadCache* NewHeap(pthread_t tid); + + // Use only as pthread thread-specific destructor function. + static void DestroyThreadCache(void* ptr); + + static void DeleteCache(ThreadCache* heap); + static void RecomputePerThreadCacheSize(); + +public: + + // All ThreadCache objects are kept in a linked list (for stats collection) + ThreadCache* next_; + ThreadCache* prev_; + + // Ensure that this class is cacheline-aligned. This is critical for + // performance, as false sharing would negate many of the benefits + // of a per-thread cache. +} CACHELINE_ALIGNED; + +// Allocator for thread heaps +// This is logically part of the ThreadCache class, but MSVC, at +// least, does not like using ThreadCache as a template argument +// before the class is fully defined. So we put it outside the class. 
+extern PageHeapAllocator<ThreadCache> threadcache_allocator; + +inline int ThreadCache::HeapsInUse() { + return threadcache_allocator.inuse(); +} + +inline ATTRIBUTE_ALWAYS_INLINE void* ThreadCache::Allocate( + size_t size, uint32 cl, void *(*oom_handler)(size_t size)) { + FreeList* list = &list_[cl]; + +#ifdef NO_TCMALLOC_SAMPLES + size = list->object_size(); +#endif + + ASSERT(size <= kMaxSize); + ASSERT(size != 0); + ASSERT(size == 0 || size == Static::sizemap()->ByteSizeForClass(cl)); + + void* rv; + if (!list->TryPop(&rv)) { + return FetchFromCentralCache(cl, size, oom_handler); + } + size_ -= size; + return rv; +} + +inline ATTRIBUTE_ALWAYS_INLINE void ThreadCache::Deallocate(void* ptr, uint32 cl) { + ASSERT(list_[cl].max_length() > 0); + FreeList* list = &list_[cl]; + + // This catches back-to-back frees of allocs in the same size + // class. A more comprehensive (and expensive) test would be to walk + // the entire freelist. But this might be enough to find some bugs. + ASSERT(ptr != list->Next()); + + uint32_t length = list->Push(ptr); + + if (PREDICT_FALSE(length > list->max_length())) { + ListTooLong(list, cl); + return; + } + + size_ += list->object_size(); + if (PREDICT_FALSE(size_ > max_size_)){ + Scavenge(); + } +} + +inline ThreadCache* ThreadCache::GetThreadHeap() { +#ifdef HAVE_TLS + return threadlocal_data_.heap; +#else + return reinterpret_cast<ThreadCache *>( + perftools_pthread_getspecific(heap_key_)); +#endif +} + +inline ThreadCache* ThreadCache::GetCacheWhichMustBePresent() { +#ifdef HAVE_TLS + ASSERT(threadlocal_data_.heap); + return threadlocal_data_.heap; +#else + ASSERT(perftools_pthread_getspecific(heap_key_)); + return reinterpret_cast<ThreadCache *>( + perftools_pthread_getspecific(heap_key_)); +#endif +} + +inline ThreadCache* ThreadCache::GetCache() { +#ifdef HAVE_TLS + ThreadCache* ptr = GetThreadHeap(); +#else + ThreadCache* ptr = NULL; + if (PREDICT_TRUE(tsd_inited_)) { + ptr = GetThreadHeap(); + } +#endif + if (ptr == NULL) 
ptr = CreateCacheIfNecessary(); + return ptr; +} + +// In deletion paths, we do not try to create a thread-cache. This is +// because we may be in the thread destruction code and may have +// already cleaned up the cache for this thread. +inline ThreadCache* ThreadCache::GetCacheIfPresent() { +#ifndef HAVE_TLS + if (PREDICT_FALSE(!tsd_inited_)) return NULL; +#endif + return GetThreadHeap(); +} + +inline ThreadCache* ThreadCache::GetFastPathCache() { +#ifndef HAVE_TLS + return GetCacheIfPresent(); +#else + return threadlocal_data_.fast_path_heap; +#endif +} + +inline void ThreadCache::SetUseEmergencyMalloc() { +#ifdef HAVE_TLS + threadlocal_data_.fast_path_heap = NULL; + threadlocal_data_.use_emergency_malloc = true; +#endif +} + +inline void ThreadCache::ResetUseEmergencyMalloc() { +#ifdef HAVE_TLS + ThreadCache *heap = threadlocal_data_.heap; + threadlocal_data_.fast_path_heap = heap; + threadlocal_data_.use_emergency_malloc = false; +#endif +} + +inline bool ThreadCache::IsUseEmergencyMalloc() { +#if defined(HAVE_TLS) && defined(ENABLE_EMERGENCY_MALLOC) + return PREDICT_FALSE(threadlocal_data_.use_emergency_malloc); +#else + return false; +#endif +} + +inline void ThreadCache::SetMaxSize(int32 new_max_size) { + max_size_ = new_max_size; +} + +#ifndef NO_TCMALLOC_SAMPLES + +inline bool ThreadCache::SampleAllocation(size_t k) { + return !sampler_.RecordAllocation(k); +} + +inline bool ThreadCache::TryRecordAllocationFast(size_t k) { + return sampler_.TryRecordAllocationFast(k); +} + +#else + +inline bool ThreadCache::SampleAllocation(size_t k) { + return false; +} + +inline bool ThreadCache::TryRecordAllocationFast(size_t k) { + return true; +} + +#endif + +} // namespace tcmalloc + +#endif // TCMALLOC_THREAD_CACHE_H_ diff --git a/src/third_party/gperftools-2.7/src/windows/TODO b/src/third_party/gperftools-2.7/src/windows/TODO new file mode 100644 index 00000000000..708ec237ac4 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/TODO @@ -0,0 +1,86 @@ +* 
Get heap-profile-table.cc using DeleteMatchingFiles +* Get heap-profile-table.cc using FillProcSelfMaps, DumpProcSelfMaps +* Play around with ExperimentalGetStackTrace +* Support the windows-level memory-allocation functions? See + /home/build/googleclient/earth/client/tools/memorytracking/client/memorytrace/src/memorytrace.cpp + /home/build/googleclient/total_recall/common/sitestep/* + http://www.internals.com/articles/apispy/apispy.htm + http://www.wheaty.net/APISPY32.zip +* Verify /proc/xxx/maps: + http://www.geocities.com/wah_java_dotnet/procmap/index.html +* Figure out how to edit the executable IAT so tcmalloc.dll is loaded first +* Use QueryPerformanceCounter instead of GetTickCount() (also for sparsehash) + +---- +More info on windows-level memory-allocation functions: + C runtime malloc + LocalAlloc + GlobalAlloc + HeapAlloc + VirtualAlloc + mmap stuff + +malloc, LocalAlloc and GlobalAlloc call HeapAlloc, which calls +VirtualAlloc when needed, which calls VirtualAllocEx (the __sbrk equiv?) + +siggi sez: If you want to do a generic job, you probably need to +preserve the semantics of all of these Win32 calls: + Heap32First + Heap32ListFirst + Heap32ListNext + Heap32Next + HeapAlloc + HeapCompact + HeapCreate + HeapCreateTagsW + HeapDestroy + HeapExtend + HeapFree + HeapLock + HeapQueryInformation + HeapQueryTagW + HeapReAlloc + HeapSetInformation + HeapSize + HeapSummary + HeapUnlock + HeapUsage + HeapValidate + HeapWalk + +kernel32.dll export functions and nt.dll export functions: + http://www.shorthike.com/svn/trunk/tools_win32/dm/lib/kernel32.def + http://undocumented.ntinternals.net/ + +You can edit the executable IAT to have the patching DLL be the +first one loaded. + +Most complete way to intercept system calls is patch the functions +(not the IAT). 
+ +Microsoft has somee built-in routines for heap-checking: + http://support.microsoft.com/kb/268343 + +---- +Itimer replacement: + http://msdn2.microsoft.com/en-us/library/ms712713.aspx + +---- +Changes I've had to make to the project file: + +0) When creating the project file, click on "no autogenerated files" + +--- For each project: +1) Alt-F7 -> General -> [pulldown "all configurations" ] -> Output Directory -> $(SolutionDir)$(ConfigurationName) +2) Alt-F7 -> General -> [pulldown "all configurations" ] -> Intermediate Directory -> $(ConfigurationName) + +--- For each .cc file: +1) Alt-F7 -> C/C++ -> General -> [pulldown "all configurations"] -> Additional Include Directives --> src/windows + src/ +2) Alt-F7 -> C/C++ -> Code Generation -> Runtime Library -> Multi-threaded, debug/release, DLL or not + +--- For DLL: +3) Alt-F7 -> Linker -> Input -> [pulldown "all configurations" ] -> Module Definition File -> src\windows\vc7and8.def +--- For binaries depending on a DLL: +3) Right-click on project -> Project Dependencies -> [add dll] +--- For static binaries (not depending on a DLL) +3) Alt-F7 -> C/C++ -> Command Line -> [pulldown "all configurations"] -> /D PERFTOOLS_DLL_DECL= diff --git a/src/third_party/gperftools-2.7/src/windows/addr2line-pdb.c b/src/third_party/gperftools-2.7/src/windows/addr2line-pdb.c new file mode 100644 index 00000000000..5c65a0357e5 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/addr2line-pdb.c @@ -0,0 +1,163 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: David Vitek + * + * Dump function addresses using Microsoft debug symbols. This works + * on PDB files. Note that this program will download symbols to + * c:\websymbols without asking. 
+ */ + +#define WIN32_LEAN_AND_MEAN +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE + +#include <stdio.h> +#include <stdlib.h> + +#include <windows.h> +#include <dbghelp.h> + +#define SEARCH_CAP (1024*1024) +#define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols" + +void usage() { + fprintf(stderr, "usage: " + "addr2line-pdb [-f|--functions] [-C|--demangle] [-e filename]\n"); + fprintf(stderr, "(Then list the hex addresses on stdin, one per line)\n"); +} + +int main(int argc, char *argv[]) { + DWORD error; + HANDLE process; + ULONG64 module_base; + int i; + char* search; + char buf[256]; /* Enough to hold one hex address, I trust! */ + int rv = 0; + /* We may add SYMOPT_UNDNAME if --demangle is specified: */ + DWORD symopts = SYMOPT_DEFERRED_LOADS | SYMOPT_DEBUG | SYMOPT_LOAD_LINES; + char* filename = "a.out"; /* The default if -e isn't specified */ + int print_function_name = 0; /* Set to 1 if -f is specified */ + + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "--functions") == 0 || strcmp(argv[i], "-f") == 0) { + print_function_name = 1; + } else if (strcmp(argv[i], "--demangle") == 0 || + strcmp(argv[i], "-C") == 0) { + symopts |= SYMOPT_UNDNAME; + } else if (strcmp(argv[i], "-e") == 0) { + if (i + 1 >= argc) { + fprintf(stderr, "FATAL ERROR: -e must be followed by a filename\n"); + return 1; + } + filename = argv[i+1]; + i++; /* to skip over filename too */ + } else if (strcmp(argv[i], "--help") == 0) { + usage(); + exit(0); + } else { + usage(); + exit(1); + } + } + + process = GetCurrentProcess(); + + if (!SymInitialize(process, NULL, FALSE)) { + error = GetLastError(); + fprintf(stderr, "SymInitialize returned error : %d\n", error); + return 1; + } + + search = malloc(SEARCH_CAP); + if (SymGetSearchPath(process, search, SEARCH_CAP)) { + if (strlen(search) + sizeof(";" WEBSYM) > SEARCH_CAP) { + fprintf(stderr, "Search path too long\n"); + SymCleanup(process); + return 1; + } + strcat(search, ";" WEBSYM); + } 
else { + error = GetLastError(); + fprintf(stderr, "SymGetSearchPath returned error : %d\n", error); + rv = 1; /* An error, but not a fatal one */ + strcpy(search, WEBSYM); /* Use a default value */ + } + if (!SymSetSearchPath(process, search)) { + error = GetLastError(); + fprintf(stderr, "SymSetSearchPath returned error : %d\n", error); + rv = 1; /* An error, but not a fatal one */ + } + + SymSetOptions(symopts); + module_base = SymLoadModuleEx(process, NULL, filename, NULL, 0, 0, NULL, 0); + if (!module_base) { + /* SymLoadModuleEx failed */ + error = GetLastError(); + fprintf(stderr, "SymLoadModuleEx returned error : %d for %s\n", + error, filename); + SymCleanup(process); + return 1; + } + + buf[sizeof(buf)-1] = '\0'; /* Just to be safe */ + while (fgets(buf, sizeof(buf)-1, stdin)) { + /* GNU addr2line seems to just do a strtol and ignore any + * weird characters it gets, so we will too. + */ + unsigned __int64 addr = _strtoui64(buf, NULL, 16); + ULONG64 buffer[(sizeof(SYMBOL_INFO) + + MAX_SYM_NAME*sizeof(TCHAR) + + sizeof(ULONG64) - 1) + / sizeof(ULONG64)]; + PSYMBOL_INFO pSymbol = (PSYMBOL_INFO)buffer; + IMAGEHLP_LINE64 line; + DWORD dummy; + pSymbol->SizeOfStruct = sizeof(SYMBOL_INFO); + pSymbol->MaxNameLen = MAX_SYM_NAME; + if (print_function_name) { + if (SymFromAddr(process, (DWORD64)addr, NULL, pSymbol)) { + printf("%s\n", pSymbol->Name); + } else { + printf("??\n"); + } + } + line.SizeOfStruct = sizeof(IMAGEHLP_LINE64); + if (SymGetLineFromAddr64(process, (DWORD64)addr, &dummy, &line)) { + printf("%s:%d\n", line.FileName, (int)line.LineNumber); + } else { + printf("??:0\n"); + } + } + SymUnloadModule64(process, module_base); + SymCleanup(process); + return rv; +} diff --git a/src/third_party/gperftools-2.7/src/windows/auto_testing_hook.h b/src/third_party/gperftools-2.7/src/windows/auto_testing_hook.h new file mode 100644 index 00000000000..fc2b71013e9 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/auto_testing_hook.h @@ -0,0 +1,156 @@ 
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Utility for using SideStep with unit tests. 
+ +#ifndef CEEE_TESTING_SIDESTEP_AUTO_TESTING_HOOK_H_ +#define CEEE_TESTING_SIDESTEP_AUTO_TESTING_HOOK_H_ + +#include "base/basictypes.h" +#include "base/logging.h" +#include "preamble_patcher.h" + +#define SIDESTEP_CHK(x) CHECK(x) +#define SIDESTEP_EXPECT_TRUE(x) SIDESTEP_CHK(x) + +namespace sidestep { + +// Same trick as common/scope_cleanup.h ScopeGuardImplBase +class AutoTestingHookBase { + public: + virtual ~AutoTestingHookBase() {} +}; + +// This is the typedef you normally use for the class, e.g. +// +// AutoTestingHook hook = MakeTestingHook(TargetFunc, HookTargetFunc); +// +// The 'hook' variable will then be destroyed when it goes out of scope. +// +// NOTE: You must not hold this type as a member of another class. Its +// destructor will not get called. +typedef const AutoTestingHookBase& AutoTestingHook; + +// This is the class you must use when holding a hook as a member of another +// class, e.g. +// +// public: +// AutoTestingHookHolder holder_; +// MyClass() : my_hook_holder(MakeTestingHookHolder(Target, Hook)) {} +class AutoTestingHookHolder { + public: + explicit AutoTestingHookHolder(AutoTestingHookBase* hook) : hook_(hook) {} + ~AutoTestingHookHolder() { delete hook_; } + private: + AutoTestingHookHolder() {} // disallow + AutoTestingHookBase* hook_; +}; + +// This class helps patch a function, then unpatch it when the object exits +// scope, and also maintains the pointer to the original function stub. +// +// To enable use of the class without having to explicitly provide the +// type of the function pointers (and instead only providing it +// implicitly) we use the same trick as ScopeGuard (see +// common/scope_cleanup.h) uses, so to create a hook you use the MakeHook +// function rather than a constructor. +// +// NOTE: This function is only safe for e.g. unit tests and _not_ for +// production code. See PreamblePatcher class for details. 
+template <typename T> +class AutoTestingHookImpl : public AutoTestingHookBase { + public: + static AutoTestingHookImpl<T> MakeTestingHook(T target_function, + T replacement_function, + bool do_it) { + return AutoTestingHookImpl<T>(target_function, replacement_function, do_it); + } + + static AutoTestingHookImpl<T>* MakeTestingHookHolder(T target_function, + T replacement_function, + bool do_it) { + return new AutoTestingHookImpl<T>(target_function, + replacement_function, do_it); + } + + ~AutoTestingHookImpl() { + if (did_it_) { + SIDESTEP_CHK(SIDESTEP_SUCCESS == PreamblePatcher::Unpatch( + (void*)target_function_, (void*)replacement_function_, + (void*)original_function_)); + } + } + + // Returns a pointer to the original function. To use this method you will + // have to explicitly create an AutoTestingHookImpl of the specific + // function pointer type (i.e. not use the AutoTestingHook typedef). + T original_function() { + return original_function_; + } + + private: + AutoTestingHookImpl(T target_function, T replacement_function, bool do_it) + : target_function_(target_function), + original_function_(NULL), + replacement_function_(replacement_function), + did_it_(do_it) { + if (do_it) { + SIDESTEP_CHK(SIDESTEP_SUCCESS == PreamblePatcher::Patch(target_function, + replacement_function, + &original_function_)); + } + } + + T target_function_; // always valid + T original_function_; // always valid + T replacement_function_; // always valid + bool did_it_; // Remember if we did it or not... 
+}; + +template <typename T> +inline AutoTestingHookImpl<T> MakeTestingHook(T target, + T replacement, + bool do_it) { + return AutoTestingHookImpl<T>::MakeTestingHook(target, replacement, do_it); +} + +template <typename T> +inline AutoTestingHookImpl<T> MakeTestingHook(T target, T replacement) { + return AutoTestingHookImpl<T>::MakeTestingHook(target, replacement, true); +} + +template <typename T> +inline AutoTestingHookImpl<T>* MakeTestingHookHolder(T target, T replacement) { + return AutoTestingHookImpl<T>::MakeTestingHookHolder(target, replacement, + true); +} + +}; // namespace sidestep + +#endif // CEEE_TESTING_SIDESTEP_AUTO_TESTING_HOOK_H_ diff --git a/src/third_party/gperftools-2.7/src/windows/config.h b/src/third_party/gperftools-2.7/src/windows/config.h new file mode 100644 index 00000000000..e860bc35353 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/config.h @@ -0,0 +1,367 @@ +/* A manual version of config.h fit for windows machines. + * + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +/* Sometimes we accidentally #include this config.h instead of the one + in .. -- this is particularly true for msys/mingw, which uses the + unix config.h but also runs code in the windows directory. + */ +#ifdef __MINGW32__ +#include "../config.h" +#define GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +#endif + +#ifndef GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +#define GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +/* used by tcmalloc.h */ +#define GPERFTOOLS_CONFIG_H_ + +/* define this if you are linking tcmalloc statically and overriding the + * default allocators. 
+ * For instructions on how to use this mode, see + * http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b + */ +/* #undef WIN32_OVERRIDE_ALLOCATORS */ + +/* Build new/delete operators for overaligned types */ +/* #undef ENABLE_ALIGNED_NEW_DELETE */ + +/* Build runtime detection for sized delete */ +/* #undef ENABLE_DYNAMIC_SIZED_DELETE */ + +/* Build sized deletion operators */ +/* #undef ENABLE_SIZED_DELETE */ + +/* Define to 1 if compiler supports __builtin_expect */ +/* #undef HAVE_BUILTIN_EXPECT */ + +/* Define to 1 if compiler supports __builtin_stack_pointer */ +/* #undef HAVE_BUILTIN_STACK_POINTER */ + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +/* #undef HAVE_CONFLICT_SIGNAL_H */ + +/* Define to 1 if you have the <cygwin/signal.h> header file. */ +/* #undef HAVE_CYGWIN_SIGNAL_H */ + +/* Define to 1 if you have the declaration of `backtrace', and to 0 if you + don't. */ +/* #undef HAVE_DECL_BACKTRACE */ + +/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. + */ +#define HAVE_DECL_CFREE 0 + +/* Define to 1 if you have the declaration of `memalign', and to 0 if you + don't. */ +#define HAVE_DECL_MEMALIGN 0 + +/* Define to 1 if you have the declaration of `nanosleep', and to 0 if you + don't. */ +#define HAVE_DECL_NANOSLEEP 0 + +/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if + you don't. */ +#define HAVE_DECL_POSIX_MEMALIGN 0 + +/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you + don't. */ +#define HAVE_DECL_PVALLOC 0 + +/* Define to 1 if you have the declaration of `sleep', and to 0 if you don't. + */ +#define HAVE_DECL_SLEEP 0 + +/* Define to 1 if you have the declaration of `uname', and to 0 if you don't. + */ +#define HAVE_DECL_UNAME 0 + +/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't. + */ +#define HAVE_DECL_VALLOC 0 + +/* Define to 1 if you have the <dlfcn.h> header file. 
*/ +/* #undef HAVE_DLFCN_H */ + +/* Define to 1 if the system has the type `Elf32_Versym'. */ +/* #undef HAVE_ELF32_VERSYM */ + +/* Define to 1 if you have the <execinfo.h> header file. */ +/* #undef HAVE_EXECINFO_H */ + +/* Define to 1 if you have the <fcntl.h> header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the <features.h> header file. */ +/* #undef HAVE_FEATURES_H */ + +/* Define to 1 if you have the `fork' function. */ +/* #undef HAVE_FORK */ + +/* Define to 1 if you have the `geteuid' function. */ +/* #undef HAVE_GETEUID */ + +/* Define to 1 if you have the `getpagesize' function. */ +#define HAVE_GETPAGESIZE 1 /* we define it in windows/port.cc */ + +/* Define to 1 if you have the <glob.h> header file. */ +/* #undef HAVE_GLOB_H */ + +/* Define to 1 if you have the <grp.h> header file. */ +/* #undef HAVE_GRP_H */ + +/* Define to 1 if you have the <inttypes.h> header file. */ +#if defined(_MSC_VER) && _MSC_VER >= 1900 +#define HAVE_INTTYPES_H 1 +#endif + +/* Define to 1 if you have the <libunwind.h> header file. */ +/* #undef HAVE_LIBUNWIND_H */ + +/* Define to 1 if you have the <linux/ptrace.h> header file. */ +/* #undef HAVE_LINUX_PTRACE_H */ + +/* Define if this is Linux that has SIGEV_THREAD_ID */ +/* #undef HAVE_LINUX_SIGEV_THREAD_ID */ + +/* Define to 1 if you have the <malloc.h> header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have a working `mmap' system call. */ +/* #undef HAVE_MMAP */ + +/* define if the compiler implements namespaces */ +#define HAVE_NAMESPACES 1 + +/* Define to 1 if you have the <poll.h> header file. */ +/* #undef HAVE_POLL_H */ + +/* define if libc has program_invocation_name */ +/* #undef HAVE_PROGRAM_INVOCATION_NAME */ + +/* Define if you have POSIX threads libraries and header files. 
*/ +/* #undef HAVE_PTHREAD */ + +/* defined to 1 if pthread symbols are exposed even without include pthread.h + */ +/* #undef HAVE_PTHREAD_DESPITE_ASKING_FOR */ + +/* Define to 1 if you have the <pwd.h> header file. */ +/* #undef HAVE_PWD_H */ + +/* Define to 1 if you have the `sbrk' function. */ +/* #undef HAVE_SBRK */ + +/* Define to 1 if you have the <sched.h> header file. */ +/* #undef HAVE_SCHED_H */ + +/* Define to 1 if you have the <stdint.h> header file. */ +#if defined(_MSC_VER) && _MSC_VER >= 1900 +#define HAVE_STDINT_H 1 +#endif + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +/* #undef HAVE_STRINGS_H */ + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if the system has the type `struct mallinfo'. */ +/* #undef HAVE_STRUCT_MALLINFO */ + +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +/* #undef HAVE_SYS_CDEFS_H */ + +/* Define to 1 if you have the <sys/param.h> header file. */ +/* #undef HAVE_SYS_PARAM_H */ + +/* Define to 1 if you have the <sys/prctl.h> header file. */ +/* #undef HAVE_SYS_PRCTL_H */ + +/* Define to 1 if you have the <sys/resource.h> header file. */ +/* #undef HAVE_SYS_RESOURCE_H */ + +/* Define to 1 if you have the <sys/socket.h> header file. */ +/* #undef HAVE_SYS_SOCKET_H */ + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/syscall.h> header file. */ +/* #undef HAVE_SYS_SYSCALL_H */ + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <sys/ucontext.h> header file. */ +/* #undef HAVE_SYS_UCONTEXT_H */ + +/* Define to 1 if you have the <sys/wait.h> header file. 
*/ +/* #undef HAVE_SYS_WAIT_H */ + +/* Define to 1 if compiler supports __thread */ +#define HAVE_TLS 1 + +/* Define to 1 if you have the <ucontext.h> header file. */ +/* #undef HAVE_UCONTEXT_H */ + +/* Define to 1 if you have the <unistd.h> header file. */ +/* #undef HAVE_UNISTD_H */ + +/* Whether <unwind.h> contains _Unwind_Backtrace */ +/* #undef HAVE_UNWIND_BACKTRACE */ + +/* Define to 1 if you have the <unwind.h> header file. */ +/* #undef HAVE_UNWIND_H */ + +/* Define to 1 if you have the <valgrind.h> header file. */ +/* #undef HAVE_VALGRIND_H */ + +/* define if your compiler has __attribute__ */ +/* #undef HAVE___ATTRIBUTE__ */ + +/* define if your compiler supports alignment of functions */ +/* #undef HAVE___ATTRIBUTE__ALIGNED_FN */ + +/* Define to 1 if compiler supports __environ */ +/* #undef HAVE___ENVIRON */ + +/* Define to 1 if the system has the type `__int64'. */ +#define HAVE___INT64 1 + +/* prefix where we look for installed files */ +/* #undef INSTALL_PREFIX */ + +/* Define to 1 if int32_t is equivalent to intptr_t */ +#ifndef _WIN64 +#define INT32_EQUALS_INTPTR 1 +#endif + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +/* #undef LT_OBJDIR */ + +/* Name of package */ +#define PACKAGE "gperftools" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "gperftools@googlegroups.com" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "gperftools" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "gperftools 2.7" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "gperftools" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "2.7" + +/* How to access the PC from a struct ucontext */ +/* #undef PC_FROM_UCONTEXT */ + +/* Always the empty-string on non-windows systems. 
On windows, should be + "__declspec(dllexport)". This way, when we compile the dll, we export our + functions/classes. It's safe to define this here because config.h is only + used internally, to compile the DLL, and every DLL source file #includes + "config.h" before anything else. */ +#ifndef PERFTOOLS_DLL_DECL +# define PERFTOOLS_IS_A_DLL 1 /* not set if you're statically linking */ +# define PERFTOOLS_DLL_DECL __declspec(dllexport) +# define PERFTOOLS_DLL_DECL_FOR_UNITTESTS __declspec(dllimport) +#endif + +/* printf format code for printing a size_t and ssize_t */ +#ifdef _WIN64 +#define PRIdS "lld" +#else +#define PRIdS "d" +#endif + +/* printf format code for printing a size_t and ssize_t */ +#ifdef _WIN64 +#define PRIuS "llu" +#else +#define PRIuS "u" +#endif + +/* printf format code for printing a size_t and ssize_t */ +#ifdef _WIN64 +#define PRIxS "llx" +#else +#define PRIxS "x" +#endif + +/* Mark the systems where we know it's bad if pthreads runs too + early before main (before threads are initialized, presumably). */ +#ifdef __FreeBSD__ +#define PTHREADS_CRASHES_IF_RUN_TOO_EARLY 1 +#endif + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +/* #undef PTHREAD_CREATE_JOINABLE */ + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* the namespace where STL code like vector<> is defined */ +#define STL_NAMESPACE std + +/* Define 32K of internal pages size for tcmalloc */ +/* #undef TCMALLOC_32K_PAGES */ + +/* Define 64K of internal pages size for tcmalloc */ +/* #undef TCMALLOC_64K_PAGES */ + +/* Define 8 bytes of allocation alignment for tcmalloc */ +/* #undef TCMALLOC_ALIGN_8BYTES */ + +/* Version number of package */ +#define VERSION "2.7" + +/* C99 says: define this to get the PRI... 
macros from stdint.h */ +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS 1 +#endif + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + +// --------------------------------------------------------------------- +// Extra stuff not found in config.h.in + +// This must be defined before the windows.h is included. We need at +// least 0x0400 for mutex.h to have access to TryLock, and at least +// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx. +// (This latter is an optimization we could take out if need be.) +#ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0501 +#endif + +#if defined(_MSC_VER) && _MSC_VER >= 1900 +#define HAVE_SNPRINTF 1 +#endif + +// We want to make sure not to ever try to #include heap-checker.h +#define NO_HEAP_CHECK 1 + +// TODO(csilvers): include windows/port.h in every relevant source file instead? +#include "windows/port.h" + +#endif /* GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ */ diff --git a/src/third_party/gperftools-2.7/src/windows/get_mangled_names.cc b/src/third_party/gperftools-2.7/src/windows/get_mangled_names.cc new file mode 100644 index 00000000000..08bd03be6fb --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/get_mangled_names.cc @@ -0,0 +1,65 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Craig Silverstein (opensource@google.com) + +// When you are porting perftools to a new compiler or architecture +// (win64 vs win32) for instance, you'll need to change the mangled +// symbol names for operator new and friends at the top of +// patch_functions.cc. This file helps you do that. +// +// It does this by defining these functions with the proper signature. +// All you need to do is compile this file and the run dumpbin on it. +// (See http://msdn.microsoft.com/en-us/library/5x49w699.aspx for more +// on dumpbin). 
To do this in MSVC, use the MSVC commandline shell: +// http://msdn.microsoft.com/en-us/library/ms235639(VS.80).aspx) +// +// The run: +// cl /c get_mangled_names.cc +// dumpbin /symbols get_mangled_names.obj +// +// It will print out the mangled (and associated unmangled) names of +// the 8 symbols you need to put at the top of patch_functions.cc + +#include <sys/types.h> // for size_t +#include <new> // for nothrow_t + +static char m; // some dummy memory so new doesn't return NULL. + +void* operator new(size_t size) { return &m; } +void operator delete(void* p) throw() { } +void* operator new[](size_t size) { return &m; } +void operator delete[](void* p) throw() { } + +void* operator new(size_t size, const std::nothrow_t&) throw() { return &m; } +void operator delete(void* p, const std::nothrow_t&) throw() { } +void* operator new[](size_t size, const std::nothrow_t&) throw() { return &m; } +void operator delete[](void* p, const std::nothrow_t&) throw() { } diff --git a/src/third_party/gperftools-2.7/src/windows/gperftools/tcmalloc.h b/src/third_party/gperftools-2.7/src/windows/gperftools/tcmalloc.h new file mode 100644 index 00000000000..46fb4eafc50 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/gperftools/tcmalloc.h @@ -0,0 +1,155 @@ +// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +#include <stddef.h> /* for size_t */ +#ifdef __cplusplus +#include <new> /* for std::nothrow_t, std::align_val_t */ +#endif + +/* Define the version number so folks can check against it */ +#define TC_VERSION_MAJOR 2 +#define TC_VERSION_MINOR 7 +#define TC_VERSION_PATCH "" +#define TC_VERSION_STRING "gperftools 2.7" + +#ifndef PERFTOOLS_NOTHROW + +#if __cplusplus >= 201103L +#define PERFTOOLS_NOTHROW noexcept +#elif defined(__cplusplus) +#define PERFTOOLS_NOTHROW throw() +#else +# ifdef __GNUC__ +# define PERFTOOLS_NOTHROW __attribute__((__nothrow__)) +# else +# define PERFTOOLS_NOTHROW +# endif +#endif + +#endif + +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + /* + * Returns a human-readable 
version string. If major, minor, + * and/or patch are not NULL, they are set to the major version, + * minor version, and patch-code (a string, usually ""). + */ + PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) PERFTOOLS_NOTHROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_NOTHROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) PERFTOOLS_NOTHROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_NOTHROW; + + /* + * This is an alias for MallocExtension::instance()->GetAllocatedSize(). 
+ * It is equivalent to + * OS X: malloc_size() + * glibc: malloc_usable_size() + * Windows: _msize() + */ + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_NOTHROW; + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + +#if defined(__cpp_aligned_new) || (defined(_MSVC_LANG) && _MSVC_LANG > 201402L) + PERFTOOLS_DLL_DECL void* tc_new_aligned(size_t size, std::align_val_t al); + PERFTOOLS_DLL_DECL void* tc_new_aligned_nothrow(size_t size, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_aligned(void* p, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_sized_aligned(void* p, size_t size, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_aligned_nothrow(void* p, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_newarray_aligned(size_t size, std::align_val_t al); + PERFTOOLS_DLL_DECL void* tc_newarray_aligned_nothrow(size_t size, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_aligned(void* p, std::align_val_t al) 
PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_sized_aligned(void* p, size_t size, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_aligned_nothrow(void* p, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; +#endif +} +#endif + +/* We're only un-defining for public */ +#if !defined(GPERFTOOLS_CONFIG_H_) + +#undef PERFTOOLS_NOTHROW + +#endif /* GPERFTOOLS_CONFIG_H_ */ + +#endif /* #ifndef TCMALLOC_TCMALLOC_H_ */ diff --git a/src/third_party/gperftools-2.7/src/windows/gperftools/tcmalloc.h.in b/src/third_party/gperftools-2.7/src/windows/gperftools/tcmalloc.h.in new file mode 100644 index 00000000000..adb79629e07 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/gperftools/tcmalloc.h.in @@ -0,0 +1,155 @@ +// -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +#include <stddef.h> /* for size_t */ +#ifdef __cplusplus +#include <new> /* for std::nothrow_t, std::align_val_t */ +#endif + +/* Define the version number so folks can check against it */ +#define TC_VERSION_MAJOR @TC_VERSION_MAJOR@ +#define TC_VERSION_MINOR @TC_VERSION_MINOR@ +#define TC_VERSION_PATCH "@TC_VERSION_PATCH@" +#define TC_VERSION_STRING "gperftools @TC_VERSION_MAJOR@.@TC_VERSION_MINOR@@TC_VERSION_PATCH@" + +#ifndef PERFTOOLS_NOTHROW + +#if __cplusplus >= 201103L +#define PERFTOOLS_NOTHROW noexcept +#elif defined(__cplusplus) +#define PERFTOOLS_NOTHROW throw() +#else +# ifdef __GNUC__ +# define PERFTOOLS_NOTHROW __attribute__((__nothrow__)) +# else +# define PERFTOOLS_NOTHROW +# endif +#endif + +#endif + +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + /* + * Returns a human-readable version string. If major, minor, + * and/or patch are not NULL, they are set to the major version, + * minor version, and patch-code (a string, usually ""). 
+ */ + PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) PERFTOOLS_NOTHROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_free_sized(void *ptr, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) PERFTOOLS_NOTHROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) PERFTOOLS_NOTHROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) PERFTOOLS_NOTHROW; + + /* + * This is an alias for MallocExtension::instance()->GetAllocatedSize(). 
+ * It is equivalent to + * OS X: malloc_size() + * glibc: malloc_usable_size() + * Windows: _msize() + */ + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) PERFTOOLS_NOTHROW; + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_sized(void* p, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_sized(void* p, size_t size) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + +#if defined(__cpp_aligned_new) || (defined(_MSVC_LANG) && _MSVC_LANG > 201402L) + PERFTOOLS_DLL_DECL void* tc_new_aligned(size_t size, std::align_val_t al); + PERFTOOLS_DLL_DECL void* tc_new_aligned_nothrow(size_t size, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_aligned(void* p, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_sized_aligned(void* p, size_t size, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_delete_aligned_nothrow(void* p, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void* tc_newarray_aligned(size_t size, std::align_val_t al); + PERFTOOLS_DLL_DECL void* tc_newarray_aligned_nothrow(size_t size, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_aligned(void* p, std::align_val_t al) 
PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_sized_aligned(void* p, size_t size, std::align_val_t al) PERFTOOLS_NOTHROW; + PERFTOOLS_DLL_DECL void tc_deletearray_aligned_nothrow(void* p, std::align_val_t al, + const std::nothrow_t&) PERFTOOLS_NOTHROW; +#endif +} +#endif + +/* We're only un-defining for public */ +#if !defined(GPERFTOOLS_CONFIG_H_) + +#undef PERFTOOLS_NOTHROW + +#endif /* GPERFTOOLS_CONFIG_H_ */ + +#endif /* #ifndef TCMALLOC_TCMALLOC_H_ */ diff --git a/src/third_party/gperftools-2.7/src/windows/ia32_modrm_map.cc b/src/third_party/gperftools-2.7/src/windows/ia32_modrm_map.cc new file mode 100644 index 00000000000..f1f1906289c --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/ia32_modrm_map.cc @@ -0,0 +1,121 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Table of relevant information about how to decode the ModR/M byte. + * Based on information in the IA-32 Intel® Architecture + * Software Developer’s Manual Volume 2: Instruction Set Reference. + */ + +#include "mini_disassembler.h" +#include "mini_disassembler_types.h" + +namespace sidestep { + +const ModrmEntry MiniDisassembler::s_ia16_modrm_map_[] = { +// mod == 00 + /* r/m == 000 */ { false, false, OS_ZERO }, + /* r/m == 001 */ { false, false, OS_ZERO }, + /* r/m == 010 */ { false, false, OS_ZERO }, + /* r/m == 011 */ { false, false, OS_ZERO }, + /* r/m == 100 */ { false, false, OS_ZERO }, + /* r/m == 101 */ { false, false, OS_ZERO }, + /* r/m == 110 */ { true, false, OS_WORD }, + /* r/m == 111 */ { false, false, OS_ZERO }, +// mod == 01 + /* r/m == 000 */ { true, false, OS_BYTE }, + /* r/m == 001 */ { true, false, OS_BYTE }, + /* r/m == 010 */ { true, false, OS_BYTE }, + /* r/m == 011 */ { true, false, OS_BYTE }, + /* r/m == 100 */ { true, false, OS_BYTE }, + /* r/m == 101 */ { true, false, OS_BYTE }, + /* r/m == 110 */ { true, false, OS_BYTE }, + /* r/m == 111 */ { true, false, OS_BYTE }, +// mod == 10 + /* r/m == 000 */ { true, false, OS_WORD }, + /* r/m == 001 */ { true, false, OS_WORD }, + /* r/m == 010 */ { true, false, OS_WORD }, + /* r/m == 011 */ { true, false, OS_WORD }, + /* r/m == 100 */ { true, false, OS_WORD }, + /* r/m == 101 */ { true, false, OS_WORD }, + /* r/m == 
110 */ { true, false, OS_WORD }, + /* r/m == 111 */ { true, false, OS_WORD }, +// mod == 11 + /* r/m == 000 */ { false, false, OS_ZERO }, + /* r/m == 001 */ { false, false, OS_ZERO }, + /* r/m == 010 */ { false, false, OS_ZERO }, + /* r/m == 011 */ { false, false, OS_ZERO }, + /* r/m == 100 */ { false, false, OS_ZERO }, + /* r/m == 101 */ { false, false, OS_ZERO }, + /* r/m == 110 */ { false, false, OS_ZERO }, + /* r/m == 111 */ { false, false, OS_ZERO } +}; + +const ModrmEntry MiniDisassembler::s_ia32_modrm_map_[] = { +// mod == 00 + /* r/m == 000 */ { false, false, OS_ZERO }, + /* r/m == 001 */ { false, false, OS_ZERO }, + /* r/m == 010 */ { false, false, OS_ZERO }, + /* r/m == 011 */ { false, false, OS_ZERO }, + /* r/m == 100 */ { false, true, OS_ZERO }, + /* r/m == 101 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 110 */ { false, false, OS_ZERO }, + /* r/m == 111 */ { false, false, OS_ZERO }, +// mod == 01 + /* r/m == 000 */ { true, false, OS_BYTE }, + /* r/m == 001 */ { true, false, OS_BYTE }, + /* r/m == 010 */ { true, false, OS_BYTE }, + /* r/m == 011 */ { true, false, OS_BYTE }, + /* r/m == 100 */ { true, true, OS_BYTE }, + /* r/m == 101 */ { true, false, OS_BYTE }, + /* r/m == 110 */ { true, false, OS_BYTE }, + /* r/m == 111 */ { true, false, OS_BYTE }, +// mod == 10 + /* r/m == 000 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 001 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 010 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 011 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 100 */ { true, true, OS_DOUBLE_WORD }, + /* r/m == 101 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 110 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 111 */ { true, false, OS_DOUBLE_WORD }, +// mod == 11 + /* r/m == 000 */ { false, false, OS_ZERO }, + /* r/m == 001 */ { false, false, OS_ZERO }, + /* r/m == 010 */ { false, false, OS_ZERO }, + /* r/m == 011 */ { false, false, OS_ZERO }, + /* r/m == 100 */ { false, false, OS_ZERO }, + /* r/m == 101 */ { false, false, 
OS_ZERO }, + /* r/m == 110 */ { false, false, OS_ZERO }, + /* r/m == 111 */ { false, false, OS_ZERO }, +}; + +}; // namespace sidestep diff --git a/src/third_party/gperftools-2.7/src/windows/ia32_opcode_map.cc b/src/third_party/gperftools-2.7/src/windows/ia32_opcode_map.cc new file mode 100644 index 00000000000..ba6a79e3d19 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/ia32_opcode_map.cc @@ -0,0 +1,1219 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Opcode decoding maps. Based on the IA-32 Intel® Architecture + * Software Developer’s Manual Volume 2: Instruction Set Reference. Idea + * for how to lay out the tables in memory taken from the implementation + * in the Bastard disassembly environment. + */ + +#include "mini_disassembler.h" + +namespace sidestep { + +/* +* This is the first table to be searched; the first field of each +* Opcode in the table is either 0 to indicate you're in the +* right table, or an index to the correct table, in the global +* map g_pentiumOpcodeMap +*/ +const Opcode s_first_opcode_byte[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + 
/* 0x6 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF */ { 1, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x10 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x11 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x12 */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x13 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x14 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, 
"adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x15 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x16 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x17 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x18 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x19 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1A */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1C */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1E */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1F */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x20 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x21 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x22 */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 
}, /* 66h */ { 0 } }, + /* 0x23 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x24 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x25 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x26 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x27 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "daa", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x28 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x29 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2A */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2C */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2F */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "das", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x30 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x31 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, 
AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x32 */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x33 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x34 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x35 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x36 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x37 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "aaa", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x38 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x39 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3A */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3C */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3F */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "aas", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } 
}, +#ifdef _M_X64 + /* REX Prefixes in 64-bit mode. */ + /* 0x40 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x41 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x42 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x43 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x44 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x45 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x46 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x47 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x48 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x49 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4A */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4B */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4C */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4D */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 
}, /* 66h */ { 0 } }, + /* 0x4F */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#else + /* 0x40 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x41 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x42 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x43 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x44 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x45 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x46 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x47 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x48 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x49 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4A */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4B */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4C */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, 
/* 66h */ { 0 } }, + /* 0x4D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4E */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4F */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#endif + /* 0x50 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x51 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x52 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x53 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x54 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x55 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x56 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x57 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x58 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x59 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5A */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 
}, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5B */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5C */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5E */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5F */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x60 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "pushad", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x61 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "popad", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x62 */ { 0, IT_GENERIC, AM_G | OT_V, AM_M | OT_A, AM_NOT_USED, "bound", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x63 */ { 0, IT_GENERIC, AM_E | OT_W, AM_G | OT_W, AM_NOT_USED, "arpl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x64 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x65 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x66 */ { 0, IT_PREFIX_OPERAND, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x67 */ { 0, IT_PREFIX_ADDRESS, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x68 */ { 0, IT_GENERIC, AM_I | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x69 */ { 
0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_I | OT_V, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6A */ { 0, IT_GENERIC, AM_I | OT_B, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_I | OT_B, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6C */ { 0, IT_GENERIC, AM_Y | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "insb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6D */ { 0, IT_GENERIC, AM_Y | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "insd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6E */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_X | OT_B, AM_NOT_USED, "outsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6F */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_X | OT_V, AM_NOT_USED, "outsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x70 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x71 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jno", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x72 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x73 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jnc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x74 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x75 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x76 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jbe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x77 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "ja", false, /* F2h */ { 0 }, 
/* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x78 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "js", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x79 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jns", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7A */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jpe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7B */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jpo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7C */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7D */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jge", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7E */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jle", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7F */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x80 */ { 2, IT_REFERENCE, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x81 */ { 3, IT_REFERENCE, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x82 */ { 4, IT_REFERENCE, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x83 */ { 5, IT_REFERENCE, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x84 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x85 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x86 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "xchg", false, /* F2h 
*/ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x87 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x88 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x89 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8A */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8C */ { 0, IT_GENERIC, AM_E | OT_W, AM_S | OT_W, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8D */ { 0, IT_GENERIC, AM_G | OT_V, AM_M | OT_ADDRESS_MODE_M, AM_NOT_USED, "lea", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8E */ { 0, IT_GENERIC, AM_S | OT_W, AM_E | OT_W, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8F */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x90 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "nop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x91 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x92 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x93 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x94 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 
0 }, /* 66h */ { 0 } }, + /* 0x95 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x96 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x97 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x98 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cwde", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x99 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cdq", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9A */ { 0, IT_JUMP, AM_A | OT_P, AM_NOT_USED, AM_NOT_USED, "callf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9B */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "wait", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9C */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "pushfd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9D */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "popfd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9E */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "sahf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9F */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "lahf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA0 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_O | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA1 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_O | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA2 */ { 0, IT_GENERIC, AM_O | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } 
}, + /* 0xA3 */ { 0, IT_GENERIC, AM_O | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA4 */ { 0, IT_GENERIC, AM_X | OT_B, AM_Y | OT_B, AM_NOT_USED, "movsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA5 */ { 0, IT_GENERIC, AM_X | OT_V, AM_Y | OT_V, AM_NOT_USED, "movsd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA6 */ { 0, IT_GENERIC, AM_X | OT_B, AM_Y | OT_B, AM_NOT_USED, "cmpsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA7 */ { 0, IT_GENERIC, AM_X | OT_V, AM_Y | OT_V, AM_NOT_USED, "cmpsd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA8 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAA */ { 0, IT_GENERIC, AM_Y | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "stosb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAB */ { 0, IT_GENERIC, AM_Y | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "stosd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAC */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_X| OT_B, AM_NOT_USED, "lodsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_X| OT_V, AM_NOT_USED, "lodsd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAE */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_Y | OT_B, AM_NOT_USED, "scasb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_Y | OT_V, AM_NOT_USED, "scasd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB0 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + 
/* 0xB1 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB2 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB3 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB4 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB5 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB6 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB7 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#ifdef _M_X64 + /* 0xB8 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBA */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBB */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBC */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBE */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, 
AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#else + /* 0xB8 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBA */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBB */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBC */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBE */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +#endif + /* 0xC0 */ { 6, IT_REFERENCE, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC1 */ { 7, IT_REFERENCE, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC2 */ { 0, IT_RETURN, AM_I | OT_W, AM_NOT_USED, AM_NOT_USED, "ret", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC3 */ { 0, IT_RETURN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "ret", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC4 */ { 0, IT_GENERIC, AM_G | OT_V, AM_M | OT_P, AM_NOT_USED, 
"les", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC5 */ { 0, IT_GENERIC, AM_G | OT_V, AM_M | OT_P, AM_NOT_USED, "lds", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC8 */ { 0, IT_GENERIC, AM_I | OT_W, AM_I | OT_B, AM_NOT_USED, "enter", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC9 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "leave", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCA */ { 0, IT_RETURN, AM_I | OT_W, AM_NOT_USED, AM_NOT_USED, "retf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCB */ { 0, IT_RETURN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "retf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCC */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "int3", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCD */ { 0, IT_GENERIC, AM_I | OT_B, AM_NOT_USED, AM_NOT_USED, "int", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCE */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "into", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCF */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "iret", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD0 */ { 8, IT_REFERENCE, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD1 */ { 9, IT_REFERENCE, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD2 */ { 10, IT_REFERENCE, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD3 */ { 11, 
IT_REFERENCE, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD4 */ { 0, IT_GENERIC, AM_I | OT_B, AM_NOT_USED, AM_NOT_USED, "aam", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD5 */ { 0, IT_GENERIC, AM_I | OT_B, AM_NOT_USED, AM_NOT_USED, "aad", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD6 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD7 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "xlat", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + + // The following 8 lines would be references to the FPU tables, but we currently + // do not support the FPU instructions in this disassembler. + + /* 0xD8 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD9 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDA */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDB */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDC */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDD */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDE */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDF */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + + + /* 0xE0 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "loopnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } 
}, + /* 0xE1 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "loopz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE2 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "loop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE3 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jcxz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE4 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "in", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE5 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "in", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE6 */ { 0, IT_GENERIC, AM_I | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "out", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE7 */ { 0, IT_GENERIC, AM_I | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "out", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE8 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "call", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE9 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEA */ { 0, IT_JUMP, AM_A | OT_P, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEB */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEC */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_REGISTER | OT_W, AM_NOT_USED, "in", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xED */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_W, AM_NOT_USED, "in", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEE */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_REGISTER | OT_B, AM_NOT_USED, "out", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEF */ { 0, IT_GENERIC, AM_REGISTER 
| OT_W, AM_REGISTER | OT_V, AM_NOT_USED, "out", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF0 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "lock:", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF2 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "repne:", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF3 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rep:", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF4 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "hlt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF5 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cmc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF6 */ { 12, IT_REFERENCE, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF7 */ { 13, IT_REFERENCE, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF8 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "clc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF9 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "stc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFA */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cli", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFB */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "sti", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFC */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cld", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFD */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "std", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ 
{ 0 } }, + /* 0xFE */ { 14, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFF */ { 15, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f[] = { + /* 0x0 */ { 16, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 17, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_W, AM_NOT_USED, "lar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_W, AM_NOT_USED, "lsl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "clts", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "invd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "wbinvd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "ud2", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, 
false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x10 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "movups", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "movsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "movss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "movupd" } }, + /* 0x11 */ { 0, IT_GENERIC, AM_W | OT_PS, AM_V | OT_PS, AM_NOT_USED, "movups", true, + /* F2h */ { 0, IT_GENERIC, AM_W | OT_SD, AM_V | OT_SD, AM_NOT_USED, "movsd" }, + /* F3h */ { 0, IT_GENERIC, AM_W | OT_SS, AM_V | OT_SS, AM_NOT_USED, "movss" }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_PD, AM_V | OT_PD, AM_NOT_USED, "movupd" } }, + /* 0x12 */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movlps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movhlps" }, // only one of ... 
+ /* F3h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movhlps" }, // ...these two is correct, Intel doesn't specify which + /* 66h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_S, AM_NOT_USED, "movlpd" } }, + /* 0x13 */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movlps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movlpd" } }, + /* 0x14 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_Q, AM_NOT_USED, "unpcklps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_Q, AM_NOT_USED, "unpcklpd" } }, + /* 0x15 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_Q, AM_NOT_USED, "unpckhps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_Q, AM_NOT_USED, "unpckhpd" } }, + /* 0x16 */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movhps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movlhps" }, // only one of... 
+ /* F3h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movlhps" }, // ...these two is correct, Intel doesn't specify which + /* 66h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movhpd" } }, + /* 0x17 */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movhps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movhpd" } }, + /* 0x18 */ { 18, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x19 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1A */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1B */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1C */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1D */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1E */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1F */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x20 */ { 0, IT_GENERIC, AM_R | OT_D, AM_C | OT_D, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x21 */ { 0, IT_GENERIC, AM_R | OT_D, AM_D | OT_D, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x22 */ { 0, IT_GENERIC, AM_C | OT_D, AM_R | OT_D, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x23 */ { 0, IT_GENERIC, AM_D | OT_D, AM_R | OT_D, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 
}, /* 66h */ { 0 } }, + /* 0x24 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x25 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x26 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x27 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x28 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "movaps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "movapd" } }, + /* 0x29 */ { 0, IT_GENERIC, AM_W | OT_PS, AM_V | OT_PS, AM_NOT_USED, "movaps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_PD, AM_V | OT_PD, AM_NOT_USED, "movapd" } }, + /* 0x2A */ { 0, IT_GENERIC, AM_V | OT_PS, AM_Q | OT_Q, AM_NOT_USED, "cvtpi2ps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_E | OT_D, AM_NOT_USED, "cvtsi2sd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_E | OT_D, AM_NOT_USED, "cvtsi2ss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_Q | OT_DQ, AM_NOT_USED, "cvtpi2pd" } }, + /* 0x2B */ { 0, IT_GENERIC, AM_W | OT_PS, AM_V | OT_PS, AM_NOT_USED, "movntps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_PD, AM_V | OT_PD, AM_NOT_USED, "movntpd" } }, + /* 0x2C */ { 0, IT_GENERIC, AM_Q | OT_Q, AM_W | OT_PS, AM_NOT_USED, "cvttps2pi", true, + /* F2h */ { 0, IT_GENERIC, AM_G | OT_D, AM_W | OT_SD, AM_NOT_USED, "cvttsd2si" }, + /* F3h */ { 0, IT_GENERIC, AM_G | OT_D, AM_W | OT_SS, AM_NOT_USED, "cvttss2si" }, + /* 66h */ { 0, IT_GENERIC, AM_Q | OT_DQ, AM_W | OT_PD, AM_NOT_USED, "cvttpd2pi" } }, + /* 0x2D */ { 0, IT_GENERIC, AM_Q | OT_Q, AM_W | OT_PS, AM_NOT_USED, "cvtps2pi", true, + /* F2h */ { 0, IT_GENERIC, AM_G | OT_D, AM_W | 
OT_SD, AM_NOT_USED, "cvtsd2si" }, + /* F3h */ { 0, IT_GENERIC, AM_G | OT_D, AM_W | OT_SS, AM_NOT_USED, "cvtss2si" }, + /* 66h */ { 0, IT_GENERIC, AM_Q | OT_DQ, AM_W | OT_PD, AM_NOT_USED, "cvtpd2pi" } }, + /* 0x2E */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "ucomiss", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "ucomisd" } }, + /* 0x2F */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_SS, AM_NOT_USED, "comiss", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "comisd" } }, + /* 0x30 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "wrmsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x31 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rdtsc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x32 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rdmsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x33 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rdpmc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x34 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "sysenter", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x35 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "sysexit", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x36 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x37 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x38 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x39 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3A */ { 0, 
IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3B */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3C */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "movnti", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3D */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3E */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3F */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x40 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x41 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovno", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x42 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x43 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovnc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x44 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x45 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x46 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovbe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x47 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmova", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x48 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovs", false, /* F2h */ { 0 }, /* F3h */ { 0 
}, /* 66h */ { 0 } }, + /* 0x49 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovns", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4A */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovpe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovpo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4C */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4D */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovge", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4E */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovle", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4F */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x50 */ { 0, IT_GENERIC, AM_E | OT_D, AM_V | OT_PS, AM_NOT_USED, "movmskps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_E | OT_D, AM_V | OT_PD, AM_NOT_USED, "movmskpd" } }, + /* 0x51 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "sqrtps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "sqrtsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "sqrtss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "sqrtpd" } }, + /* 0x52 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "rsqrtps", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "rsqrtss" }, + /* 66h */ { 0 } }, + /* 0x53 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "rcpps", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "rcpss" }, + /* 66h */ { 0 } }, + /* 0x54 */ { 0, IT_GENERIC, AM_V | 
OT_PS, AM_W | OT_PS, AM_NOT_USED, "andps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "andpd" } }, + /* 0x55 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "andnps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "andnpd" } }, + /* 0x56 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "orps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "orpd" } }, + /* 0x57 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "xorps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "xorpd" } }, + /* 0x58 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "addps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "addsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "addss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "addpd" } }, + /* 0x59 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "mulps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "mulsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "mulss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "mulpd" } }, + /* 0x5A */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PS, AM_NOT_USED, "cvtps2pd", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "cvtsd2ss" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "cvtss2sd" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PD, AM_NOT_USED, "cvtpd2ps" } }, + /* 0x5B */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_DQ, AM_NOT_USED, "cvtdq2ps", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_PS, AM_NOT_USED, "cvttps2dq" }, + /* 66h 
*/ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_PS, AM_NOT_USED, "cvtps2dq" } }, + /* 0x5C */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "subps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "subsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "subss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "subpd" } }, + /* 0x5D */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "minps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "minsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "minss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "minpd" } }, + /* 0x5E */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "divps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "divsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "divss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "divpd" } }, + /* 0x5F */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "maxps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "maxsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "maxss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "maxpd" } }, + /* 0x60 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpcklbw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpcklbw" } }, + /* 0x61 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpcklwd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpcklwd" } }, + /* 0x62 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpckldq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, 
AM_W | OT_DQ, AM_NOT_USED, "punpckldq" } }, + /* 0x63 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "packsswb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "packsswb" } }, + /* 0x64 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "pcmpgtb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpgtb" } }, + /* 0x65 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "pcmpgtw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpgtw" } }, + /* 0x66 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "pcmpgtd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpgtd" } }, + /* 0x67 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "packuswb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "packuswb" } }, + /* 0x68 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpckhbw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_Q | OT_DQ, AM_NOT_USED, "punpckhbw" } }, + /* 0x69 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpckhwd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_Q | OT_DQ, AM_NOT_USED, "punpckhwd" } }, + /* 0x6A */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpckhdq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_Q | OT_DQ, AM_NOT_USED, "punpckhdq" } }, + /* 0x6B */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "packssdw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_Q | OT_DQ, AM_NOT_USED, "packssdw" } }, + /* 0x6C */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, 
AM_NOT_USED, "not used without prefix", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpcklqdq" } }, + /* 0x6D */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "not used without prefix", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpcklqdq" } }, + /* 0x6E */ { 0, IT_GENERIC, AM_P | OT_D, AM_E | OT_D, AM_NOT_USED, "movd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_E | OT_D, AM_NOT_USED, "movd" } }, + /* 0x6F */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "movq", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "movdqu" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "movdqa" } }, + /* 0x70 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_I | OT_B, "pshuf", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_I | OT_B, "pshuflw" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_I | OT_B, "pshufhw" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_I | OT_B, "pshufd" } }, + /* 0x71 */ { 19, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x72 */ { 20, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x73 */ { 21, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x74 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pcmpeqb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpeqb" } }, + /* 0x75 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pcmpeqw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, 
AM_NOT_USED, "pcmpeqw" } }, + /* 0x76 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pcmpeqd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpeqd" } }, + /* 0x77 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "emms", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + + // The following six opcodes are escapes into the MMX stuff, which this disassembler does not support. + /* 0x78 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x79 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7A */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7B */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7C */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7D */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + + /* 0x7E */ { 0, IT_GENERIC, AM_E | OT_D, AM_P | OT_D, AM_NOT_USED, "movd", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movq" }, + /* 66h */ { 0, IT_GENERIC, AM_E | OT_D, AM_V | OT_DQ, AM_NOT_USED, "movd" } }, + /* 0x7F */ { 0, IT_GENERIC, AM_Q | OT_Q, AM_P | OT_Q, AM_NOT_USED, "movq", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_V | OT_DQ, AM_NOT_USED, "movdqu" }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_V | OT_DQ, AM_NOT_USED, "movdqa" } }, + /* 0x80 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x81 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, 
AM_NOT_USED, "jno", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x82 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x83 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jnc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x84 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x85 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x86 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jbe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x87 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "ja", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x88 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "js", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x89 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jns", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8A */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jpe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8B */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jpo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8C */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8D */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jge", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8E */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jle", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8F */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x90 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, 
AM_NOT_USED, "seto", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x91 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setno", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x92 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x93 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setnc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x94 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x95 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x96 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setbe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x97 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "seta", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x98 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "sets", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x99 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setns", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9A */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setpe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9B */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setpo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9C */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9D */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setge", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9E */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setle", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ 
{ 0 } }, + /* 0x9F */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA0 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA1 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA2 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cpuid", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "bt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_I | OT_B, "shld", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_I | OT_B | AM_REGISTER, "shld", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA6 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA7 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA8 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA9 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAA */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rsm", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAB */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "bts", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAC */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_I | OT_B, "shrd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAD */ { 0, IT_GENERIC, AM_E | OT_V, 
AM_G | OT_V, AM_I | OT_B | AM_REGISTER, "shrd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAE */ { 22, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAF */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "cmpxchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "cmpxchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB2 */ { 0, IT_GENERIC, AM_M | OT_P, AM_NOT_USED, AM_NOT_USED, "lss", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "btr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB4 */ { 0, IT_GENERIC, AM_M | OT_P, AM_NOT_USED, AM_NOT_USED, "lfs", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB5 */ { 0, IT_GENERIC, AM_M | OT_P, AM_NOT_USED, AM_NOT_USED, "lgs", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB6 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_B, AM_NOT_USED, "movzx", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB7 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_W, AM_NOT_USED, "movzx", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB8 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB9 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "ud1", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBA */ { 23, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBB */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "btc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, 
/* 66h */ { 0 } }, + /* 0xBC */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "bsf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBD */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "bsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBE */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_B, AM_NOT_USED, "movsx", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBF */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_W, AM_NOT_USED, "movsx", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "xadd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "xadd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC2 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_I | OT_B, "cmpps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_I | OT_B, "cmpsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_I | OT_B, "cmpss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_I | OT_B, "cmppd" } }, + /* 0xC3 */ { 0, IT_GENERIC, AM_E | OT_D, AM_G | OT_D, AM_NOT_USED, "movnti", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_E | OT_D, AM_I | OT_B, "pinsrw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_E | OT_D, AM_I | OT_B, "pinsrw" } }, + /* 0xC5 */ { 0, IT_GENERIC, AM_G | OT_D, AM_P | OT_Q, AM_I | OT_B, "pextrw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_G | OT_D, AM_V | OT_DQ, AM_I | OT_B, "pextrw" } }, + /* 0xC6 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_I | OT_B, "shufps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_I | OT_B, "shufpd" } }, + /* 0xC7 */ { 24, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, 
AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC8 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC9 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCA */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCB */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCC */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCD */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCE */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCF */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD1 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psrlw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrlw" } }, + /* 0xD2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psrld", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrld" } }, + /* 0xD3 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psrlq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrlq" } }, + /* 0xD4 */ { 0, 
IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddq" } }, + /* 0xD5 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmullw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmullw" } }, + /* 0xD6 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "unused without prefix", true, + /* F2h */ { 0, IT_GENERIC, AM_P | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movdq2q" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_Q | OT_Q, AM_NOT_USED, "movq2dq" }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movq" } }, + /* 0xD7 */ { 0, IT_GENERIC, AM_G | OT_D, AM_P | OT_Q, AM_NOT_USED, "pmovmskb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_G | OT_D, AM_V | OT_DQ, AM_NOT_USED, "pmovmskb" } }, + /* 0xD8 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubusb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubusb" } }, + /* 0xD9 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubusw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubusw" } }, + /* 0xDA */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pminub", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pminub" } }, + /* 0xDB */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pand", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pand" } }, + /* 0xDC */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddusb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddusb" } }, + /* 0xDD */ { 
0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddusw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddusw" } }, + /* 0xDE */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmaxub", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmaxub" } }, + /* 0xDF */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pandn", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pandn" } }, + /* 0xE0 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pavgb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pavgb" } }, + /* 0xE1 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psraw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrqw" } }, + /* 0xE2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psrad", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrad" } }, + /* 0xE3 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pavgw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pavgw" } }, + /* 0xE4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmulhuw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmulhuw" } }, + /* 0xE5 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmulhuw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmulhw" } }, + /* 0xE6 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "not used without prefix", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | 
OT_PD, AM_NOT_USED, "cvtpd2dq" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_DQ, AM_NOT_USED, "cvtdq2pd" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_PD, AM_NOT_USED, "cvttpd2dq" } }, + /* 0xE7 */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movntq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_V | OT_DQ, AM_NOT_USED, "movntdq" } }, + /* 0xE8 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubsb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubsb" } }, + /* 0xE9 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubsw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubsw" } }, + /* 0xEA */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pminsw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pminsw" } }, + /* 0xEB */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "por", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "por" } }, + /* 0xEC */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddsb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddsb" } }, + /* 0xED */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddsw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddsw" } }, + /* 0xEE */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmaxsw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmaxsw" } }, + /* 0xEF */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pxor", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, 
IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pxor" } }, + /* 0xF0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF1 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psllw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psllw" } }, + /* 0xF2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pslld", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pslld" } }, + /* 0xF3 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psllq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psllq" } }, + /* 0xF4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmuludq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmuludq" } }, + /* 0xF5 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmaddwd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmaddwd" } }, + /* 0xF6 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psadbw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psadbw" } }, + /* 0xF7 */ { 0, IT_GENERIC, AM_P | OT_PI, AM_Q | OT_PI, AM_NOT_USED, "maskmovq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "maskmovdqu" } }, + /* 0xF8 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubb" } }, + /* 0xF9 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, 
IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubw" } }, + /* 0xFA */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubd" } }, + /* 0xFB */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubq" } }, + /* 0xFC */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddb" } }, + /* 0xFD */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddw" } }, + /* 0xFE */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddd" } }, + /* 0xFF */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f00[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "sldt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "str", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "lldt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "ltr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "verr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_W, 
AM_NOT_USED, AM_NOT_USED, "verw", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f01[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_M | OT_S, AM_NOT_USED, AM_NOT_USED, "sgdt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_M | OT_S, AM_NOT_USED, AM_NOT_USED, "sidt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_M | OT_S, AM_NOT_USED, AM_NOT_USED, "lgdt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_M | OT_S, AM_NOT_USED, AM_NOT_USED, "lidt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "smsw", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "lmsw", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_M | OT_B, AM_NOT_USED, AM_NOT_USED, "invlpg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f18[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_M | OT_ADDRESS_MODE_M, AM_NOT_USED, AM_NOT_USED, "prefetch", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "prefetch", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "prefetch", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 
0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "prefetch", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f71[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psrlw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psrlw" } }, + /* 0x3 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psraw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psraw" } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psllw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psllw" } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode 
s_opcode_byte_after_0f72[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psrld", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psrld" } }, + /* 0x3 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psrad", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psrad" } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "pslld", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "pslld" } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f73[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psrlq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psrlq" } }, + /* 0x3 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 
0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psllq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psllq" } }, + /* 0x7 */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "pslldq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "pslldq" } }, +}; + +const Opcode s_opcode_byte_after_0fae[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "fxsave", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "fxrstor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "ldmxcsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "stmxcsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "lfence", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "mfence", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "clflush/sfence", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +}; + +const Opcode s_opcode_byte_after_0fba[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, 
AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "bt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "bts", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "btr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "btc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0fc7[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_M | OT_Q, AM_NOT_USED, AM_NOT_USED, "cmpxch8b", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_80[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "sub", false, 
/* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_81[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_82[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 
}, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_83[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_c0[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 
0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_c1[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode 
s_opcode_byte_after_d0[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_d1[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, 
AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_d2[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_d3[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "rcr", false, /* F2h */ { 
0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_f6[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "not", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "neg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, OT_B | AM_REGISTER, AM_E | OT_B, AM_NOT_USED, "mul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, OT_B | AM_REGISTER, AM_E | OT_B, AM_NOT_USED, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_E | OT_B, AM_NOT_USED, "div", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_E | OT_B, AM_NOT_USED, "idiv", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_f7[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I 
| OT_V, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "not", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "neg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_E | OT_V, AM_NOT_USED, "mul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_E | OT_V, AM_NOT_USED, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_E | OT_V, AM_NOT_USED, "div", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_E | OT_V, AM_NOT_USED, "idiv", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_fe[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_ff[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_JUMP, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "call", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_JUMP, AM_E | OT_P, AM_NOT_USED, AM_NOT_USED, "call", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_JUMP, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_JUMP, AM_E | 
OT_P, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +/* +* A table of all the other tables, containing some extra information, e.g. +* how to mask out the byte we're looking at. +*/ +const OpcodeTable MiniDisassembler::s_ia32_opcode_map_[]={ + // One-byte opcodes and jumps to larger + /* 0 */ {s_first_opcode_byte, 0, 0xff, 0, 0xff}, + // Two-byte opcodes (second byte) + /* 1 */ {s_opcode_byte_after_0f, 0, 0xff, 0, 0xff}, + // Start of tables for opcodes using ModR/M bits as extension + /* 2 */ {s_opcode_byte_after_80, 3, 0x07, 0, 0x07}, + /* 3 */ {s_opcode_byte_after_81, 3, 0x07, 0, 0x07}, + /* 4 */ {s_opcode_byte_after_82, 3, 0x07, 0, 0x07}, + /* 5 */ {s_opcode_byte_after_83, 3, 0x07, 0, 0x07}, + /* 6 */ {s_opcode_byte_after_c0, 3, 0x07, 0, 0x07}, + /* 7 */ {s_opcode_byte_after_c1, 3, 0x07, 0, 0x07}, + /* 8 */ {s_opcode_byte_after_d0, 3, 0x07, 0, 0x07}, + /* 9 */ {s_opcode_byte_after_d1, 3, 0x07, 0, 0x07}, + /* 10 */ {s_opcode_byte_after_d2, 3, 0x07, 0, 0x07}, + /* 11 */ {s_opcode_byte_after_d3, 3, 0x07, 0, 0x07}, + /* 12 */ {s_opcode_byte_after_f6, 3, 0x07, 0, 0x07}, + /* 13 */ {s_opcode_byte_after_f7, 3, 0x07, 0, 0x07}, + /* 14 */ {s_opcode_byte_after_fe, 3, 0x07, 0, 0x01}, + /* 15 */ {s_opcode_byte_after_ff, 3, 0x07, 0, 0x07}, + /* 16 */ {s_opcode_byte_after_0f00, 3, 0x07, 0, 0x07}, + /* 17 */ {s_opcode_byte_after_0f01, 3, 0x07, 0, 0x07}, + /* 18 */ {s_opcode_byte_after_0f18, 3, 0x07, 0, 0x07}, + /* 19 */ {s_opcode_byte_after_0f71, 3, 0x07, 0, 0x07}, + /* 20 */ {s_opcode_byte_after_0f72, 3, 0x07, 0, 0x07}, + /* 21 */ {s_opcode_byte_after_0f73, 3, 0x07, 0, 0x07}, + /* 22 */ {s_opcode_byte_after_0fae, 3, 0x07, 0, 0x07}, + /* 23 */ 
{s_opcode_byte_after_0fba, 3, 0x07, 0, 0x07}, + /* 24 */ {s_opcode_byte_after_0fc7, 3, 0x07, 0, 0x01} +}; + +}; // namespace sidestep diff --git a/src/third_party/gperftools-2.7/src/windows/mingw.h b/src/third_party/gperftools-2.7/src/windows/mingw.h new file mode 100644 index 00000000000..c91a3132249 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/mingw.h @@ -0,0 +1,74 @@ +/* -*- Mode: C; c-basic-offset: 2; indent-tabs-mode: nil -*- */ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Craig Silverstein + * + * MinGW is an interesting mix of unix and windows. We use a normal + * configure script, but still need the windows port.h to define some + * stuff that MinGW doesn't support, like pthreads. + */ + +#ifndef GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ +#define GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ + +#ifdef __MINGW32__ + +// Older version of the mingw msvcrt don't define _aligned_malloc +#if __MSVCRT_VERSION__ < 0x0700 +# define PERFTOOLS_NO_ALIGNED_MALLOC 1 +#endif + +// This must be defined before the windows.h is included. We need at +// least 0x0400 for mutex.h to have access to TryLock, and at least +// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx. +// (This latter is an optimization we could take out if need be.) +#ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0501 +#endif + +#define HAVE_SNPRINTF 1 + +// Some mingw distributions have a pthreads wrapper, but it doesn't +// work as well as native windows spinlocks (at least for us). So +// pretend the pthreads wrapper doesn't exist, even when it does. 
+#ifndef HAVE_PTHREAD_DESPITE_ASKING_FOR +#undef HAVE_PTHREAD +#endif + +#undef HAVE_FORK + +#define HAVE_PID_T + +#include "windows/port.h" + +#endif /* __MINGW32__ */ + +#endif /* GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ */ diff --git a/src/third_party/gperftools-2.7/src/windows/mini_disassembler.cc b/src/third_party/gperftools-2.7/src/windows/mini_disassembler.cc new file mode 100644 index 00000000000..0c620047cec --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/mini_disassembler.cc @@ -0,0 +1,432 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Implementation of MiniDisassembler. + */ + +#include "mini_disassembler.h" + +namespace sidestep { + +MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, + bool address_default_is_32_bits) + : operand_default_is_32_bits_(operand_default_is_32_bits), + address_default_is_32_bits_(address_default_is_32_bits) { + Initialize(); +} + +MiniDisassembler::MiniDisassembler() + : operand_default_is_32_bits_(true), + address_default_is_32_bits_(true) { + Initialize(); +} + +InstructionType MiniDisassembler::Disassemble( + unsigned char* start_byte, + unsigned int& instruction_bytes) { + // Clean up any state from previous invocations. + Initialize(); + + // Start by processing any prefixes. + unsigned char* current_byte = start_byte; + unsigned int size = 0; + InstructionType instruction_type = ProcessPrefixes(current_byte, size); + + if (IT_UNKNOWN == instruction_type) + return instruction_type; + + current_byte += size; + size = 0; + + // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ + // and address_is_32_bits_ flags are correctly set. 
+ + instruction_type = ProcessOpcode(current_byte, 0, size); + + // Check for error processing instruction + if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { + return IT_UNKNOWN; + } + + current_byte += size; + + // Invariant: operand_bytes_ indicates the total size of operands + // specified by the opcode and/or ModR/M byte and/or SIB byte. + // pCurrentByte points to the first byte after the ModR/M byte, or after + // the SIB byte if it is present (i.e. the first byte of any operands + // encoded in the instruction). + + // We get the total length of any prefixes, the opcode, and the ModR/M and + // SIB bytes if present, by taking the difference of the original starting + // address and the current byte (which points to the first byte of the + // operands if present, or to the first byte of the next instruction if + // they are not). Adding the count of bytes in the operands encoded in + // the instruction gives us the full length of the instruction in bytes. + instruction_bytes += operand_bytes_ + (current_byte - start_byte); + + // Return the instruction type, which was set by ProcessOpcode(). 
+ return instruction_type_; +} + +void MiniDisassembler::Initialize() { + operand_is_32_bits_ = operand_default_is_32_bits_; + address_is_32_bits_ = address_default_is_32_bits_; +#ifdef _M_X64 + operand_default_support_64_bits_ = true; +#else + operand_default_support_64_bits_ = false; +#endif + operand_is_64_bits_ = false; + operand_bytes_ = 0; + have_modrm_ = false; + should_decode_modrm_ = false; + instruction_type_ = IT_UNKNOWN; + got_f2_prefix_ = false; + got_f3_prefix_ = false; + got_66_prefix_ = false; +} + +InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, + unsigned int& size) { + InstructionType instruction_type = IT_GENERIC; + const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; + + switch (opcode.type_) { + case IT_PREFIX_ADDRESS: + address_is_32_bits_ = !address_default_is_32_bits_; + goto nochangeoperand; + case IT_PREFIX_OPERAND: + operand_is_32_bits_ = !operand_default_is_32_bits_; + nochangeoperand: + case IT_PREFIX: + + if (0xF2 == (*start_byte)) + got_f2_prefix_ = true; + else if (0xF3 == (*start_byte)) + got_f3_prefix_ = true; + else if (0x66 == (*start_byte)) + got_66_prefix_ = true; + else if (operand_default_support_64_bits_ && (*start_byte) & 0x48) + operand_is_64_bits_ = true; + + instruction_type = opcode.type_; + size ++; + // we got a prefix, so add one and check next byte + ProcessPrefixes(start_byte + 1, size); + default: + break; // not a prefix byte + } + + return instruction_type; +} + +InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, + unsigned int table_index, + unsigned int& size) { + const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table + unsigned char current_byte = (*start_byte) >> table.shift_; + current_byte = current_byte & table.mask_; // Mask out the bits we will use + + // Check whether the byte we have is inside the table we have. 
+ if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { + instruction_type_ = IT_UNKNOWN; + return instruction_type_; + } + + const Opcode& opcode = table.table_[current_byte]; + if (IT_UNUSED == opcode.type_) { + // This instruction is not used by the IA-32 ISA, so we indicate + // this to the user. Probably means that we were pointed to + // a byte in memory that was not the start of an instruction. + instruction_type_ = IT_UNUSED; + return instruction_type_; + } else if (IT_REFERENCE == opcode.type_) { + // We are looking at an opcode that has more bytes (or is continued + // in the ModR/M byte). Recursively find the opcode definition in + // the table for the opcode's next byte. + size++; + ProcessOpcode(start_byte + 1, opcode.table_index_, size); + return instruction_type_; + } + + const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode; + if (opcode.is_prefix_dependent_) { + if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { + specific_opcode = &opcode.opcode_if_f2_prefix_; + } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { + specific_opcode = &opcode.opcode_if_f3_prefix_; + } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { + specific_opcode = &opcode.opcode_if_66_prefix_; + } + } + + // Inv: The opcode type is known. + instruction_type_ = specific_opcode->type_; + + // Let's process the operand types to see if we have any immediate + // operands, and/or a ModR/M byte. + + ProcessOperand(specific_opcode->flag_dest_); + ProcessOperand(specific_opcode->flag_source_); + ProcessOperand(specific_opcode->flag_aux_); + + // Inv: We have processed the opcode and incremented operand_bytes_ + // by the number of bytes of any operands specified by the opcode + // that are stored in the instruction (not registers etc.). Now + // we need to return the total number of bytes for the opcode and + // for the ModR/M or SIB bytes if they are present. 
+ + if (table.mask_ != 0xff) { + if (have_modrm_) { + // we're looking at a ModR/M byte so we're not going to + // count that into the opcode size + ProcessModrm(start_byte, size); + return IT_GENERIC; + } else { + // need to count the ModR/M byte even if it's just being + // used for opcode extension + size++; + return IT_GENERIC; + } + } else { + if (have_modrm_) { + // The ModR/M byte is the next byte. + size++; + ProcessModrm(start_byte + 1, size); + return IT_GENERIC; + } else { + size++; + return IT_GENERIC; + } + } +} + +bool MiniDisassembler::ProcessOperand(int flag_operand) { + bool succeeded = true; + if (AM_NOT_USED == flag_operand) + return succeeded; + + // Decide what to do based on the addressing mode. + switch (flag_operand & AM_MASK) { + // No ModR/M byte indicated by these addressing modes, and no + // additional (e.g. immediate) parameters. + case AM_A: // Direct address + case AM_F: // EFLAGS register + case AM_X: // Memory addressed by the DS:SI register pair + case AM_Y: // Memory addressed by the ES:DI register pair + case AM_IMPLICIT: // Parameter is implicit, occupies no space in + // instruction + break; + + // There is a ModR/M byte but it does not necessarily need + // to be decoded. + case AM_C: // reg field of ModR/M selects a control register + case AM_D: // reg field of ModR/M selects a debug register + case AM_G: // reg field of ModR/M selects a general register + case AM_P: // reg field of ModR/M selects an MMX register + case AM_R: // mod field of ModR/M may refer only to a general register + case AM_S: // reg field of ModR/M selects a segment register + case AM_T: // reg field of ModR/M selects a test register + case AM_V: // reg field of ModR/M selects a 128-bit XMM register + have_modrm_ = true; + break; + + // In these addressing modes, there is a ModR/M byte and it needs to be + // decoded. No other (e.g. immediate) params than indicated in ModR/M. 
+ case AM_E: // Operand is either a general-purpose register or memory, + // specified by ModR/M byte + case AM_M: // ModR/M byte will refer only to memory + case AM_Q: // Operand is either an MMX register or memory (complex + // evaluation), specified by ModR/M byte + case AM_W: // Operand is either a 128-bit XMM register or memory (complex + // eval), specified by ModR/M byte + have_modrm_ = true; + should_decode_modrm_ = true; + break; + + // These addressing modes specify an immediate or an offset value + // directly, so we need to look at the operand type to see how many + // bytes. + case AM_I: // Immediate data. + case AM_J: // Jump to offset. + case AM_O: // Operand is at offset. + switch (flag_operand & OT_MASK) { + case OT_B: // Byte regardless of operand-size attribute. + operand_bytes_ += OS_BYTE; + break; + case OT_C: // Byte or word, depending on operand-size attribute. + if (operand_is_32_bits_) + operand_bytes_ += OS_WORD; + else + operand_bytes_ += OS_BYTE; + break; + case OT_D: // Doubleword, regardless of operand-size attribute. + operand_bytes_ += OS_DOUBLE_WORD; + break; + case OT_DQ: // Double-quadword, regardless of operand-size attribute. + operand_bytes_ += OS_DOUBLE_QUAD_WORD; + break; + case OT_P: // 32-bit or 48-bit pointer, depending on operand-size + // attribute. + if (operand_is_32_bits_) + operand_bytes_ += OS_48_BIT_POINTER; + else + operand_bytes_ += OS_32_BIT_POINTER; + break; + case OT_PS: // 128-bit packed single-precision floating-point data. + operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; + break; + case OT_Q: // Quadword, regardless of operand-size attribute. + operand_bytes_ += OS_QUAD_WORD; + break; + case OT_S: // 6-byte pseudo-descriptor. 
+ operand_bytes_ += OS_PSEUDO_DESCRIPTOR; + break; + case OT_SD: // Scalar Double-Precision Floating-Point Value + case OT_PD: // Unaligned packed double-precision floating point value + operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; + break; + case OT_SS: + // Scalar element of a 128-bit packed single-precision + // floating data. + // We simply return enItUnknown since we don't have to support + // floating point + succeeded = false; + break; + case OT_V: // Word, doubleword or quadword, depending on operand-size + // attribute. + if (operand_is_64_bits_ && flag_operand & AM_I && + flag_operand & IOS_64) + operand_bytes_ += OS_QUAD_WORD; + else if (operand_is_32_bits_) + operand_bytes_ += OS_DOUBLE_WORD; + else + operand_bytes_ += OS_WORD; + break; + case OT_W: // Word, regardless of operand-size attribute. + operand_bytes_ += OS_WORD; + break; + + // Can safely ignore these. + case OT_A: // Two one-word operands in memory or two double-word + // operands in memory + case OT_PI: // Quadword MMX technology register (e.g. mm0) + case OT_SI: // Doubleword integer register (e.g., eax) + break; + + default: + break; + } + break; + + default: + break; + } + + return succeeded; +} + +bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, + unsigned int& size) { + // If we don't need to decode, we just return the size of the ModR/M + // byte (there is never a SIB byte in this case). + if (!should_decode_modrm_) { + size++; + return true; + } + + // We never care about the reg field, only the combination of the mod + // and r/m fields, so let's start by packing those fields together into + // 5 bits. 
+ unsigned char modrm = (*start_byte); + unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field + modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field + mod = mod >> 3; // shift the mod field to the right place + modrm = mod | modrm; // combine the r/m and mod fields as discussed + mod = mod >> 3; // shift the mod field to bits 2..0 + + // Invariant: modrm contains the mod field in bits 4..3 and the r/m field + // in bits 2..0, and mod contains the mod field in bits 2..0 + + const ModrmEntry* modrm_entry = 0; + if (address_is_32_bits_) + modrm_entry = &s_ia32_modrm_map_[modrm]; + else + modrm_entry = &s_ia16_modrm_map_[modrm]; + + // Invariant: modrm_entry points to information that we need to decode + // the ModR/M byte. + + // Add to the count of operand bytes, if the ModR/M byte indicates + // that some operands are encoded in the instruction. + if (modrm_entry->is_encoded_in_instruction_) + operand_bytes_ += modrm_entry->operand_size_; + + // Process the SIB byte if necessary, and return the count + // of ModR/M and SIB bytes. 
+ if (modrm_entry->use_sib_byte_) { + size++; + return ProcessSib(start_byte + 1, mod, size); + } else { + size++; + return true; + } +} + +bool MiniDisassembler::ProcessSib(unsigned char* start_byte, + unsigned char mod, + unsigned int& size) { + // get the mod field from the 2..0 bits of the SIB byte + unsigned char sib_base = (*start_byte) & 0x07; + if (0x05 == sib_base) { + switch (mod) { + case 0x00: // mod == 00 + case 0x02: // mod == 10 + operand_bytes_ += OS_DOUBLE_WORD; + break; + case 0x01: // mod == 01 + operand_bytes_ += OS_BYTE; + break; + case 0x03: // mod == 11 + // According to the IA-32 docs, there does not seem to be a disp + // value for this value of mod + default: + break; + } + } + + size++; + return true; +} + +}; // namespace sidestep diff --git a/src/third_party/gperftools-2.7/src/windows/mini_disassembler.h b/src/third_party/gperftools-2.7/src/windows/mini_disassembler.h new file mode 100644 index 00000000000..93bdc0632ff --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/mini_disassembler.h @@ -0,0 +1,198 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Definition of MiniDisassembler. + */ + +#ifndef GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_H_ +#define GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_H_ + +#include "config.h" +#include <windows.h> +#include "mini_disassembler_types.h" + +// compatibility shim +#include "base/logging.h" +#define SIDESTEP_ASSERT(cond) RAW_DCHECK(cond, #cond) +#define SIDESTEP_LOG(msg) RAW_VLOG(1, msg) + +namespace sidestep { + +// This small disassembler is very limited +// in its functionality, and in fact does only the bare minimum required by the +// preamble patching utility. It may be useful for other purposes, however. +// +// The limitations include at least the following: +// -# No support for coprocessor opcodes, MMX, etc. +// -# No machine-readable identification of opcodes or decoding of +// assembly parameters. The name of the opcode (as a string) is given, +// however, to aid debugging. +// +// You may ask what this little disassembler actually does, then? 
The answer is +// that it does the following, which is exactly what the patching utility needs: +// -# Indicates if opcode is a jump (any kind) or a return (any kind) +// because this is important for the patching utility to determine if +// a function is too short or there are jumps too early in it for it +// to be preamble patched. +// -# The opcode length is always calculated, so that the patching utility +// can figure out where the next instruction starts, and whether it +// already has enough instructions to replace with the absolute jump +// to the patching code. +// +// The usage is quite simple; just create a MiniDisassembler and use its +// Disassemble() method. +// +// If you would like to extend this disassembler, please refer to the +// IA-32 Intel® Architecture Software Developer’s Manual Volume 2: +// Instruction Set Reference for information about operand decoding +// etc. +class PERFTOOLS_DLL_DECL MiniDisassembler { + public: + + // Creates a new instance and sets defaults. + // + // @param operand_default_32_bits If true, the default operand size is + // set to 32 bits, which is the default under Win32. Otherwise it is 16 bits. + // @param address_default_32_bits If true, the default address size is + // set to 32 bits, which is the default under Win32. Otherwise it is 16 bits. + MiniDisassembler(bool operand_default_32_bits, + bool address_default_32_bits); + + // Equivalent to MiniDisassembler(true, true); + MiniDisassembler(); + + // Attempts to disassemble a single instruction starting from the + // address in memory it is pointed to. + // + // @param start Address where disassembly should start. + // @param instruction_bytes Variable that will be <b>incremented</b> by + // the length in bytes of the instruction. + // @return enItJump, enItReturn or enItGeneric on success. enItUnknown + // if unable to disassemble, enItUnused if this seems to be an unused + // opcode. In the last two (error) cases, cbInstruction will be set + // to 0xffffffff. 
+ // + // @post This instance of the disassembler is ready to be used again, + // with unchanged defaults from creation time. + InstructionType Disassemble(unsigned char* start, unsigned int& instruction_bytes); + + private: + + // Makes the disassembler ready for reuse. + void Initialize(); + + // Sets the flags for address and operand sizes. + // @return Number of prefix bytes. + InstructionType ProcessPrefixes(unsigned char* start, unsigned int& size); + + // Sets the flag for whether we have ModR/M, and increments + // operand_bytes_ if any are specifies by the opcode directly. + // @return Number of opcode bytes. + InstructionType ProcessOpcode(unsigned char* start, + unsigned int table, + unsigned int& size); + + // Checks the type of the supplied operand. Increments + // operand_bytes_ if it directly indicates an immediate etc. + // operand. Asserts have_modrm_ if the operand specifies + // a ModR/M byte. + bool ProcessOperand(int flag_operand); + + // Increments operand_bytes_ by size specified by ModR/M and + // by SIB if present. + // @return 0 in case of error, 1 if there is just a ModR/M byte, + // 2 if there is a ModR/M byte and a SIB byte. + bool ProcessModrm(unsigned char* start, unsigned int& size); + + // Processes the SIB byte that it is pointed to. + // @param start Pointer to the SIB byte. + // @param mod The mod field from the ModR/M byte. + // @return 1 to indicate success (indicates 1 SIB byte) + bool ProcessSib(unsigned char* start, unsigned char mod, unsigned int& size); + + // The instruction type we have decoded from the opcode. + InstructionType instruction_type_; + + // Counts the number of bytes that is occupied by operands in + // the current instruction (note: we don't care about how large + // operands stored in registers etc. are). + unsigned int operand_bytes_; + + // True iff there is a ModR/M byte in this instruction. 
+ bool have_modrm_; + + // True iff we need to decode the ModR/M byte (sometimes it just + // points to a register, we can tell by the addressing mode). + bool should_decode_modrm_; + + // Current operand size is 32 bits if true, 16 bits if false. + bool operand_is_32_bits_; + + // Default operand size is 32 bits if true, 16 bits if false. + bool operand_default_is_32_bits_; + + // Current address size is 32 bits if true, 16 bits if false. + bool address_is_32_bits_; + + // Default address size is 32 bits if true, 16 bits if false. + bool address_default_is_32_bits_; + + // Determines if 64 bit operands are supported (x64). + bool operand_default_support_64_bits_; + + // Current operand size is 64 bits if true, 32 bits if false. + bool operand_is_64_bits_; + + // Huge big opcode table based on the IA-32 manual, defined + // in Ia32OpcodeMap.cc + static const OpcodeTable s_ia32_opcode_map_[]; + + // Somewhat smaller table to help with decoding ModR/M bytes + // when 16-bit addressing mode is being used. Defined in + // Ia32ModrmMap.cc + static const ModrmEntry s_ia16_modrm_map_[]; + + // Somewhat smaller table to help with decoding ModR/M bytes + // when 32-bit addressing mode is being used. Defined in + // Ia32ModrmMap.cc + static const ModrmEntry s_ia32_modrm_map_[]; + + // Indicators of whether we got certain prefixes that certain + // silly Intel instructions depend on in nonstandard ways for + // their behaviors. + bool got_f2_prefix_, got_f3_prefix_, got_66_prefix_; +}; + +}; // namespace sidestep + +#endif // GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_H_ diff --git a/src/third_party/gperftools-2.7/src/windows/mini_disassembler_types.h b/src/third_party/gperftools-2.7/src/windows/mini_disassembler_types.h new file mode 100644 index 00000000000..aceecf45cc7 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/mini_disassembler_types.h @@ -0,0 +1,237 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Several simple types used by the disassembler and some of the patching + * mechanisms. 
+ */ + +#ifndef GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_TYPES_H_ +#define GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_TYPES_H_ + +namespace sidestep { + +// Categories of instructions that we care about +enum InstructionType { + // This opcode is not used + IT_UNUSED, + // This disassembler does not recognize this opcode (error) + IT_UNKNOWN, + // This is not an instruction but a reference to another table + IT_REFERENCE, + // This byte is a prefix byte that we can ignore + IT_PREFIX, + // This is a prefix byte that switches to the nondefault address size + IT_PREFIX_ADDRESS, + // This is a prefix byte that switches to the nondefault operand size + IT_PREFIX_OPERAND, + // A jump or call instruction + IT_JUMP, + // A return instruction + IT_RETURN, + // Any other type of instruction (in this case we don't care what it is) + IT_GENERIC, +}; + +// Lists IA-32 operand sizes in multiples of 8 bits +enum OperandSize { + OS_ZERO = 0, + OS_BYTE = 1, + OS_WORD = 2, + OS_DOUBLE_WORD = 4, + OS_QUAD_WORD = 8, + OS_DOUBLE_QUAD_WORD = 16, + OS_32_BIT_POINTER = 32/8, + OS_48_BIT_POINTER = 48/8, + OS_SINGLE_PRECISION_FLOATING = 32/8, + OS_DOUBLE_PRECISION_FLOATING = 64/8, + OS_DOUBLE_EXTENDED_PRECISION_FLOATING = 80/8, + OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING = 128/8, + OS_PSEUDO_DESCRIPTOR = 6 +}; + +// Operand addressing methods from the IA-32 manual. The enAmMask value +// is a mask for the rest. The other enumeration values are named for the +// names given to the addressing methods in the manual, e.g. enAm_D is for +// the D addressing method. +// +// The reason we use a full 4 bytes and a mask, is that we need to combine +// these flags with the enOperandType to store the details +// on the operand in a single integer. 
+enum AddressingMethod { + AM_NOT_USED = 0, // This operand is not used for this instruction + AM_MASK = 0x00FF0000, // Mask for the rest of the values in this enumeration + AM_A = 0x00010000, // A addressing type + AM_C = 0x00020000, // C addressing type + AM_D = 0x00030000, // D addressing type + AM_E = 0x00040000, // E addressing type + AM_F = 0x00050000, // F addressing type + AM_G = 0x00060000, // G addressing type + AM_I = 0x00070000, // I addressing type + AM_J = 0x00080000, // J addressing type + AM_M = 0x00090000, // M addressing type + AM_O = 0x000A0000, // O addressing type + AM_P = 0x000B0000, // P addressing type + AM_Q = 0x000C0000, // Q addressing type + AM_R = 0x000D0000, // R addressing type + AM_S = 0x000E0000, // S addressing type + AM_T = 0x000F0000, // T addressing type + AM_V = 0x00100000, // V addressing type + AM_W = 0x00110000, // W addressing type + AM_X = 0x00120000, // X addressing type + AM_Y = 0x00130000, // Y addressing type + AM_REGISTER = 0x00140000, // Specific register is always used as this op + AM_IMPLICIT = 0x00150000, // An implicit, fixed value is used +}; + +// Operand types from the IA-32 manual. The enOtMask value is +// a mask for the rest. The rest of the values are named for the +// names given to these operand types in the manual, e.g. enOt_ps +// is for the ps operand type in the manual. +// +// The reason we use a full 4 bytes and a mask, is that we need +// to combine these flags with the enAddressingMethod to store the details +// on the operand in a single integer. 
enum OperandType {
  OT_MASK = 0xFF000000,
  OT_A = 0x01000000,
  OT_B = 0x02000000,
  OT_C = 0x03000000,
  OT_D = 0x04000000,
  OT_DQ = 0x05000000,
  OT_P = 0x06000000,
  OT_PI = 0x07000000,
  OT_PS = 0x08000000,  // actually unsupported for (we don't know its size)
  OT_Q = 0x09000000,
  OT_S = 0x0A000000,
  OT_SS = 0x0B000000,
  OT_SI = 0x0C000000,
  OT_V = 0x0D000000,
  OT_W = 0x0E000000,
  OT_SD = 0x0F000000,  // scalar double-precision floating-point value
  OT_PD = 0x10000000,  // double-precision floating point
  // dummy "operand type" for address mode M - which doesn't specify
  // operand type.  NOTE(review): 0x80000000 does not fit in a signed
  // int, which makes the enum's underlying type implementation-defined
  // pre-C++11 -- confirm all uses go through unsigned flag words.
  OT_ADDRESS_MODE_M = 0x80000000
};

// Flag that indicates if an immediate operand is 64-bits.
//
// The Intel 64 and IA-32 Architecture Software Developer's Manual currently
// defines MOV as the only instruction supporting a 64-bit immediate operand.
enum ImmediateOperandSize {
  IOS_MASK = 0x0000F000,
  IOS_DEFAULT = 0x0,
  IOS_64 = 0x00001000
};

// Everything that's in an Opcode (see below) except the three
// alternative opcode structs for different prefixes.
struct SpecificOpcode {
  // Index to continuation table, or 0 if this is the last
  // byte in the opcode.
  int table_index_;

  // The opcode type
  InstructionType type_;

  // Description of the type of the dest, src and aux operands,
  // put together from enOperandType, enAddressingMethod and
  // enImmediateOperandSize flags.
  int flag_dest_;
  int flag_source_;
  int flag_aux_;

  // We indicate the mnemonic for debugging purposes
  const char* mnemonic_;
};

// The information we keep in our tables about each of the different
// valid instructions recognized by the IA-32 architecture.
struct Opcode {
  // Index to continuation table, or 0 if this is the last
  // byte in the opcode.
  int table_index_;

  // The opcode type
  InstructionType type_;

  // Description of the type of the dest, src and aux operands,
  // put together from an enOperandType flag and an enAddressingMethod
  // flag.
  unsigned flag_dest_;
  unsigned flag_source_;
  unsigned flag_aux_;

  // We indicate the mnemonic for debugging purposes
  const char* mnemonic_;

  // Alternative opcode info if certain prefixes are specified.
  // In most cases, all of these are zeroed-out.  Only used if
  // is_prefix_dependent_ (below) is true.
  bool is_prefix_dependent_;
  SpecificOpcode opcode_if_f2_prefix_;
  SpecificOpcode opcode_if_f3_prefix_;
  SpecificOpcode opcode_if_66_prefix_;
};

// Information about each table entry.
struct OpcodeTable {
  // Table of instruction entries
  const Opcode* table_;
  // How many bytes left to shift ModR/M byte <b>before</b> applying mask
  unsigned char shift_;
  // Mask to apply to byte being looked at before comparing to table
  unsigned char mask_;
  // Minimum/maximum indexes in table.
  unsigned char min_lim_;
  unsigned char max_lim_;
};

// Information about each entry in table used to decode ModR/M byte.
struct ModrmEntry {
  // Is the operand encoded as bytes in the instruction (rather than
  // if it's e.g. a register in which case it's just encoded in the
  // ModR/M byte)
  bool is_encoded_in_instruction_;

  // Is there a SIB byte?  In this case we always need to decode it.
  bool use_sib_byte_;

  // What is the size of the operand (only important if it's encoded
  // in the instruction)?
  OperandSize operand_size_;
};

};  // namespace sidestep

#endif  // GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_TYPES_H_
diff --git a/src/third_party/gperftools-2.7/src/windows/nm-pdb.c b/src/third_party/gperftools-2.7/src/windows/nm-pdb.c
new file mode 100644
index 00000000000..95a080d6859
--- /dev/null
+++ b/src/third_party/gperftools-2.7/src/windows/nm-pdb.c
@@ -0,0 +1,273 @@
/* Copyright (c) 2008, Google Inc.
 * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: David Vitek + * + * Dump function addresses using Microsoft debug symbols. This works + * on PDB files. Note that this program will download symbols to + * c:\websymbols without asking. 
 */

#define WIN32_LEAN_AND_MEAN
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_DEPRECATE

#include <stdio.h>
#include <stdlib.h>
#include <string.h>    // for _strdup

#include <windows.h>
#include <dbghelp.h>

// Unfortunately, there is no versioning info in dbghelp.h so I can
// tell whether it has an old-style (circa VC7.1) IMAGEHLP_MODULE64
// struct, with only a few fields, or a new-style (circa VC8)
// IMAGEHLP_MODULE64, with lots of fields.  These fields are just used
// for debugging, so it's fine to just assume the smaller struct, but
// for most people, using a modern MSVC, the full struct is available.
// If you are one of those people and would like this extra debugging
// info, you can uncomment the line below.
//#define VC8_OR_ABOVE

// Size of the buffer used to hold the symbol search path.
#define SEARCH_CAP (1024*1024)
// Microsoft's public symbol server; downloaded symbols are cached locally.
#define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols"

// One symbol harvested from the module's debug information.
typedef struct {
  char *name;
  ULONG64 addr;
  ULONG flags;
} SYM;

// Accumulator handed (as the user-context pointer) to the
// SymEnumSymbols callback below.
typedef struct {
  ULONG64 module_base;
  SYM *syms;        // growable array of collected symbols
  DWORD syms_len;
  DWORD syms_cap;
} SYM_CONTEXT;

// qsort comparator: orders SYM entries by ascending address.
static int sym_cmp(const void *_s1, const void *_s2) {
  const SYM *s1 = (const SYM *)_s1;
  const SYM *s2 = (const SYM *)_s2;

  if (s1->addr < s2->addr)
    return -1;
  if (s1->addr > s2->addr)
    return 1;
  return 0;
}

// Callback invoked once per symbol by SymEnumSymbols.  Skips symbols
// below the module base and TLS-relative symbols; appends everything
// else to the SYM_CONTEXT array, doubling its capacity as needed.
static BOOL CALLBACK EnumSymProc(PSYMBOL_INFO symbol_info,
                                 ULONG symbol_size,
                                 PVOID user_context) {
  SYM_CONTEXT *ctx = (SYM_CONTEXT*)user_context;
  if (symbol_info->Address < ctx->module_base ||
      (symbol_info->Flags & SYMFLAG_TLSREL)) {
    return TRUE;
  }
  if (ctx->syms_len == ctx->syms_cap) {
    if (!ctx->syms_cap)
      ctx->syms_cap++;
    ctx->syms_cap *= 2;
    // NOTE(review): realloc result is not checked for NULL; on
    // out-of-memory the assignments below will crash (vendored code,
    // left as-is).
    ctx->syms = realloc(ctx->syms, sizeof(ctx->syms[0]) * ctx->syms_cap);
  }
  ctx->syms[ctx->syms_len].name = _strdup(symbol_info->Name);
  ctx->syms[ctx->syms_len].addr = symbol_info->Address;
  ctx->syms[ctx->syms_len].flags = symbol_info->Flags;
  ctx->syms_len++;
  return TRUE;
}

// Prints "description: var", but only when var is a non-empty string.
static void MaybePrint(const char* var, const char* description) {
  if (var[0])
    printf("%s: %s\n", description, var);
}

// Prints whether a given piece of debug information is present.
static void PrintAvailability(BOOL var, const char *description) {
  printf("%s: %s\n", description, (var ? "Available" : "Not available"));
}

// Dumps what kind of debug info (PDB, COFF, exports, ...) was loaded
// for the module at module_base, for diagnostic purposes.
static void ShowSymbolInfo(HANDLE process, ULONG64 module_base) {
  /* Get module information. */
  IMAGEHLP_MODULE64 module_info;
  BOOL getmoduleinfo_rv;
  printf("Load Address: %I64x\n", module_base);
  memset(&module_info, 0, sizeof(module_info));
  module_info.SizeOfStruct = sizeof(module_info);
  getmoduleinfo_rv = SymGetModuleInfo64(process, module_base, &module_info);
  if (!getmoduleinfo_rv) {
    printf("Error: SymGetModuleInfo64() failed. Error code: %u\n",
           GetLastError());
    return;
  }
  /* Display information about symbols, based on kind of symbol. */
  switch (module_info.SymType) {
    case SymNone:
      printf(("No symbols available for the module.\n"));
      break;
    case SymExport:
      printf(("Loaded symbols: Exports\n"));
      break;
    case SymCoff:
      printf(("Loaded symbols: COFF\n"));
      break;
    case SymCv:
      printf(("Loaded symbols: CodeView\n"));
      break;
    case SymSym:
      printf(("Loaded symbols: SYM\n"));
      break;
    case SymVirtual:
      printf(("Loaded symbols: Virtual\n"));
      break;
    case SymPdb:
      printf(("Loaded symbols: PDB\n"));
      break;
    case SymDia:
      printf(("Loaded symbols: DIA\n"));
      break;
    case SymDeferred:
      printf(("Loaded symbols: Deferred\n"));  /* not actually loaded */
      break;
    default:
      printf(("Loaded symbols: Unknown format.\n"));
      break;
  }

  /* NOTE(review): the argument order here looks swapped -- MaybePrint
   * treats its FIRST argument as the value to test for emptiness and
   * its second as the label, but these calls pass the constant label
   * first (so empty fields are still printed, reversed).  Verify
   * against upstream gperftools before changing. */
  MaybePrint("Image name", module_info.ImageName);
  MaybePrint("Loaded image name", module_info.LoadedImageName);
#ifdef VC8_OR_ABOVE   /* TODO(csilvers): figure out how to tell */
  MaybePrint("PDB file name", module_info.LoadedPdbName);
  if (module_info.PdbUnmatched || module_info.DbgUnmatched) {
    /* This can only happen if the debug information is contained in a
     * separate file (.DBG or .PDB)
     */
    printf(("Warning: Unmatched symbols.\n"));
  }
#endif

  /* Contents */
#ifdef VC8_OR_ABOVE   /* TODO(csilvers): figure out how to tell */
  PrintAvailability("Line numbers", module_info.LineNumbers);
  PrintAvailability("Global symbols", module_info.GlobalSymbols);
  PrintAvailability("Type information", module_info.TypeInfo);
#endif
}

void usage() {
  fprintf(stderr, "usage: nm-pdb [-C|--demangle] <module or filename>\n");
}

int main(int argc, char *argv[]) {
  DWORD error;
  HANDLE process;
  ULONG64 module_base;
  SYM_CONTEXT ctx;
  int i;
  char* search;
  char* filename = NULL;
  int rv = 0;
  /* We may add SYMOPT_UNDNAME if --demangle is specified: */
  DWORD symopts = SYMOPT_DEFERRED_LOADS | SYMOPT_DEBUG;

  for (i = 1; i < argc; i++) {
    if (strcmp(argv[i], "--demangle") == 0 || strcmp(argv[i], "-C") == 0) {
      symopts |= SYMOPT_UNDNAME;
    } else if (strcmp(argv[i], "--help") == 0) {
      usage();
      exit(0);
    } else {
      break;
    }
  }
  /* Exactly one positional argument (the module/file name) must remain. */
  if (i != argc - 1) {
    usage();
    exit(1);
  }
  filename = argv[i];

  process = GetCurrentProcess();

  if (!SymInitialize(process, NULL, FALSE)) {
    error = GetLastError();
    /* NOTE(review): GetLastError() returns a DWORD (unsigned); %d is
     * technically the wrong conversion here and below. */
    fprintf(stderr, "SymInitialize returned error : %d\n", error);
    return 1;
  }

  /* NOTE(review): this 1 MiB malloc is unchecked; a NULL return would
   * crash in SymGetSearchPath/strcpy below. */
  search = malloc(SEARCH_CAP);
  if (SymGetSearchPath(process, search, SEARCH_CAP)) {
    if (strlen(search) + sizeof(";" WEBSYM) > SEARCH_CAP) {
      fprintf(stderr, "Search path too long\n");
      SymCleanup(process);
      return 1;
    }
    strcat(search, ";" WEBSYM);
  } else {
    error = GetLastError();
    fprintf(stderr, "SymGetSearchPath returned error : %d\n", error);
    rv = 1;                   /* An error, but not a fatal one */
    strcpy(search, WEBSYM);   /* Use a default value */
  }
  if (!SymSetSearchPath(process, search)) {
    error = GetLastError();
    fprintf(stderr, "SymSetSearchPath returned error : %d\n", error);
    rv = 1;                   /* An error, but not a fatal one */
  }

  SymSetOptions(symopts);
  module_base = SymLoadModuleEx(process, NULL, filename, NULL, 0, 0, NULL, 0);
  if (!module_base) {
    /* SymLoadModuleEx failed */
    error = GetLastError();
    fprintf(stderr, "SymLoadModuleEx returned error : %d for %s\n",
            error, filename);
    SymCleanup(process);
    return 1;
  }

  ShowSymbolInfo(process, module_base);

  memset(&ctx, 0, sizeof(ctx));
  ctx.module_base = module_base;
  if (!SymEnumSymbols(process, module_base, NULL, EnumSymProc, &ctx)) {
    error = GetLastError();
    fprintf(stderr, "SymEnumSymbols returned error: %d\n", error);
    rv = 1;
  } else {
    DWORD j;
    qsort(ctx.syms, ctx.syms_len, sizeof(ctx.syms[0]), sym_cmp);
    for (j = 0; j < ctx.syms_len; j++) {
      printf("%016I64x X %s\n", ctx.syms[j].addr, ctx.syms[j].name);
    }
    /* In a perfect world, maybe we'd clean up ctx's memory? */
  }
  SymUnloadModule64(process, module_base);
  SymCleanup(process);
  return rv;
}
diff --git a/src/third_party/gperftools-2.7/src/windows/override_functions.cc b/src/third_party/gperftools-2.7/src/windows/override_functions.cc
new file mode 100644
index 00000000000..f6f519add3a
--- /dev/null
+++ b/src/third_party/gperftools-2.7/src/windows/override_functions.cc
@@ -0,0 +1,158 @@
// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Mike Belshe +// +// To link tcmalloc into a EXE or DLL statically without using the patching +// facility, we can take a stock libcmt and remove all the allocator functions. +// When we relink the EXE/DLL with the modified libcmt and tcmalloc, a few +// functions are missing. This file contains the additional overrides which +// are required in the VS2005 libcmt in order to link the modified libcmt. +// +// See also +// http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b + +#include <config.h> + +#ifndef _WIN32 +# error You should only be including this file in a windows environment! 
+#endif + +#ifndef WIN32_OVERRIDE_ALLOCATORS +# error This file is intended for use when overriding allocators +#endif + +#include "tcmalloc.cc" + +extern "C" { + +void* _malloc_base(size_t size) { + return malloc(size); +} + +void _free_base(void* p) { + free(p); +} + +void* _calloc_base(size_t n, size_t size) { + return calloc(n, size); +} + +void* _recalloc(void* p, size_t n, size_t size) { + void* result = realloc(p, n * size); + memset(result, 0, n * size); + return result; +} + +void* _calloc_impl(size_t n, size_t size) { + return calloc(n, size); +} + +size_t _msize(void* p) { + return MallocExtension::instance()->GetAllocatedSize(p); +} + +HANDLE __acrt_heap = nullptr; + +bool __acrt_initialize_heap() { + new TCMallocGuard(); + return true; +} + +bool __acrt_uninitialize_heap(bool) { + return true; +} + +intptr_t _get_heap_handle() { + return 0; +} + +HANDLE __acrt_getheap() { + return __acrt_heap; +} + +// The CRT heap initialization stub. +int _heap_init() { + // We intentionally leak this object. It lasts for the process + // lifetime. Trying to teardown at _heap_term() is so late that + // you can't do anything useful anyway. + new TCMallocGuard(); + return 1; +} + +// The CRT heap cleanup stub. +void _heap_term() { +} + +// We set this to 1 because part of the CRT uses a check of _crtheap != 0 +// to test whether the CRT has been initialized. Once we've ripped out +// the allocators from libcmt, we need to provide this definition so that +// the rest of the CRT is still usable. +void* _crtheap = reinterpret_cast<void*>(1); + +int _set_new_mode(int flag) { + return tc_set_new_mode(flag); +} + +int _query_new_mode() { + return tc_query_new_mode(); +} + +} // extern "C" + +#ifndef NDEBUG +#undef malloc +#undef free +#undef calloc +int _CrtDbgReport(int, const char*, int, const char*, const char*, ...) { + return 0; +} + +int _CrtDbgReportW(int, const wchar_t*, int, const wchar_t*, const wchar_t*, ...) 
{ + return 0; +} + +int _CrtSetReportMode(int, int) { + return 0; +} + +extern "C" void* _malloc_dbg(size_t size, int , const char*, int) { + return malloc(size); +} + +extern "C" void _free_dbg(void* ptr, int) { + free(ptr); +} + +extern "C" void* _calloc_dbg(size_t n, size_t size, int, const char*, int) { + return calloc(n, size); +} +#endif // NDEBUG diff --git a/src/third_party/gperftools-2.7/src/windows/patch_functions.cc b/src/third_party/gperftools-2.7/src/windows/patch_functions.cc new file mode 100644 index 00000000000..5417880e3cb --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/patch_functions.cc @@ -0,0 +1,1081 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Craig Silverstein +// +// The main purpose of this file is to patch the libc allocation +// routines (malloc and friends, but also _msize and other +// windows-specific libc-style routines). However, we also patch +// windows routines to do accounting. We do better at the former than +// the latter. Here are some comments from Paul Pluzhnikov about what +// it might take to do a really good job patching windows routines to +// keep track of memory usage: +// +// "You should intercept at least the following: +// HeapCreate HeapDestroy HeapAlloc HeapReAlloc HeapFree +// RtlCreateHeap RtlDestroyHeap RtlAllocateHeap RtlFreeHeap +// malloc calloc realloc free +// malloc_dbg calloc_dbg realloc_dbg free_dbg +// Some of these call the other ones (but not always), sometimes +// recursively (i.e. HeapCreate may call HeapAlloc on a different +// heap, IIRC)." +// +// Since Paul didn't mention VirtualAllocEx, he may not have even been +// considering all the mmap-like functions that windows has (or he may +// just be ignoring it because he's seen we already patch it). Of the +// above, we do not patch the *_dbg functions, and of the windows +// functions, we only patch HeapAlloc and HeapFree. +// +// The *_dbg functions come into play with /MDd, /MTd, and /MLd, +// probably. It may be ok to just turn off tcmalloc in those cases -- +// if the user wants the windows debug malloc, they probably don't +// want tcmalloc! 
// We should also test with all of /MD, /MT, and /ML,
// which we're not currently doing.

// TODO(csilvers): try to do better here?  Paul does conclude:
//    "Keeping track of all of this was a nightmare."

#ifndef _WIN32
# error You should only be including windows/patch_functions.cc in a windows environment!
#endif

#include <config.h>

#ifdef WIN32_OVERRIDE_ALLOCATORS
#error This file is intended for patching allocators - use override_functions.cc instead.
#endif

// We use psapi.  Non-MSVC systems will have to link this in themselves.
#ifdef _MSC_VER
#pragma comment(lib, "Psapi.lib")
#endif

// Make sure we always use the 'old' names of the psapi functions.
#ifndef PSAPI_VERSION
#define PSAPI_VERSION 1
#endif

#include <windows.h>
#include <stdio.h>
#include <malloc.h>       // for _msize and _expand
#include <psapi.h>        // for EnumProcessModules, GetModuleInformation, etc.
#include <set>
#include <map>
#include <vector>
#include <base/logging.h>
#include "base/spinlock.h"
#include "gperftools/malloc_hook.h"
#include "malloc_hook-inl.h"
#include "preamble_patcher.h"

// The maximum number of modules we allow to be in one executable
const int kMaxModules = 8182;

// These are hard-coded, unfortunately. :-(  They are also probably
// compiler specific.  See get_mangled_names.cc, in this directory,
// for instructions on how to update these names for your compiler.
// (These are the MSVC-mangled names of operator new/delete and their
// nothrow/array variants; x64 mangling differs from x86.)
#ifdef _WIN64
const char kMangledNew[] = "??2@YAPEAX_K@Z";
const char kMangledNewArray[] = "??_U@YAPEAX_K@Z";
const char kMangledDelete[] = "??3@YAXPEAX@Z";
const char kMangledDeleteArray[] = "??_V@YAXPEAX@Z";
const char kMangledNewNothrow[] = "??2@YAPEAX_KAEBUnothrow_t@std@@@Z";
const char kMangledNewArrayNothrow[] = "??_U@YAPEAX_KAEBUnothrow_t@std@@@Z";
const char kMangledDeleteNothrow[] = "??3@YAXPEAXAEBUnothrow_t@std@@@Z";
const char kMangledDeleteArrayNothrow[] = "??_V@YAXPEAXAEBUnothrow_t@std@@@Z";
#else
const char kMangledNew[] = "??2@YAPAXI@Z";
const char kMangledNewArray[] = "??_U@YAPAXI@Z";
const char kMangledDelete[] = "??3@YAXPAX@Z";
const char kMangledDeleteArray[] = "??_V@YAXPAX@Z";
const char kMangledNewNothrow[] = "??2@YAPAXIABUnothrow_t@std@@@Z";
const char kMangledNewArrayNothrow[] = "??_U@YAPAXIABUnothrow_t@std@@@Z";
const char kMangledDeleteNothrow[] = "??3@YAXPAXABUnothrow_t@std@@@Z";
const char kMangledDeleteArrayNothrow[] = "??_V@YAXPAXABUnothrow_t@std@@@Z";
#endif

// This is an unused but exported symbol that we can use to tell the
// MSVC linker to bring in libtcmalloc, via the /INCLUDE linker flag.
// Without this, the linker will likely decide that libtcmalloc.dll
// doesn't add anything to the executable (since it does all its work
// through patching, which the linker can't see), and ignore it
// entirely.  (The name 'tcmalloc' is already reserved for a
// namespace.  I'd rather export a variable named "_tcmalloc", but I
// couldn't figure out how to get that to work.  This function exports
// the symbol "__tcmalloc".)
extern "C" PERFTOOLS_DLL_DECL void _tcmalloc();
void _tcmalloc() { }

// This is the version needed for windows x64, which has a different
// decoration scheme which doesn't auto-add a leading underscore.
extern "C" PERFTOOLS_DLL_DECL void __tcmalloc();
void __tcmalloc() { }

namespace {    // most everything here is in an unnamed namespace

typedef void (*GenericFnPtr)();

using sidestep::PreamblePatcher;

struct ModuleEntryCopy;   // defined below

// These functions are how we override the memory allocation
// functions, just like tcmalloc.cc and malloc_hook.cc do.

// This is information about the routines we're patching, for a given
// module that implements libc memory routines.  A single executable
// can have several libc implementations running about (in different
// .dll's), and we need to patch/unpatch them all.  This defines
// everything except the new functions we're patching in, which
// are defined in LibcFunctions, below.
class LibcInfo {
 public:
  LibcInfo() {
    memset(this, 0, sizeof(*this));  // easiest way to initialize the array
  }

  bool patched() const { return is_valid(); }
  void set_is_valid(bool b) { is_valid_ = b; }
  // According to http://msdn.microsoft.com/en-us/library/ms684229(VS.85).aspx:
  // "The load address of a module (lpBaseOfDll) is the same as the HMODULE
  // value."
  HMODULE hmodule() const {
    return reinterpret_cast<HMODULE>(const_cast<void*>(module_base_address_));
  }

  // Populates all the windows_fn_[] vars based on our module info.
  // Returns false if windows_fn_ is all NULL's, because there's
  // nothing to patch.  Also populates the rest of the module_entry
  // info, such as the module's name.
  bool PopulateWindowsFn(const ModuleEntryCopy& module_entry);

 protected:
  void CopyFrom(const LibcInfo& that) {
    if (this == &that)
      return;
    this->is_valid_ = that.is_valid_;
    memcpy(this->windows_fn_, that.windows_fn_, sizeof(windows_fn_));
    this->module_base_address_ = that.module_base_address_;
    this->module_base_size_ = that.module_base_size_;
  }

  // Index constants for the parallel function tables below; one slot
  // per libc routine we know how to patch.
  enum {
    kMalloc, kFree, kRealloc, kCalloc,
    kNew, kNewArray, kDelete, kDeleteArray,
    kNewNothrow, kNewArrayNothrow, kDeleteNothrow, kDeleteArrayNothrow,
    // These are windows-only functions from malloc.h
    k_Msize, k_Expand,
    // A MS CRT "internal" function, implemented using _calloc_impl
    k_CallocCrt, kFreeBase,
    kNumFunctions
  };

  // I'd like to put these together in a struct (perhaps in the
  // subclass, so we can put in perftools_fn_ as well), but vc8 seems
  // to have a bug where it doesn't initialize the struct properly if
  // we try to take the address of a function that's not yet loaded
  // from a dll, as is the common case for static_fn_.  So we need
  // each to be in its own array. :-(
  static const char* const function_name_[kNumFunctions];

  // This function is only used when statically linking the binary.
  // In that case, loading malloc/etc from the dll (via
  // PatchOneModule) won't work, since there are no dlls.  Instead,
  // you just want to be taking the address of malloc/etc directly.
  // In the common, non-static-link case, these pointers will all be
  // NULL, since this initializer runs before msvcrt.dll is loaded.
  static const GenericFnPtr static_fn_[kNumFunctions];

  // This is the address of the function we are going to patch
  // (malloc, etc).  Other info about the function is in the
  // patch-specific subclasses, below.
  GenericFnPtr windows_fn_[kNumFunctions];

  // This is set to true when this structure is initialized (because
  // we're patching a new library) and set to false when it's
  // uninitialized (because we've freed that library).
  bool is_valid_;

  const void *module_base_address_;
  size_t module_base_size_;

 public:
  // These shouldn't have to be public, since only subclasses of
  // LibcInfo need it, but they do.  Maybe something to do with
  // templates.  Shrug.  I hide them down here so users won't see
  // them. :-)  (OK, I also need to define ctrgProcAddress late.)
  bool is_valid() const { return is_valid_; }
  GenericFnPtr windows_fn(int ifunction) const {
    return windows_fn_[ifunction];
  }
  // These three are needed by ModuleEntryCopy.
  static const int ctrgProcAddress = kNumFunctions;
  static GenericFnPtr static_fn(int ifunction) {
    return static_fn_[ifunction];
  }
  static const char* const function_name(int ifunction) {
    return function_name_[ifunction];
  }
};

// Template trickiness: logically, a LibcInfo would include
// Windows_malloc_, origstub_malloc_, and Perftools_malloc_: for a
// given module, these three go together.  And in fact,
// Perftools_malloc_ may need to call origstub_malloc_, which means we
// either need to change Perftools_malloc_ to take origstub_malloc_ as
// an argument -- unfortunately impossible since it needs to keep the
// same API as normal malloc -- or we need to write a different
// version of Perftools_malloc_ for each LibcInfo instance we create.
// We choose the second route, and use templates to implement it (we
// could have also used macros).  So to get multiple versions
// of the struct, we say "struct<1> var1; struct<2> var2;".  The price
// we pay is some code duplication, and more annoying, each instance
// of this var is a separate type.
template<int> class LibcInfoWithPatchFunctions : public LibcInfo {
 public:
  // me_info should have had PopulateWindowsFn() called on it, so the
  // module_* vars and windows_fn_ are set up.
  bool Patch(const LibcInfo& me_info);
  void Unpatch();

 private:
  // This holds the original function contents after we patch the function.
  // This has to be defined static in the subclass, because the perftools_fns
  // reference origstub_fn_.
  static GenericFnPtr origstub_fn_[kNumFunctions];

  // This is the function we want to patch in
  static const GenericFnPtr perftools_fn_[kNumFunctions];

  static void* Perftools_malloc(size_t size) __THROW;
  static void Perftools_free(void* ptr) __THROW;
  static void Perftools_free_base(void* ptr) __THROW;
  static void* Perftools_realloc(void* ptr, size_t size) __THROW;
  static void* Perftools_calloc(size_t nmemb, size_t size) __THROW;
  static void* Perftools_new(size_t size);
  static void* Perftools_newarray(size_t size);
  static void Perftools_delete(void *ptr);
  static void Perftools_deletearray(void *ptr);
  static void* Perftools_new_nothrow(size_t size,
                                     const std::nothrow_t&) __THROW;
  static void* Perftools_newarray_nothrow(size_t size,
                                          const std::nothrow_t&) __THROW;
  static void Perftools_delete_nothrow(void *ptr,
                                       const std::nothrow_t&) __THROW;
  static void Perftools_deletearray_nothrow(void *ptr,
                                            const std::nothrow_t&) __THROW;
  static size_t Perftools__msize(void *ptr) __THROW;
  static void* Perftools__expand(void *ptr, size_t size) __THROW;
  // malloc.h also defines these functions:
  //   _aligned_malloc, _aligned_free,
  //   _recalloc, _aligned_offset_malloc, _aligned_realloc, _aligned_recalloc
  //   _aligned_offset_realloc, _aligned_offset_recalloc, _malloca, _freea
  // But they seem pretty obscure, and I'm fine not overriding them for now.
  // It may be they all call into malloc/free anyway.
};

// This is a subset of MODDULEENTRY32, that we need for patching.
struct ModuleEntryCopy {
  LPVOID modBaseAddr;     // the same as hmodule
  DWORD modBaseSize;
  // This is not part of MODDULEENTRY32, but is needed to avoid making
  // windows syscalls while we're holding patch_all_modules_lock (see
  // lock-inversion comments at patch_all_modules_lock definition, below).
  GenericFnPtr rgProcAddresses[LibcInfo::ctrgProcAddress];

  ModuleEntryCopy() {
    modBaseAddr = NULL;
    modBaseSize = 0;
    // NOTE(review): 'int i' vs the size_t sizeof-expression triggers a
    // -Wsign-compare warning; harmless for these small fixed arrays.
    for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
      rgProcAddresses[i] = LibcInfo::static_fn(i);
  }
  ModuleEntryCopy(const MODULEINFO& mi) {
    this->modBaseAddr = mi.lpBaseOfDll;
    this->modBaseSize = mi.SizeOfImage;
    LPVOID modEndAddr = (char*)mi.lpBaseOfDll + mi.SizeOfImage;
    for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) {
      FARPROC target = ::GetProcAddress(
          reinterpret_cast<const HMODULE>(mi.lpBaseOfDll),
          LibcInfo::function_name(i));
      // Sometimes a DLL forwards a function to a function in another
      // DLL.  We don't want to patch those forwarded functions --
      // they'll get patched when the other DLL is processed.
      if (target >= modBaseAddr && target < modEndAddr)
        rgProcAddresses[i] = (GenericFnPtr)target;
      else
        rgProcAddresses[i] = (GenericFnPtr)NULL;
    }
  }
};

// This class is easier because there's only one of them.
class WindowsInfo {
 public:
  void Patch();
  void Unpatch();

 private:
  // TODO(csilvers): should we be patching GlobalAlloc/LocalAlloc instead,
  //                 for pre-XP systems?
  enum {
    kHeapAlloc, kHeapFree, kVirtualAllocEx, kVirtualFreeEx,
    kMapViewOfFileEx, kUnmapViewOfFile, kLoadLibraryExW, kFreeLibrary,
    kNumFunctions
  };

  struct FunctionInfo {
    const char* const name;          // name of fn in a module (eg "malloc")
    GenericFnPtr windows_fn;         // the fn whose name we call (&malloc)
    GenericFnPtr origstub_fn;        // original fn contents after we patch
    const GenericFnPtr perftools_fn; // fn we want to patch in
  };

  static FunctionInfo function_info_[kNumFunctions];

  // A Windows-API equivalent of malloc and free
  static LPVOID WINAPI Perftools_HeapAlloc(HANDLE hHeap, DWORD dwFlags,
                                           DWORD_PTR dwBytes);
  static BOOL WINAPI Perftools_HeapFree(HANDLE hHeap, DWORD dwFlags,
                                        LPVOID lpMem);
  // A Windows-API equivalent of mmap and munmap, for "anonymous regions"
  static LPVOID WINAPI Perftools_VirtualAllocEx(HANDLE process, LPVOID address,
                                                SIZE_T size, DWORD type,
                                                DWORD protect);
  static BOOL WINAPI Perftools_VirtualFreeEx(HANDLE process, LPVOID address,
                                             SIZE_T size, DWORD type);
  // A Windows-API equivalent of mmap and munmap, for actual files
  static LPVOID WINAPI Perftools_MapViewOfFileEx(HANDLE hFileMappingObject,
                                                 DWORD dwDesiredAccess,
                                                 DWORD dwFileOffsetHigh,
                                                 DWORD dwFileOffsetLow,
                                                 SIZE_T dwNumberOfBytesToMap,
                                                 LPVOID lpBaseAddress);
  static BOOL WINAPI Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress);
  // We don't need the other 3 variants because they all call this one.
  static HMODULE WINAPI Perftools_LoadLibraryExW(LPCWSTR lpFileName,
                                                 HANDLE hFile,
                                                 DWORD dwFlags);
  static BOOL WINAPI Perftools_FreeLibrary(HMODULE hLibModule);
};

// If you run out, just add a few more to the array.  You'll also need
// to update the switch statement in PatchOneModule(), and the list in
// UnpatchWindowsFunctions().
// main_executable and main_executable_windows are two windows into
// the same executable.  One is responsible for patching the libc
// routines that live in the main executable (if any) to use tcmalloc;
// the other is responsible for patching the windows routines like
// HeapAlloc/etc to use tcmalloc.
static LibcInfoWithPatchFunctions<0> main_executable;
static LibcInfoWithPatchFunctions<1> libc1;
static LibcInfoWithPatchFunctions<2> libc2;
static LibcInfoWithPatchFunctions<3> libc3;
static LibcInfoWithPatchFunctions<4> libc4;
static LibcInfoWithPatchFunctions<5> libc5;
static LibcInfoWithPatchFunctions<6> libc6;
static LibcInfoWithPatchFunctions<7> libc7;
static LibcInfoWithPatchFunctions<8> libc8;
static LibcInfo* g_module_libcs[] = {
  &libc1, &libc2, &libc3, &libc4, &libc5, &libc6, &libc7, &libc8
};
static WindowsInfo main_executable_windows;

// Keep this in the same order as the kMalloc..kFreeBase enum in LibcInfo.
const char* const LibcInfo::function_name_[] = {
  "malloc", "free", "realloc", "calloc",
  kMangledNew, kMangledNewArray, kMangledDelete, kMangledDeleteArray,
  // Ideally we should patch the nothrow versions of new/delete, but
  // at least in msvcrt, nothrow-new machine-code is of a type we
  // can't patch.  Since these are relatively rare, I'm hoping it's ok
  // not to patch them.  (NULL name turns off patching.)
  NULL,  // kMangledNewNothrow,
  NULL,  // kMangledNewArrayNothrow,
  NULL,  // kMangledDeleteNothrow,
  NULL,  // kMangledDeleteArrayNothrow,
  "_msize", "_expand", "_calloc_crt", "_free_base"
};

// For mingw, I can't patch the new/delete here, because the
// instructions are too small to patch.  Luckily, they're so small
// because all they do is call into malloc/free, so they still end up
// calling tcmalloc routines, and we don't actually lose anything
// (except maybe some stacktrace goodness) by not patching.
+const GenericFnPtr LibcInfo::static_fn_[] = { + (GenericFnPtr)&::malloc, + (GenericFnPtr)&::free, + (GenericFnPtr)&::realloc, + (GenericFnPtr)&::calloc, +#ifdef __MINGW32__ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +#else + (GenericFnPtr)(void*(*)(size_t))&::operator new, + (GenericFnPtr)(void*(*)(size_t))&::operator new[], + (GenericFnPtr)(void(*)(void*))&::operator delete, + (GenericFnPtr)(void(*)(void*))&::operator delete[], + (GenericFnPtr) + (void*(*)(size_t, struct std::nothrow_t const &))&::operator new, + (GenericFnPtr) + (void*(*)(size_t, struct std::nothrow_t const &))&::operator new[], + (GenericFnPtr) + (void(*)(void*, struct std::nothrow_t const &))&::operator delete, + (GenericFnPtr) + (void(*)(void*, struct std::nothrow_t const &))&::operator delete[], +#endif + (GenericFnPtr)&::_msize, + (GenericFnPtr)&::_expand, + (GenericFnPtr)&::calloc, + (GenericFnPtr)&::free +}; + +template<int T> GenericFnPtr LibcInfoWithPatchFunctions<T>::origstub_fn_[] = { + // This will get filled in at run-time, as patching is done. 
+}; + +template<int T> +const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = { + (GenericFnPtr)&Perftools_malloc, + (GenericFnPtr)&Perftools_free, + (GenericFnPtr)&Perftools_realloc, + (GenericFnPtr)&Perftools_calloc, + (GenericFnPtr)&Perftools_new, + (GenericFnPtr)&Perftools_newarray, + (GenericFnPtr)&Perftools_delete, + (GenericFnPtr)&Perftools_deletearray, + (GenericFnPtr)&Perftools_new_nothrow, + (GenericFnPtr)&Perftools_newarray_nothrow, + (GenericFnPtr)&Perftools_delete_nothrow, + (GenericFnPtr)&Perftools_deletearray_nothrow, + (GenericFnPtr)&Perftools__msize, + (GenericFnPtr)&Perftools__expand, + (GenericFnPtr)&Perftools_calloc, + (GenericFnPtr)&Perftools_free_base +}; + +/*static*/ WindowsInfo::FunctionInfo WindowsInfo::function_info_[] = { + { "HeapAlloc", NULL, NULL, (GenericFnPtr)&Perftools_HeapAlloc }, + { "HeapFree", NULL, NULL, (GenericFnPtr)&Perftools_HeapFree }, + { "VirtualAllocEx", NULL, NULL, (GenericFnPtr)&Perftools_VirtualAllocEx }, + { "VirtualFreeEx", NULL, NULL, (GenericFnPtr)&Perftools_VirtualFreeEx }, + { "MapViewOfFileEx", NULL, NULL, (GenericFnPtr)&Perftools_MapViewOfFileEx }, + { "UnmapViewOfFile", NULL, NULL, (GenericFnPtr)&Perftools_UnmapViewOfFile }, + { "LoadLibraryExW", NULL, NULL, (GenericFnPtr)&Perftools_LoadLibraryExW }, + { "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary }, +}; + +bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) { + // First, store the location of the function to patch before + // patching it. If none of these functions are found in the module, + // then this module has no libc in it, and we just return false. + for (int i = 0; i < kNumFunctions; i++) { + if (!function_name_[i]) // we can turn off patching by unsetting name + continue; + // The ::GetProcAddress calls were done in the ModuleEntryCopy + // constructor, so we don't have to make any windows calls here. 
+ const GenericFnPtr fn = module_entry.rgProcAddresses[i]; + if (fn) { + windows_fn_[i] = PreamblePatcher::ResolveTarget(fn); + } + } + + // Some modules use the same function pointer for new and new[]. If + // we find that, set one of the pointers to NULL so we don't double- + // patch. Same may happen with new and nothrow-new, or even new[] + // and nothrow-new. It's easiest just to check each fn-ptr against + // every other. + for (int i = 0; i < kNumFunctions; i++) { + for (int j = i+1; j < kNumFunctions; j++) { + if (windows_fn_[i] == windows_fn_[j]) { + // We NULL the later one (j), so as to minimize the chances we + // NULL kFree and kRealloc. See comments below. This is fragile! + windows_fn_[j] = NULL; + } + } + } + + // There's always a chance that our module uses the same function + // as another module that we've already loaded. In that case, we + // need to set our windows_fn to NULL, to avoid double-patching. + for (int ifn = 0; ifn < kNumFunctions; ifn++) { + for (int imod = 0; + imod < sizeof(g_module_libcs)/sizeof(*g_module_libcs); imod++) { + if (g_module_libcs[imod]->is_valid() && + this->windows_fn(ifn) == g_module_libcs[imod]->windows_fn(ifn)) { + windows_fn_[ifn] = NULL; + } + } + } + + bool found_non_null = false; + for (int i = 0; i < kNumFunctions; i++) { + if (windows_fn_[i]) + found_non_null = true; + } + if (!found_non_null) + return false; + + // It's important we didn't NULL out windows_fn_[kFree] or [kRealloc]. + // The reason is, if those are NULL-ed out, we'll never patch them + // and thus never get an origstub_fn_ value for them, and when we + // try to call origstub_fn_[kFree/kRealloc] in Perftools_free and + // Perftools_realloc, below, it will fail. We could work around + // that by adding a pointer from one patch-unit to the other, but we + // haven't needed to yet. + CHECK(windows_fn_[kFree]); + CHECK(windows_fn_[kRealloc]); + + // OK, we successfully populated. Let's store our member information. 
+ module_base_address_ = module_entry.modBaseAddr; + module_base_size_ = module_entry.modBaseSize; + return true; +} + +template<int T> +bool LibcInfoWithPatchFunctions<T>::Patch(const LibcInfo& me_info) { + CopyFrom(me_info); // copies the module_entry and the windows_fn_ array + for (int i = 0; i < kNumFunctions; i++) { + if (windows_fn_[i] && windows_fn_[i] != perftools_fn_[i]) { + // if origstub_fn_ is not NULL, it's left around from a previous + // patch. We need to set it to NULL for the new Patch call. + // + // Note that origstub_fn_ was logically freed by + // PreamblePatcher::Unpatch, so we don't have to do anything + // about it. + origstub_fn_[i] = NULL; // Patch() will fill this in + CHECK_EQ(sidestep::SIDESTEP_SUCCESS, + PreamblePatcher::Patch(windows_fn_[i], perftools_fn_[i], + &origstub_fn_[i])); + } + } + set_is_valid(true); + return true; +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Unpatch() { + // We have to cast our GenericFnPtrs to void* for unpatch. This is + // contra the C++ spec; we use C-style casts to empahsize that. + for (int i = 0; i < kNumFunctions; i++) { + if (windows_fn_[i]) + CHECK_EQ(sidestep::SIDESTEP_SUCCESS, + PreamblePatcher::Unpatch((void*)windows_fn_[i], + (void*)perftools_fn_[i], + (void*)origstub_fn_[i])); + } + set_is_valid(false); +} + +void WindowsInfo::Patch() { + HMODULE hkernel32 = ::GetModuleHandleA("kernel32"); + CHECK_NE(hkernel32, NULL); + + // Unlike for libc, we know these exist in our module, so we can get + // and patch at the same time. + for (int i = 0; i < kNumFunctions; i++) { + function_info_[i].windows_fn = (GenericFnPtr) + ::GetProcAddress(hkernel32, function_info_[i].name); + // If origstub_fn is not NULL, it's left around from a previous + // patch. We need to set it to NULL for the new Patch call. 
+ // Since we've patched Unpatch() not to delete origstub_fn_ (it + // causes problems in some contexts, though obviously not this + // one), we should delete it now, before setting it to NULL. + // NOTE: casting from a function to a pointer is contra the C++ + // spec. It's not safe on IA64, but is on i386. We use + // a C-style cast here to emphasize this is not legal C++. + delete[] (char*)(function_info_[i].origstub_fn); + function_info_[i].origstub_fn = NULL; // Patch() will fill this in + CHECK_EQ(sidestep::SIDESTEP_SUCCESS, + PreamblePatcher::Patch(function_info_[i].windows_fn, + function_info_[i].perftools_fn, + &function_info_[i].origstub_fn)); + } +} + +void WindowsInfo::Unpatch() { + // We have to cast our GenericFnPtrs to void* for unpatch. This is + // contra the C++ spec; we use C-style casts to empahsize that. + for (int i = 0; i < kNumFunctions; i++) { + CHECK_EQ(sidestep::SIDESTEP_SUCCESS, + PreamblePatcher::Unpatch((void*)function_info_[i].windows_fn, + (void*)function_info_[i].perftools_fn, + (void*)function_info_[i].origstub_fn)); + } +} + +// You should hold the patch_all_modules_lock when calling this. +void PatchOneModuleLocked(const LibcInfo& me_info) { + // If we don't already have info on this module, let's add it. This + // is where we're sad that each libcX has a different type, so we + // can't use an array; instead, we have to use a switch statement. + // Patch() returns false if there were no libc functions in the module. + for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { + if (!g_module_libcs[i]->is_valid()) { // found an empty spot to add! 
+ switch (i) { + case 0: libc1.Patch(me_info); return; + case 1: libc2.Patch(me_info); return; + case 2: libc3.Patch(me_info); return; + case 3: libc4.Patch(me_info); return; + case 4: libc5.Patch(me_info); return; + case 5: libc6.Patch(me_info); return; + case 6: libc7.Patch(me_info); return; + case 7: libc8.Patch(me_info); return; + } + } + } + printf("PERFTOOLS ERROR: Too many modules containing libc in this executable\n"); +} + +void PatchMainExecutableLocked() { + if (main_executable.patched()) + return; // main executable has already been patched + ModuleEntryCopy fake_module_entry; // make a fake one to pass into Patch() + // No need to call PopulateModuleEntryProcAddresses on the main executable. + main_executable.PopulateWindowsFn(fake_module_entry); + main_executable.Patch(main_executable); +} + +// This lock is subject to a subtle and annoying lock inversion +// problem: it may interact badly with unknown internal windows locks. +// In particular, windows may be holding a lock when it calls +// LoadLibraryExW and FreeLibrary, which we've patched. We have those +// routines call PatchAllModules, which acquires this lock. If we +// make windows system calls while holding this lock, those system +// calls may need the internal windows locks that are being held in +// the call to LoadLibraryExW, resulting in deadlock. The solution is +// to be very careful not to call *any* windows routines while holding +// patch_all_modules_lock, inside PatchAllModules(). +static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED); + +// last_loaded: The set of modules that were loaded the last time +// PatchAllModules was called. This is an optimization for only +// looking at modules that were added or removed from the last call. +static std::set<HMODULE> *g_last_loaded; + +// Iterates over all the modules currently loaded by the executable, +// according to windows, and makes sure they're all patched. 
Most +// modules will already be in loaded_modules, meaning we have already +// loaded and either patched them or determined they did not need to +// be patched. Others will not, which means we need to patch them +// (if necessary). Finally, we have to go through the existing +// g_module_libcs and see if any of those are *not* in the modules +// currently loaded by the executable. If so, we need to invalidate +// them. Returns true if we did any work (patching or invalidating), +// false if we were a noop. May update loaded_modules as well. +// NOTE: you must hold the patch_all_modules_lock to access loaded_modules. +bool PatchAllModules() { + std::vector<ModuleEntryCopy> modules; + bool made_changes = false; + + const HANDLE hCurrentProcess = GetCurrentProcess(); + DWORD num_modules = 0; + HMODULE hModules[kMaxModules]; // max # of modules we support in one process + if (!::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules), + &num_modules)) { + num_modules = 0; + } + // EnumProcessModules actually set the bytes written into hModules, + // so we need to divide to make num_modules actually be a module-count. + num_modules /= sizeof(*hModules); + if (num_modules >= kMaxModules) { + printf("PERFTOOLS ERROR: Too many modules in this executable to try" + " to patch them all (if you need to, raise kMaxModules in" + " patch_functions.cc).\n"); + num_modules = kMaxModules; + } + + // Now we handle the unpatching of modules we have in g_module_libcs + // but that were not found in EnumProcessModules. We need to + // invalidate them. To speed that up, we store the EnumProcessModules + // output in a set. + // At the same time, we prepare for the adding of new modules, by + // removing from hModules all the modules we know we've already + // patched (or decided don't need to be patched). At the end, + // hModules will hold only the modules that we need to consider patching. 
+ std::set<HMODULE> currently_loaded_modules; + { + SpinLockHolder h(&patch_all_modules_lock); + if (!g_last_loaded) g_last_loaded = new std::set<HMODULE>; + // At the end of this loop, currently_loaded_modules contains the + // full list of EnumProcessModules, and hModules just the ones we + // haven't handled yet. + for (int i = 0; i < num_modules; ) { + currently_loaded_modules.insert(hModules[i]); + if (g_last_loaded->count(hModules[i]) > 0) { + hModules[i] = hModules[--num_modules]; // replace element i with tail + } else { + i++; // keep element i + } + } + // Now we do the unpatching/invalidation. + for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { + if (g_module_libcs[i]->patched() && + currently_loaded_modules.count(g_module_libcs[i]->hmodule()) == 0) { + // Means g_module_libcs[i] is no longer loaded (no me32 matched). + // We could call Unpatch() here, but why bother? The module + // has gone away, so nobody is going to call into it anyway. + g_module_libcs[i]->set_is_valid(false); + made_changes = true; + } + } + // Update the loaded module cache. + g_last_loaded->swap(currently_loaded_modules); + } + + // Now that we know what modules are new, let's get the info we'll + // need to patch them. Note this *cannot* be done while holding the + // lock, since it needs to make windows calls (see the lock-inversion + // comments before the definition of patch_all_modules_lock). + MODULEINFO mi; + for (int i = 0; i < num_modules; i++) { + if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi))) + modules.push_back(ModuleEntryCopy(mi)); + } + + // Now we can do the patching of new modules. 
+ { + SpinLockHolder h(&patch_all_modules_lock); + for (std::vector<ModuleEntryCopy>::iterator it = modules.begin(); + it != modules.end(); ++it) { + LibcInfo libc_info; + if (libc_info.PopulateWindowsFn(*it)) { // true==module has libc routines + PatchOneModuleLocked(libc_info); + made_changes = true; + } + } + + // Now that we've dealt with the modules (dlls), update the main + // executable. We do this last because PatchMainExecutableLocked + // wants to look at how other modules were patched. + if (!main_executable.patched()) { + PatchMainExecutableLocked(); + made_changes = true; + } + } + // TODO(csilvers): for this to be reliable, we need to also take + // into account if we *would* have patched any modules had they not + // already been loaded. (That is, made_changes should ignore + // g_last_loaded.) + return made_changes; +} + + +} // end unnamed namespace + +// --------------------------------------------------------------------- +// Now that we've done all the patching machinery, let's actually +// define the functions we're patching in. Mostly these are +// simple wrappers around the do_* routines in tcmalloc.cc. +// +// In fact, we #include tcmalloc.cc to get at the tcmalloc internal +// do_* functions, the better to write our own hook functions. +// U-G-L-Y, I know. But the alternatives are, perhaps, worse. This +// also lets us define _msize(), _expand(), and other windows-specific +// functions here, using tcmalloc internals, without polluting +// tcmalloc.cc. +// ------------------------------------------------------------------- + +// TODO(csilvers): refactor tcmalloc.cc into two files, so I can link +// against the file with do_malloc, and ignore the one with malloc. 
+#include "tcmalloc.cc" + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_malloc(size_t size) __THROW { + return malloc_fast_path<tcmalloc::malloc_oom>(size); +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_free(void* ptr) __THROW { + MallocHook::InvokeDeleteHook(ptr); + // This calls the windows free if do_free decides ptr was not + // allocated by tcmalloc. Note it calls the origstub_free from + // *this* templatized instance of LibcInfo. See "template + // trickiness" above. + do_free_with_callback(ptr, (void (*)(void*))origstub_fn_[kFree], false, 0); +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_free_base(void* ptr) __THROW{ + MallocHook::InvokeDeleteHook(ptr); + // This calls the windows free if do_free decides ptr was not + // allocated by tcmalloc. Note it calls the origstub_free from + // *this* templatized instance of LibcInfo. See "template + // trickiness" above. + do_free_with_callback(ptr, (void(*)(void*))origstub_fn_[kFreeBase], false, 0); +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_realloc( + void* old_ptr, size_t new_size) __THROW { + if (old_ptr == NULL) { + void* result = do_malloc_or_cpp_alloc(new_size); + MallocHook::InvokeNewHook(result, new_size); + return result; + } + if (new_size == 0) { + MallocHook::InvokeDeleteHook(old_ptr); + do_free_with_callback(old_ptr, + (void (*)(void*))origstub_fn_[kFree], false, 0); + return NULL; + } + return do_realloc_with_callback( + old_ptr, new_size, + (void (*)(void*))origstub_fn_[kFree], + (size_t (*)(const void*))origstub_fn_[k_Msize]); +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_calloc( + size_t n, size_t elem_size) __THROW { + void* result = do_calloc(n, elem_size); + MallocHook::InvokeNewHook(result, n * elem_size); + return result; +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_new(size_t size) { + return malloc_fast_path<tcmalloc::cpp_throw_oom>(size); +} + 
+template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_newarray(size_t size) { + return malloc_fast_path<tcmalloc::cpp_throw_oom>(size); +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_delete(void *p) { + MallocHook::InvokeDeleteHook(p); + do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree], false, 0); +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_deletearray(void *p) { + MallocHook::InvokeDeleteHook(p); + do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree], false, 0); +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_new_nothrow( + size_t size, const std::nothrow_t&) __THROW { + return malloc_fast_path<tcmalloc::cpp_nothrow_oom>(size); +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_newarray_nothrow( + size_t size, const std::nothrow_t&) __THROW { + return malloc_fast_path<tcmalloc::cpp_nothrow_oom>(size); +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_delete_nothrow( + void *p, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(p); + do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree], false, 0); +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_deletearray_nothrow( + void *p, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(p); + do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree], false, 0); +} + + +// _msize() lets you figure out how much space is reserved for a +// pointer, in Windows. Even if applications don't call it, any DLL +// with global constructors will call (transitively) something called +// __dllonexit_lk in order to make sure the destructors get called +// when the dll unloads. And that will call msize -- horrible things +// can ensue if this is not hooked. Other parts of libc may also call +// this internally. 
+ +template<int T> +size_t LibcInfoWithPatchFunctions<T>::Perftools__msize(void* ptr) __THROW { + return GetSizeWithCallback(ptr, (size_t (*)(const void*))origstub_fn_[k_Msize]); +} + +// We need to define this because internal windows functions like to +// call into it(?). _expand() is like realloc but doesn't move the +// pointer. We punt, which will cause callers to fall back on realloc. +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools__expand(void *ptr, + size_t size) __THROW { + return NULL; +} + +LPVOID WINAPI WindowsInfo::Perftools_HeapAlloc(HANDLE hHeap, DWORD dwFlags, + DWORD_PTR dwBytes) { + LPVOID result = ((LPVOID (WINAPI *)(HANDLE, DWORD, DWORD_PTR)) + function_info_[kHeapAlloc].origstub_fn)( + hHeap, dwFlags, dwBytes); + MallocHook::InvokeNewHook(result, dwBytes); + return result; +} + +BOOL WINAPI WindowsInfo::Perftools_HeapFree(HANDLE hHeap, DWORD dwFlags, + LPVOID lpMem) { + MallocHook::InvokeDeleteHook(lpMem); + return ((BOOL (WINAPI *)(HANDLE, DWORD, LPVOID)) + function_info_[kHeapFree].origstub_fn)( + hHeap, dwFlags, lpMem); +} + +LPVOID WINAPI WindowsInfo::Perftools_VirtualAllocEx(HANDLE process, + LPVOID address, + SIZE_T size, DWORD type, + DWORD protect) { + LPVOID result = ((LPVOID (WINAPI *)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD)) + function_info_[kVirtualAllocEx].origstub_fn)( + process, address, size, type, protect); + // VirtualAllocEx() seems to be the Windows equivalent of mmap() + MallocHook::InvokeMmapHook(result, address, size, protect, type, -1, 0); + return result; +} + +BOOL WINAPI WindowsInfo::Perftools_VirtualFreeEx(HANDLE process, LPVOID address, + SIZE_T size, DWORD type) { + MallocHook::InvokeMunmapHook(address, size); + return ((BOOL (WINAPI *)(HANDLE, LPVOID, SIZE_T, DWORD)) + function_info_[kVirtualFreeEx].origstub_fn)( + process, address, size, type); +} + +LPVOID WINAPI WindowsInfo::Perftools_MapViewOfFileEx( + HANDLE hFileMappingObject, DWORD dwDesiredAccess, DWORD dwFileOffsetHigh, + DWORD 
dwFileOffsetLow, SIZE_T dwNumberOfBytesToMap, LPVOID lpBaseAddress) { + // For this function pair, you always deallocate the full block of + // data that you allocate, so NewHook/DeleteHook is the right API. + LPVOID result = ((LPVOID (WINAPI *)(HANDLE, DWORD, DWORD, DWORD, + SIZE_T, LPVOID)) + function_info_[kMapViewOfFileEx].origstub_fn)( + hFileMappingObject, dwDesiredAccess, dwFileOffsetHigh, + dwFileOffsetLow, dwNumberOfBytesToMap, lpBaseAddress); + MallocHook::InvokeNewHook(result, dwNumberOfBytesToMap); + return result; +} + +BOOL WINAPI WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress) { + MallocHook::InvokeDeleteHook(lpBaseAddress); + return ((BOOL (WINAPI *)(LPCVOID)) + function_info_[kUnmapViewOfFile].origstub_fn)( + lpBaseAddress); +} + +HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName, + HANDLE hFile, + DWORD dwFlags) { + HMODULE rv; + // Check to see if the modules is already loaded, flag 0 gets a + // reference if it was loaded. If it was loaded no need to call + // PatchAllModules, just increase the reference count to match + // what GetModuleHandleExW does internally inside windows. + if (::GetModuleHandleExW(0, lpFileName, &rv)) { + return rv; + } else { + // Not already loaded, so load it. + rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD)) + function_info_[kLoadLibraryExW].origstub_fn)( + lpFileName, hFile, dwFlags); + // This will patch any newly loaded libraries, if patching needs + // to be done. + PatchAllModules(); + + return rv; + } +} + +BOOL WINAPI WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule) { + BOOL rv = ((BOOL (WINAPI *)(HMODULE)) + function_info_[kFreeLibrary].origstub_fn)(hLibModule); + + // Check to see if the module is still loaded by passing the base + // address and seeing if it comes back with the same address. If it + // is the same address it's still loaded, so the FreeLibrary() call + // was a noop, and there's no need to redo the patching. 
+ HMODULE owner = NULL; + BOOL result = ::GetModuleHandleExW( + (GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | + GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT), + (LPCWSTR)hLibModule, + &owner); + if (result && owner == hLibModule) + return rv; + + PatchAllModules(); // this will fix up the list of patched libraries + return rv; +} + + +// --------------------------------------------------------------------- +// PatchWindowsFunctions() +// This is the function that is exposed to the outside world. +// It should be called before the program becomes multi-threaded, +// since main_executable_windows.Patch() is not thread-safe. +// --------------------------------------------------------------------- + +void PatchWindowsFunctions() { + // This does the libc patching in every module, and the main executable. + PatchAllModules(); + main_executable_windows.Patch(); +} + +#if 0 +// It's possible to unpatch all the functions when we are exiting. + +// The idea is to handle properly windows-internal data that is +// allocated before PatchWindowsFunctions is called. If all +// destruction happened in reverse order from construction, then we +// could call UnpatchWindowsFunctions at just the right time, so that +// that early-allocated data would be freed using the windows +// allocation functions rather than tcmalloc. The problem is that +// windows allocates some structures lazily, so it would allocate them +// late (using tcmalloc) and then try to deallocate them late as well. +// So instead of unpatching, we just modify all the tcmalloc routines +// so they call through to the libc rountines if the memory in +// question doesn't seem to have been allocated with tcmalloc. I keep +// this unpatch code around for reference. + +void UnpatchWindowsFunctions() { + // We need to go back to the system malloc/etc at global destruct time, + // so objects that were constructed before tcmalloc, using the system + // malloc, can destroy themselves using the system free. 
This depends + // on DLLs unloading in the reverse order in which they load! + // + // We also go back to the default HeapAlloc/etc, just for consistency. + // Who knows, it may help avoid weird bugs in some situations. + main_executable_windows.Unpatch(); + main_executable.Unpatch(); + if (libc1.is_valid()) libc1.Unpatch(); + if (libc2.is_valid()) libc2.Unpatch(); + if (libc3.is_valid()) libc3.Unpatch(); + if (libc4.is_valid()) libc4.Unpatch(); + if (libc5.is_valid()) libc5.Unpatch(); + if (libc6.is_valid()) libc6.Unpatch(); + if (libc7.is_valid()) libc7.Unpatch(); + if (libc8.is_valid()) libc8.Unpatch(); +} +#endif diff --git a/src/third_party/gperftools-2.7/src/windows/port.cc b/src/third_party/gperftools-2.7/src/windows/port.cc new file mode 100644 index 00000000000..76224a23431 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/port.cc @@ -0,0 +1,235 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Craig Silverstein + */ + +#ifndef _WIN32 +# error You should only be including windows/port.cc in a windows environment! +#endif + +#define NOMINMAX // so std::max, below, compiles correctly +#include <config.h> +#include <string.h> // for strlen(), memset(), memcmp() +#include <assert.h> +#include <stdarg.h> // for va_list, va_start, va_end +#include <algorithm> // for std:{min,max} +#include <windows.h> +#include "port.h" +#include "base/logging.h" +#include "base/spinlock.h" +#include "internal_logging.h" + +// ----------------------------------------------------------------------- +// Basic libraries + +PERFTOOLS_DLL_DECL +int getpagesize() { + static int pagesize = 0; + if (pagesize == 0) { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + pagesize = std::max(system_info.dwPageSize, + system_info.dwAllocationGranularity); + } + return pagesize; +} + +extern "C" PERFTOOLS_DLL_DECL void* __sbrk(ptrdiff_t increment) { + LOG(FATAL, "Windows doesn't implement sbrk!\n"); + return NULL; +} + +// We need to write to 'stderr' without having windows allocate memory. +// The safest way is via a low-level call like WriteConsoleA(). But +// even then we need to be sure to print in small bursts so as to not +// require memory allocation. 
+extern "C" PERFTOOLS_DLL_DECL void WriteToStderr(const char* buf, int len) { + // Looks like windows allocates for writes of >80 bytes + for (int i = 0; i < len; i += 80) { + write(STDERR_FILENO, buf + i, std::min(80, len - i)); + } +} + + +// ----------------------------------------------------------------------- +// Threads code + +// Windows doesn't support pthread_key_create's destr_function, and in +// fact it's a bit tricky to get code to run when a thread exits. This +// is cargo-cult magic from http://www.codeproject.com/threads/tls.asp. +// This code is for VC++ 7.1 and later; VC++ 6.0 support is possible +// but more busy-work -- see the webpage for how to do it. If all +// this fails, we could use DllMain instead. The big problem with +// DllMain is it doesn't run if this code is statically linked into a +// binary (it also doesn't run if the thread is terminated via +// TerminateThread, which if we're lucky this routine does). + +// Force a reference to _tls_used to make the linker create the TLS directory +// if it's not already there (that is, even if __declspec(thread) is not used). +// Force a reference to p_thread_callback_tcmalloc and p_process_term_tcmalloc +// to prevent whole program optimization from discarding the variables. +#ifdef _MSC_VER +#if defined(_M_IX86) +#pragma comment(linker, "/INCLUDE:__tls_used") +#pragma comment(linker, "/INCLUDE:_p_thread_callback_tcmalloc") +#pragma comment(linker, "/INCLUDE:_p_process_term_tcmalloc") +#elif defined(_M_X64) +#pragma comment(linker, "/INCLUDE:_tls_used") +#pragma comment(linker, "/INCLUDE:p_thread_callback_tcmalloc") +#pragma comment(linker, "/INCLUDE:p_process_term_tcmalloc") +#endif +#endif + +// When destr_fn eventually runs, it's supposed to take as its +// argument the tls-value associated with key that pthread_key_create +// creates. (Yeah, it sounds confusing but it's really not.) We +// store the destr_fn/key pair in this data structure. 
Because we +// store this in a single var, this implies we can only have one +// destr_fn in a program! That's enough in practice. If asserts +// trigger because we end up needing more, we'll have to turn this +// into an array. +struct DestrFnClosure { + void (*destr_fn)(void*); + pthread_key_t key_for_destr_fn_arg; +}; + +static DestrFnClosure destr_fn_info; // initted to all NULL/0. + +static int on_process_term(void) { + if (destr_fn_info.destr_fn) { + void *ptr = TlsGetValue(destr_fn_info.key_for_destr_fn_arg); + // This shouldn't be necessary, but in Release mode, Windows + // sometimes trashes the pointer in the TLS slot, so we need to + // remove the pointer from the TLS slot before the thread dies. + TlsSetValue(destr_fn_info.key_for_destr_fn_arg, NULL); + if (ptr) // pthread semantics say not to call if ptr is NULL + (*destr_fn_info.destr_fn)(ptr); + } + return 0; +} + +static void NTAPI on_tls_callback(HINSTANCE h, DWORD dwReason, PVOID pv) { + if (dwReason == DLL_THREAD_DETACH) { // thread is being destroyed! + on_process_term(); + } +} + +#ifdef _MSC_VER + +// extern "C" suppresses C++ name mangling so we know the symbol names +// for the linker /INCLUDE:symbol pragmas above. +extern "C" { +// This tells the linker to run these functions. +#pragma data_seg(push, old_seg) +#pragma data_seg(".CRT$XLB") +void (NTAPI *p_thread_callback_tcmalloc)( + HINSTANCE h, DWORD dwReason, PVOID pv) = on_tls_callback; +#pragma data_seg(".CRT$XTU") +int (*p_process_term_tcmalloc)(void) = on_process_term; +#pragma data_seg(pop, old_seg) +} // extern "C" + +#else // #ifdef _MSC_VER [probably msys/mingw] + +// We have to try the DllMain solution here, because we can't use the +// msvc-specific pragmas. 
+BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) { + if (dwReason == DLL_THREAD_DETACH) + on_tls_callback(h, dwReason, pv); + else if (dwReason == DLL_PROCESS_DETACH) + on_process_term(); + return TRUE; +} + +#endif // #ifdef _MSC_VER + +extern "C" pthread_key_t PthreadKeyCreate(void (*destr_fn)(void*)) { + // Semantics are: we create a new key, and then promise to call + // destr_fn with TlsGetValue(key) when the thread is destroyed + // (as long as TlsGetValue(key) is not NULL). + pthread_key_t key = TlsAlloc(); + if (destr_fn) { // register it + // If this assert fails, we'll need to support an array of destr_fn_infos + assert(destr_fn_info.destr_fn == NULL); + destr_fn_info.destr_fn = destr_fn; + destr_fn_info.key_for_destr_fn_arg = key; + } + return key; +} + +// NOTE: this is Win2K and later. For Win98 we could use a CRITICAL_SECTION... +extern "C" int perftools_pthread_once(pthread_once_t *once_control, + void (*init_routine)(void)) { + // Try for a fast path first. Note: this should be an acquire semantics read. + // It is on x86 and x64, where Windows runs. + if (*once_control != 1) { + while (true) { + switch (InterlockedCompareExchange(once_control, 2, 0)) { + case 0: + init_routine(); + InterlockedExchange(once_control, 1); + return 0; + case 1: + // The initializer has already been executed + return 0; + default: + // The initializer is being processed by another thread + SwitchToThread(); + } + } + } + return 0; +} + + +// ----------------------------------------------------------------------- +// These functions rework existing functions of the same name in the +// Google codebase. + +// A replacement for HeapProfiler::CleanupOldProfiles. 
+void DeleteMatchingFiles(const char* prefix, const char* full_glob) { + WIN32_FIND_DATAA found; // that final A is for Ansi (as opposed to Unicode) + HANDLE hFind = FindFirstFileA(full_glob, &found); // A is for Ansi + if (hFind != INVALID_HANDLE_VALUE) { + const int prefix_length = strlen(prefix); + do { + const char *fname = found.cFileName; + if ((strlen(fname) >= prefix_length) && + (memcmp(fname, prefix, prefix_length) == 0)) { + RAW_VLOG(0, "Removing old heap profile %s\n", fname); + // TODO(csilvers): we really need to unlink dirname + fname + _unlink(fname); + } + } while (FindNextFileA(hFind, &found) != FALSE); // A is for Ansi + FindClose(hFind); + } +} diff --git a/src/third_party/gperftools-2.7/src/windows/port.h b/src/third_party/gperftools-2.7/src/windows/port.h new file mode 100644 index 00000000000..eb9702b331b --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/port.h @@ -0,0 +1,499 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Craig Silverstein + * + * These are some portability typedefs and defines to make it a bit + * easier to compile this code under VC++. + * + * Several of these are taken from glib: + * http://developer.gnome.org/doc/API/glib/glib-windows-compatability-functions.html + */ + +#ifndef GOOGLE_BASE_WINDOWS_H_ +#define GOOGLE_BASE_WINDOWS_H_ + +/* You should never include this file directly, but always include it + from either config.h (MSVC) or mingw.h (MinGW/msys). 
*/ +#if !defined(GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_) && \ + !defined(GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_) +# error "port.h should only be included from config.h or mingw.h" +#endif + +#ifdef _WIN32 + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN /* We always want minimal includes */ +#endif +#include <windows.h> +#include <io.h> /* because we so often use open/close/etc */ +#include <direct.h> /* for _getcwd */ +#include <process.h> /* for _getpid */ +#include <limits.h> /* for PATH_MAX */ +#include <stdarg.h> /* for va_list */ +#include <stdio.h> /* need this to override stdio's (v)snprintf */ +#include <sys/types.h> /* for _off_t */ +#include <assert.h> +#include <stdlib.h> /* for rand, srand, _strtoxxx */ + +#if defined(_MSC_VER) && _MSC_VER >= 1900 +#define _TIMESPEC_DEFINED +#include <time.h> +#endif + +/* + * 4018: signed/unsigned mismatch is common (and ok for signed_i < unsigned_i) + * 4244: otherwise we get problems when subtracting two size_t's to an int + * 4288: VC++7 gets confused when a var is defined in a loop and then after it + * 4267: too many false positives for "conversion gives possible data loss" + * 4290: it's ok windows ignores the "throw" directive + * 4996: Yes, we're ok using "unsafe" functions like vsnprintf and getenv() + * 4146: internal_logging.cc intentionally negates an unsigned value + */ +#ifdef _MSC_VER +#pragma warning(disable:4018 4244 4288 4267 4290 4996 4146) +#endif + +#ifndef __cplusplus +/* MSVC does not support C99 */ +# if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# define inline __inline +# else +# define inline static +# endif +# endif +#endif + +#ifdef __cplusplus +# define EXTERN_C extern "C" +#else +# define EXTERN_C extern +#endif + +/* ----------------------------------- BASIC TYPES */ + +#ifndef HAVE_STDINT_H +#ifndef HAVE___INT64 /* we need to have all the __intX names */ +# error Do not know how to set up type aliases. Edit port.h for your system. 
+#endif + +typedef __int8 int8_t; +typedef __int16 int16_t; +typedef __int32 int32_t; +typedef __int64 int64_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; +#endif /* #ifndef HAVE_STDINT_H */ + +/* I guess MSVC's <types.h> doesn't include ssize_t by default? */ +#ifdef _MSC_VER +typedef intptr_t ssize_t; +#endif + +/* ----------------------------------- THREADS */ + +#ifndef HAVE_PTHREAD /* not true for MSVC, but may be true for MSYS */ +typedef DWORD pthread_t; +typedef DWORD pthread_key_t; +typedef LONG pthread_once_t; +enum { PTHREAD_ONCE_INIT = 0 }; /* important that this be 0! for SpinLock */ + +inline pthread_t pthread_self(void) { + return GetCurrentThreadId(); +} + +#ifdef __cplusplus +inline bool pthread_equal(pthread_t left, pthread_t right) { + return left == right; +} + +/* + * windows/port.h defines compatibility APIs for several .h files, which + * we therefore shouldn't be #including directly. This hack keeps us from + * doing so. TODO(csilvers): do something more principled. 
+ */ +#define GOOGLE_MAYBE_THREADS_H_ 1 +/* This replaces maybe_threads.{h,cc} */ + +EXTERN_C pthread_key_t PthreadKeyCreate(void (*destr_fn)(void*)); /* port.cc */ + +inline int perftools_pthread_key_create(pthread_key_t *pkey, + void (*destructor)(void*)) { + pthread_key_t key = PthreadKeyCreate(destructor); + if (key != TLS_OUT_OF_INDEXES) { + *(pkey) = key; + return 0; + } else { + return GetLastError(); + } +} + +inline void* perftools_pthread_getspecific(DWORD key) { + DWORD err = GetLastError(); + void* rv = TlsGetValue(key); + if (err) SetLastError(err); + return rv; +} + +inline int perftools_pthread_setspecific(pthread_key_t key, const void *value) { + if (TlsSetValue(key, (LPVOID)value)) + return 0; + else + return GetLastError(); +} + +EXTERN_C int perftools_pthread_once(pthread_once_t *once_control, + void (*init_routine)(void)); + +#endif /* __cplusplus */ + +inline void sched_yield(void) { + Sleep(0); +} + +#endif /* HAVE_PTHREAD */ + +/* + * __declspec(thread) isn't usable in a dll opened via LoadLibrary(). + * But it doesn't work to LoadLibrary() us anyway, because of all the + * things we need to do before main()! So this kind of TLS is safe for us. + */ +#define __thread __declspec(thread) + +/* + * This code is obsolete, but I keep it around in case we are ever in + * an environment where we can't or don't want to use google spinlocks + * (from base/spinlock.{h,cc}). In that case, uncommenting this out, + * and removing spinlock.cc from the build, should be enough to revert + * back to using native spinlocks. + */ +#if 0 +// Windows uses a spinlock internally for its mutexes, making our life easy! +// However, the Windows spinlock must always be initialized, making life hard, +// since we want LINKER_INITIALIZED. We work around this by having the +// linker initialize a bool to 0, and check that before accessing the mutex. 
+// This replaces spinlock.{h,cc}, and all the stuff it depends on (atomicops) +#ifdef __cplusplus +class SpinLock { + public: + SpinLock() : initialize_token_(PTHREAD_ONCE_INIT) {} + // Used for global SpinLock vars (see base/spinlock.h for more details). + enum StaticInitializer { LINKER_INITIALIZED }; + explicit SpinLock(StaticInitializer) : initialize_token_(PTHREAD_ONCE_INIT) { + perftools_pthread_once(&initialize_token_, InitializeMutex); + } + + // It's important SpinLock not have a destructor: otherwise we run + // into problems when the main thread has exited, but other threads + // are still running and try to access a main-thread spinlock. This + // means we leak mutex_ (we should call DeleteCriticalSection() + // here). However, I've verified that all SpinLocks used in + // perftools have program-long scope anyway, so the leak is + // perfectly fine. But be aware of this for the future! + + void Lock() { + // You'd thionk this would be unnecessary, since we call + // InitializeMutex() in our constructor. But sometimes Lock() can + // be called before our constructor is! This can only happen in + // global constructors, when this is a global. If we live in + // bar.cc, and some global constructor in foo.cc calls a routine + // in bar.cc that calls this->Lock(), then Lock() may well run + // before our global constructor does. To protect against that, + // we do this check. For SpinLock objects created after main() + // has started, this pthread_once call will always be a noop. + perftools_pthread_once(&initialize_token_, InitializeMutex); + EnterCriticalSection(&mutex_); + } + void Unlock() { + LeaveCriticalSection(&mutex_); + } + + // Used in assertion checks: assert(lock.IsHeld()) (see base/spinlock.h). + inline bool IsHeld() const { + // This works, but probes undocumented internals, so I've commented it out. + // c.f. 
http://msdn.microsoft.com/msdnmag/issues/03/12/CriticalSections/ + //return mutex_.LockCount>=0 && mutex_.OwningThread==GetCurrentThreadId(); + return true; + } + private: + void InitializeMutex() { InitializeCriticalSection(&mutex_); } + + pthread_once_t initialize_token_; + CRITICAL_SECTION mutex_; +}; + +class SpinLockHolder { // Acquires a spinlock for as long as the scope lasts + private: + SpinLock* lock_; + public: + inline explicit SpinLockHolder(SpinLock* l) : lock_(l) { l->Lock(); } + inline ~SpinLockHolder() { lock_->Unlock(); } +}; +#endif // #ifdef __cplusplus + +// This keeps us from using base/spinlock.h's implementation of SpinLock. +#define BASE_SPINLOCK_H_ 1 + +#endif /* #if 0 */ + +/* ----------------------------------- MMAP and other memory allocation */ + +#ifndef HAVE_MMAP /* not true for MSVC, but may be true for msys */ +#define MAP_FAILED 0 +#define MREMAP_FIXED 2 /* the value in linux, though it doesn't really matter */ +/* These, when combined with the mmap invariants below, yield the proper action */ +#define PROT_READ PAGE_READWRITE +#define PROT_WRITE PAGE_READWRITE +#define MAP_ANONYMOUS MEM_RESERVE +#define MAP_PRIVATE MEM_COMMIT +#define MAP_SHARED MEM_RESERVE /* value of this #define is 100% arbitrary */ + +#if __STDC__ && !defined(__MINGW32__) +typedef _off_t off_t; +#endif + +/* VirtualAlloc only replaces for mmap when certain invariants are kept. */ +inline void *mmap(void *addr, size_t length, int prot, int flags, + int fd, off_t offset) { + if (addr == NULL && fd == -1 && offset == 0 && + prot == (PROT_READ|PROT_WRITE) && flags == (MAP_PRIVATE|MAP_ANONYMOUS)) { + return VirtualAlloc(0, length, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + } else { + return NULL; + } +} + +inline int munmap(void *addr, size_t length) { + return VirtualFree(addr, 0, MEM_RELEASE) ? 
0 : -1; +} +#endif /* HAVE_MMAP */ + +/* We could maybe use VirtualAlloc for sbrk as well, but no need */ +inline void *sbrk(intptr_t increment) { + // sbrk returns -1 on failure + return (void*)-1; +} + + +/* ----------------------------------- STRING ROUTINES */ + +/* + * We can't just use _vsnprintf and _snprintf as drop-in-replacements, + * because they don't always NUL-terminate. :-( We also can't use the + * name vsnprintf, since windows defines that (but not snprintf (!)). + */ +#if defined(_MSC_VER) && _MSC_VER >= 1400 +/* We can use safe CRT functions, which the required functionality */ +inline int perftools_vsnprintf(char *str, size_t size, const char *format, + va_list ap) { + return vsnprintf_s(str, size, _TRUNCATE, format, ap); +} +#else +inline int perftools_vsnprintf(char *str, size_t size, const char *format, + va_list ap) { + if (size == 0) /* not even room for a \0? */ + return -1; /* not what C99 says to do, but what windows does */ + str[size-1] = '\0'; + return _vsnprintf(str, size-1, format, ap); +} +#endif + +#ifndef HAVE_SNPRINTF +inline int snprintf(char *str, size_t size, const char *format, ...) 
{ + va_list ap; + int r; + va_start(ap, format); + r = perftools_vsnprintf(str, size, format, ap); + va_end(ap); + return r; +} +#endif + +#ifndef HAVE_INTTYPES_H +#define PRIx64 "I64x" +#define SCNx64 "I64x" +#define PRId64 "I64d" +#define SCNd64 "I64d" +#define PRIu64 "I64u" +#ifdef _WIN64 +# define PRIuPTR "llu" +# define PRIxPTR "llx" +#else +# define PRIuPTR "lu" +# define PRIxPTR "lx" +#endif +#endif + +/* ----------------------------------- FILE IO */ + +#ifndef PATH_MAX +#define PATH_MAX 1024 +#endif +#ifndef __MINGW32__ +enum { STDIN_FILENO = 0, STDOUT_FILENO = 1, STDERR_FILENO = 2 }; +#endif +#ifndef O_RDONLY +#define O_RDONLY _O_RDONLY +#endif + +#if __STDC__ && !defined(__MINGW32__) +/* These functions are considered non-standard */ +inline int access(const char *pathname, int mode) { + return _access(pathname, mode); +} +inline int open(const char *pathname, int flags, int mode = 0) { + return _open(pathname, flags, mode); +} +inline int close(int fd) { + return _close(fd); +} +inline ssize_t read(int fd, void *buf, size_t count) { + return _read(fd, buf, count); +} +inline ssize_t write(int fd, const void *buf, size_t count) { + return _write(fd, buf, count); +} +inline off_t lseek(int fd, off_t offset, int whence) { + return _lseek(fd, offset, whence); +} +inline char *getcwd(char *buf, size_t size) { + return _getcwd(buf, size); +} +inline int mkdir(const char *pathname, int) { + return _mkdir(pathname); +} + +inline FILE *popen(const char *command, const char *type) { + return _popen(command, type); +} +inline int pclose(FILE *stream) { + return _pclose(stream); +} +#endif + +EXTERN_C PERFTOOLS_DLL_DECL void WriteToStderr(const char* buf, int len); + +/* ----------------------------------- SYSTEM/PROCESS */ + +#ifndef HAVE_PID_T +typedef int pid_t; +#endif + +#if __STDC__ && !defined(__MINGW32__) +inline pid_t getpid(void) { return _getpid(); } +#endif +inline pid_t getppid(void) { return 0; } + +/* Handle case when poll is used to simulate sleep. 
*/ +inline int poll(struct pollfd* fds, int nfds, int timeout) { + assert(fds == NULL); + assert(nfds == 0); + Sleep(timeout); + return 0; +} + +EXTERN_C PERFTOOLS_DLL_DECL int getpagesize(); /* in port.cc */ + +/* ----------------------------------- OTHER */ + +inline void srandom(unsigned int seed) { srand(seed); } +inline long random(void) { return rand(); } + +#ifndef HAVE_DECL_SLEEP +#define HAVE_DECL_SLEEP 0 +#endif + +#if !HAVE_DECL_SLEEP +inline unsigned int sleep(unsigned int seconds) { + Sleep(seconds * 1000); + return 0; +} +#endif + +// mingw64 seems to define timespec (though mingw.org mingw doesn't), +// protected by the _TIMESPEC_DEFINED macro. +#ifndef _TIMESPEC_DEFINED +struct timespec { + int tv_sec; + int tv_nsec; +}; +#endif + +#ifndef HAVE_DECL_NANOSLEEP +#define HAVE_DECL_NANOSLEEP 0 +#endif + +// latest mingw64 has nanosleep. Earlier mingw and MSVC do not +#if !HAVE_DECL_NANOSLEEP +inline int nanosleep(const struct timespec *req, struct timespec *rem) { + Sleep(req->tv_sec * 1000 + req->tv_nsec / 1000000); + return 0; +} +#endif + +#ifndef __MINGW32__ +#if defined(_MSC_VER) && _MSC_VER < 1800 +inline long long int strtoll(const char *nptr, char **endptr, int base) { + return _strtoi64(nptr, endptr, base); +} +inline unsigned long long int strtoull(const char *nptr, char **endptr, + int base) { + return _strtoui64(nptr, endptr, base); +} +inline long long int strtoq(const char *nptr, char **endptr, int base) { + return _strtoi64(nptr, endptr, base); +} +#endif +inline unsigned long long int strtouq(const char *nptr, char **endptr, + int base) { + return _strtoui64(nptr, endptr, base); +} +inline long long atoll(const char *nptr) { + return _atoi64(nptr); +} +#endif + +#define __THROW throw() + +/* ----------------------------------- TCMALLOC-SPECIFIC */ + +/* tcmalloc.cc calls this so we can patch VirtualAlloc() et al. 
*/ +extern void PatchWindowsFunctions(); + +#endif /* _WIN32 */ + +#undef inline +#undef EXTERN_C + +#endif /* GOOGLE_BASE_WINDOWS_H_ */ diff --git a/src/third_party/gperftools-2.7/src/windows/preamble_patcher.cc b/src/third_party/gperftools-2.7/src/windows/preamble_patcher.cc new file mode 100644 index 00000000000..9ce08168019 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/preamble_patcher.cc @@ -0,0 +1,736 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * Author: Scott Francis + * + * Implementation of PreamblePatcher + */ + +#include "preamble_patcher.h" + +#include "mini_disassembler.h" + +// compatibility shims +#include "base/logging.h" + +// Definitions of assembly statements we need +#define ASM_JMP32REL 0xE9 +#define ASM_INT3 0xCC +#define ASM_JMP32ABS_0 0xFF +#define ASM_JMP32ABS_1 0x25 +#define ASM_JMP8REL 0xEB +#define ASM_JCC32REL_0 0x0F +#define ASM_JCC32REL_1_MASK 0x80 +#define ASM_NOP 0x90 +// X64 opcodes +#define ASM_REXW 0x48 +#define ASM_MOVRAX_IMM 0xB8 +#define ASM_JMP 0xFF +#define ASM_JMP_RAX 0xE0 + +namespace sidestep { + +PreamblePatcher::PreamblePage* PreamblePatcher::preamble_pages_ = NULL; +long PreamblePatcher::granularity_ = 0; +long PreamblePatcher::pagesize_ = 0; +bool PreamblePatcher::initialized_ = false; + +static const unsigned int kPreamblePageMagic = 0x4347414D; // "MAGC" + +// Handle a special case that we see with functions that point into an +// IAT table (including functions linked statically into the +// application): these function already starts with ASM_JMP32*. For +// instance, malloc() might be implemented as a JMP to __malloc(). +// This function follows the initial JMPs for us, until we get to the +// place where the actual code is defined. If we get to STOP_BEFORE, +// we return the address before stop_before. The stop_before_trampoline +// flag is used in 64-bit mode. 
If true, we will return the address +// before a trampoline is detected. Trampolines are defined as: +// +// nop +// mov rax, <replacement_function> +// jmp rax +// +// See PreamblePatcher::RawPatchWithStub for more information. +void* PreamblePatcher::ResolveTargetImpl(unsigned char* target, + unsigned char* stop_before, + bool stop_before_trampoline) { + if (target == NULL) + return NULL; + while (1) { + unsigned char* new_target; + if (target[0] == ASM_JMP32REL) { + // target[1-4] holds the place the jmp goes to, but it's + // relative to the next instruction. + int relative_offset; // Windows guarantees int is 4 bytes + SIDESTEP_ASSERT(sizeof(relative_offset) == 4); + memcpy(reinterpret_cast<void*>(&relative_offset), + reinterpret_cast<void*>(target + 1), 4); + new_target = target + 5 + relative_offset; + } else if (target[0] == ASM_JMP8REL) { + // Visual Studio 7.1 implements new[] as an 8 bit jump to new + signed char relative_offset; + memcpy(reinterpret_cast<void*>(&relative_offset), + reinterpret_cast<void*>(target + 1), 1); + new_target = target + 2 + relative_offset; + } else if (target[0] == ASM_JMP32ABS_0 && + target[1] == ASM_JMP32ABS_1) { + jmp32rel: + // Visual studio seems to sometimes do it this way instead of the + // previous way. Not sure what the rules are, but it was happening + // with operator new in some binaries. 
+ void** new_target_v; + if (kIs64BitBinary) { + // In 64-bit mode JMPs are RIP-relative, not absolute + int target_offset; + memcpy(reinterpret_cast<void*>(&target_offset), + reinterpret_cast<void*>(target + 2), 4); + new_target_v = reinterpret_cast<void**>(target + target_offset + 6); + } else { + SIDESTEP_ASSERT(sizeof(new_target) == 4); + memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4); + } + new_target = reinterpret_cast<unsigned char*>(*new_target_v); + } else if (kIs64BitBinary && target[0] == ASM_REXW + && target[1] == ASM_JMP32ABS_0 + && target[2] == ASM_JMP32ABS_1) { + // in Visual Studio 2012 we're seeing jump like that: + // rex.W jmpq *0x11d019(%rip) + // + // according to docs I have, rex prefix is actually unneeded and + // can be ignored. I.e. docs say for jumps like that operand + // already defaults to 64-bit. But clearly it breaks abs. jump + // detection above and we just skip rex + target++; + goto jmp32rel; + } else { + break; + } + if (new_target == stop_before) + break; + if (stop_before_trampoline && *new_target == ASM_NOP + && new_target[1] == ASM_REXW && new_target[2] == ASM_MOVRAX_IMM) + break; + target = new_target; + } + return target; +} + +// Special case scoped_ptr to avoid dependency on scoped_ptr below. 
+class DeleteUnsignedCharArray { + public: + DeleteUnsignedCharArray(unsigned char* array) : array_(array) { + } + + ~DeleteUnsignedCharArray() { + if (array_) { + PreamblePatcher::FreePreambleBlock(array_); + } + } + + unsigned char* Release() { + unsigned char* temp = array_; + array_ = NULL; + return temp; + } + + private: + unsigned char* array_; +}; + +SideStepError PreamblePatcher::RawPatchWithStubAndProtections( + void* target_function, void *replacement_function, + unsigned char* preamble_stub, unsigned long stub_size, + unsigned long* bytes_needed) { + // We need to be able to write to a process-local copy of the first + // MAX_PREAMBLE_STUB_SIZE bytes of target_function + DWORD old_target_function_protect = 0; + BOOL succeeded = ::VirtualProtect(reinterpret_cast<void*>(target_function), + MAX_PREAMBLE_STUB_SIZE, + PAGE_EXECUTE_READWRITE, + &old_target_function_protect); + if (!succeeded) { + SIDESTEP_ASSERT(false && "Failed to make page containing target function " + "copy-on-write."); + return SIDESTEP_ACCESS_DENIED; + } + + SideStepError error_code = RawPatchWithStub(target_function, + replacement_function, + preamble_stub, + stub_size, + bytes_needed); + + // Restore the protection of the first MAX_PREAMBLE_STUB_SIZE bytes of + // pTargetFunction to what they were before we started goofing around. + // We do this regardless of whether the patch succeeded or not. + succeeded = ::VirtualProtect(reinterpret_cast<void*>(target_function), + MAX_PREAMBLE_STUB_SIZE, + old_target_function_protect, + &old_target_function_protect); + if (!succeeded) { + SIDESTEP_ASSERT(false && + "Failed to restore protection to target function."); + // We must not return an error here because the function has + // likely actually been patched, and returning an error might + // cause our client code not to unpatch it. So we just keep + // going. 
+ } + + if (SIDESTEP_SUCCESS != error_code) { // Testing RawPatchWithStub, above + SIDESTEP_ASSERT(false); + return error_code; + } + + // Flush the instruction cache to make sure the processor doesn't execute the + // old version of the instructions (before our patch). + // + // FlushInstructionCache is actually a no-op at least on + // single-processor XP machines. I'm not sure why this is so, but + // it is, yet I want to keep the call to the API here for + // correctness in case there is a difference in some variants of + // Windows/hardware. + succeeded = ::FlushInstructionCache(::GetCurrentProcess(), + target_function, + MAX_PREAMBLE_STUB_SIZE); + if (!succeeded) { + SIDESTEP_ASSERT(false && "Failed to flush instruction cache."); + // We must not return an error here because the function has actually + // been patched, and returning an error would likely cause our client + // code not to unpatch it. So we just keep going. + } + + return SIDESTEP_SUCCESS; +} + +SideStepError PreamblePatcher::RawPatch(void* target_function, + void* replacement_function, + void** original_function_stub) { + if (!target_function || !replacement_function || !original_function_stub || + (*original_function_stub) || target_function == replacement_function) { + SIDESTEP_ASSERT(false && "Preconditions not met"); + return SIDESTEP_INVALID_PARAMETER; + } + + BOOL succeeded = FALSE; + + // First, deal with a special case that we see with functions that + // point into an IAT table (including functions linked statically + // into the application): these function already starts with + // ASM_JMP32REL. For instance, malloc() might be implemented as a + // JMP to __malloc(). In that case, we replace the destination of + // the JMP (__malloc), rather than the JMP itself (malloc). This + // way we get the correct behavior no matter how malloc gets called. 
+ void* new_target = ResolveTarget(target_function); + if (new_target != target_function) { + target_function = new_target; + } + + // In 64-bit mode, preamble_stub must be within 2GB of target function + // so that if target contains a jump, we can translate it. + unsigned char* preamble_stub = AllocPreambleBlockNear(target_function); + if (!preamble_stub) { + SIDESTEP_ASSERT(false && "Unable to allocate preamble-stub."); + return SIDESTEP_INSUFFICIENT_BUFFER; + } + + // Frees the array at end of scope. + DeleteUnsignedCharArray guard_preamble_stub(preamble_stub); + + SideStepError error_code = RawPatchWithStubAndProtections( + target_function, replacement_function, preamble_stub, + MAX_PREAMBLE_STUB_SIZE, NULL); + + if (SIDESTEP_SUCCESS != error_code) { + SIDESTEP_ASSERT(false); + return error_code; + } + + // Flush the instruction cache to make sure the processor doesn't execute the + // old version of the instructions (before our patch). + // + // FlushInstructionCache is actually a no-op at least on + // single-processor XP machines. I'm not sure why this is so, but + // it is, yet I want to keep the call to the API here for + // correctness in case there is a difference in some variants of + // Windows/hardware. + succeeded = ::FlushInstructionCache(::GetCurrentProcess(), + target_function, + MAX_PREAMBLE_STUB_SIZE); + if (!succeeded) { + SIDESTEP_ASSERT(false && "Failed to flush instruction cache."); + // We must not return an error here because the function has actually + // been patched, and returning an error would likely cause our client + // code not to unpatch it. So we just keep going. 
+ } + + SIDESTEP_LOG("PreamblePatcher::RawPatch successfully patched."); + + // detach the scoped pointer so the memory is not freed + *original_function_stub = + reinterpret_cast<void*>(guard_preamble_stub.Release()); + return SIDESTEP_SUCCESS; +} + +SideStepError PreamblePatcher::Unpatch(void* target_function, + void* replacement_function, + void* original_function_stub) { + SIDESTEP_ASSERT(target_function && replacement_function && + original_function_stub); + if (!target_function || !replacement_function || + !original_function_stub) { + return SIDESTEP_INVALID_PARAMETER; + } + + // Before unpatching, target_function should be a JMP to + // replacement_function. If it's not, then either it's an error, or + // we're falling into the case where the original instruction was a + // JMP, and we patched the jumped_to address rather than the JMP + // itself. (For instance, if malloc() is just a JMP to __malloc(), + // we patched __malloc() and not malloc().) + unsigned char* target = reinterpret_cast<unsigned char*>(target_function); + target = reinterpret_cast<unsigned char*>( + ResolveTargetImpl( + target, reinterpret_cast<unsigned char*>(replacement_function), + true)); + // We should end at the function we patched. When we patch, we insert + // a ASM_JMP32REL instruction, so look for that as a sanity check. 
+ if (target[0] != ASM_JMP32REL) { + SIDESTEP_ASSERT(false && + "target_function does not look like it was patched."); + return SIDESTEP_INVALID_PARAMETER; + } + + const unsigned int kRequiredTargetPatchBytes = 5; + + // We need to be able to write to a process-local copy of the first + // kRequiredTargetPatchBytes bytes of target_function + DWORD old_target_function_protect = 0; + BOOL succeeded = ::VirtualProtect(reinterpret_cast<void*>(target), + kRequiredTargetPatchBytes, + PAGE_EXECUTE_READWRITE, + &old_target_function_protect); + if (!succeeded) { + SIDESTEP_ASSERT(false && "Failed to make page containing target function " + "copy-on-write."); + return SIDESTEP_ACCESS_DENIED; + } + + unsigned char* preamble_stub = reinterpret_cast<unsigned char*>( + original_function_stub); + + // Disassemble the preamble of stub and copy the bytes back to target. + // If we've done any conditional jumps in the preamble we need to convert + // them back to the original REL8 jumps in the target. + MiniDisassembler disassembler; + unsigned int preamble_bytes = 0; + unsigned int target_bytes = 0; + while (target_bytes < kRequiredTargetPatchBytes) { + unsigned int cur_bytes = 0; + InstructionType instruction_type = + disassembler.Disassemble(preamble_stub + preamble_bytes, cur_bytes); + if (IT_JUMP == instruction_type) { + unsigned int jump_bytes = 0; + SideStepError jump_ret = SIDESTEP_JUMP_INSTRUCTION; + if (IsNearConditionalJump(preamble_stub + preamble_bytes, cur_bytes) || + IsNearRelativeJump(preamble_stub + preamble_bytes, cur_bytes) || + IsNearAbsoluteCall(preamble_stub + preamble_bytes, cur_bytes) || + IsNearRelativeCall(preamble_stub + preamble_bytes, cur_bytes)) { + jump_ret = PatchNearJumpOrCall(preamble_stub + preamble_bytes, + cur_bytes, target + target_bytes, + &jump_bytes, MAX_PREAMBLE_STUB_SIZE); + } + if (jump_ret == SIDESTEP_JUMP_INSTRUCTION) { + SIDESTEP_ASSERT(false && + "Found unsupported jump instruction in stub!!"); + return 
SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + target_bytes += jump_bytes; + } else if (IT_GENERIC == instruction_type) { + if (IsMovWithDisplacement(preamble_stub + preamble_bytes, cur_bytes)) { + unsigned int mov_bytes = 0; + if (PatchMovWithDisplacement(preamble_stub + preamble_bytes, cur_bytes, + target + target_bytes, &mov_bytes, + MAX_PREAMBLE_STUB_SIZE) + != SIDESTEP_SUCCESS) { + SIDESTEP_ASSERT(false && + "Found unsupported generic instruction in stub!!"); + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + } else { + memcpy(reinterpret_cast<void*>(target + target_bytes), + reinterpret_cast<void*>(reinterpret_cast<unsigned char*>( + original_function_stub) + preamble_bytes), cur_bytes); + target_bytes += cur_bytes; + } + } else { + SIDESTEP_ASSERT(false && + "Found unsupported instruction in stub!!"); + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + preamble_bytes += cur_bytes; + } + + FreePreambleBlock(reinterpret_cast<unsigned char*>(original_function_stub)); + + // Restore the protection of the first kRequiredTargetPatchBytes bytes of + // target to what they were before we started goofing around. + succeeded = ::VirtualProtect(reinterpret_cast<void*>(target), + kRequiredTargetPatchBytes, + old_target_function_protect, + &old_target_function_protect); + + // Flush the instruction cache to make sure the processor doesn't execute the + // old version of the instructions (before our patch). + // + // See comment on FlushInstructionCache elsewhere in this file. 
+ succeeded = ::FlushInstructionCache(::GetCurrentProcess(), + target, + MAX_PREAMBLE_STUB_SIZE); + if (!succeeded) { + SIDESTEP_ASSERT(false && "Failed to flush instruction cache."); + return SIDESTEP_UNEXPECTED; + } + + SIDESTEP_LOG("PreamblePatcher::Unpatch successfully unpatched."); + return SIDESTEP_SUCCESS; +} + +void PreamblePatcher::Initialize() { + if (!initialized_) { + SYSTEM_INFO si = { 0 }; + ::GetSystemInfo(&si); + granularity_ = si.dwAllocationGranularity; + pagesize_ = si.dwPageSize; + initialized_ = true; + } +} + +unsigned char* PreamblePatcher::AllocPreambleBlockNear(void* target) { + PreamblePage* preamble_page = preamble_pages_; + while (preamble_page != NULL) { + if (preamble_page->free_ != NULL) { + __int64 val = reinterpret_cast<__int64>(preamble_page) - + reinterpret_cast<__int64>(target); + if ((val > 0 && val + pagesize_ <= INT_MAX) || + (val < 0 && val >= INT_MIN)) { + break; + } + } + preamble_page = preamble_page->next_; + } + + // The free_ member of the page is used to store the next available block + // of memory to use or NULL if there are no chunks available, in which case + // we'll allocate a new page. 
+ if (preamble_page == NULL || preamble_page->free_ == NULL) { + // Create a new preamble page and initialize the free list + preamble_page = reinterpret_cast<PreamblePage*>(AllocPageNear(target)); + SIDESTEP_ASSERT(preamble_page != NULL && "Could not allocate page!"); + void** pp = &preamble_page->free_; + unsigned char* ptr = reinterpret_cast<unsigned char*>(preamble_page) + + MAX_PREAMBLE_STUB_SIZE; + unsigned char* limit = reinterpret_cast<unsigned char*>(preamble_page) + + pagesize_; + while (ptr < limit) { + *pp = ptr; + pp = reinterpret_cast<void**>(ptr); + ptr += MAX_PREAMBLE_STUB_SIZE; + } + *pp = NULL; + // Insert the new page into the list + preamble_page->magic_ = kPreamblePageMagic; + preamble_page->next_ = preamble_pages_; + preamble_pages_ = preamble_page; + } + unsigned char* ret = reinterpret_cast<unsigned char*>(preamble_page->free_); + preamble_page->free_ = *(reinterpret_cast<void**>(preamble_page->free_)); + return ret; +} + +void PreamblePatcher::FreePreambleBlock(unsigned char* block) { + SIDESTEP_ASSERT(block != NULL); + SIDESTEP_ASSERT(granularity_ != 0); + uintptr_t ptr = reinterpret_cast<uintptr_t>(block); + ptr -= ptr & (granularity_ - 1); + PreamblePage* preamble_page = reinterpret_cast<PreamblePage*>(ptr); + SIDESTEP_ASSERT(preamble_page->magic_ == kPreamblePageMagic); + *(reinterpret_cast<void**>(block)) = preamble_page->free_; + preamble_page->free_ = block; +} + +void* PreamblePatcher::AllocPageNear(void* target) { + MEMORY_BASIC_INFORMATION mbi = { 0 }; + if (!::VirtualQuery(target, &mbi, sizeof(mbi))) { + SIDESTEP_ASSERT(false && "VirtualQuery failed on target address"); + return 0; + } + if (initialized_ == false) { + PreamblePatcher::Initialize(); + SIDESTEP_ASSERT(initialized_); + } + void* pv = NULL; + unsigned char* allocation_base = reinterpret_cast<unsigned char*>( + mbi.AllocationBase); + __int64 i = 1; + bool high_target = reinterpret_cast<__int64>(target) > UINT_MAX; + while (pv == NULL) { + __int64 val = 
reinterpret_cast<__int64>(allocation_base) - + (i * granularity_); + if (high_target && + reinterpret_cast<__int64>(target) - val > INT_MAX) { + // We're further than 2GB from the target + break; + } else if (val <= 0) { + // Less than 0 + break; + } + pv = ::VirtualAlloc(reinterpret_cast<void*>(allocation_base - + (i++ * granularity_)), + pagesize_, MEM_COMMIT | MEM_RESERVE, + PAGE_EXECUTE_READWRITE); + } + + // We couldn't allocate low, try to allocate high + if (pv == NULL) { + i = 1; + // Round up to the next multiple of page granularity + allocation_base = reinterpret_cast<unsigned char*>( + (reinterpret_cast<__int64>(target) & + (~(granularity_ - 1))) + granularity_); + while (pv == NULL) { + __int64 val = reinterpret_cast<__int64>(allocation_base) + + (i * granularity_) - reinterpret_cast<__int64>(target); + if (val > INT_MAX || val < 0) { + // We're too far or we overflowed + break; + } + pv = ::VirtualAlloc(reinterpret_cast<void*>(allocation_base + + (i++ * granularity_)), + pagesize_, MEM_COMMIT | MEM_RESERVE, + PAGE_EXECUTE_READWRITE); + } + } + return pv; +} + +bool PreamblePatcher::IsShortConditionalJump( + unsigned char* target, + unsigned int instruction_size) { + return (*(target) & 0x70) == 0x70 && instruction_size == 2; +} + +bool PreamblePatcher::IsShortJump( + unsigned char* target, + unsigned int instruction_size) { + return target[0] == 0xeb && instruction_size == 2; +} + +bool PreamblePatcher::IsNearConditionalJump( + unsigned char* target, + unsigned int instruction_size) { + return *(target) == 0xf && (*(target + 1) & 0x80) == 0x80 && + instruction_size == 6; +} + +bool PreamblePatcher::IsNearRelativeJump( + unsigned char* target, + unsigned int instruction_size) { + return *(target) == 0xe9 && instruction_size == 5; +} + +bool PreamblePatcher::IsNearAbsoluteCall( + unsigned char* target, + unsigned int instruction_size) { + return *(target) == 0xff && (*(target + 1) & 0x10) == 0x10 && + instruction_size == 6; +} + +bool 
PreamblePatcher::IsNearRelativeCall( + unsigned char* target, + unsigned int instruction_size) { + return *(target) == 0xe8 && instruction_size == 5; +} + +bool PreamblePatcher::IsMovWithDisplacement( + unsigned char* target, + unsigned int instruction_size) { + // In this case, the ModRM byte's mod field will be 0 and r/m will be 101b (5) + return instruction_size == 7 && *target == 0x48 && *(target + 1) == 0x8b && + (*(target + 2) >> 6) == 0 && (*(target + 2) & 0x7) == 5; +} + +SideStepError PreamblePatcher::PatchShortConditionalJump( + unsigned char* source, + unsigned int instruction_size, + unsigned char* target, + unsigned int* target_bytes, + unsigned int target_size) { + // note: rel8 offset is signed. Thus we need to ask for signed char + // to negative offsets right + unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]); + unsigned char* stub_jump_from = target + 6; + __int64 fixup_jump_offset = original_jump_dest - stub_jump_from; + if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) { + SIDESTEP_ASSERT(false && + "Unable to fix up short jump because target" + " is too far away."); + return SIDESTEP_JUMP_INSTRUCTION; + } + + *target_bytes = 6; + if (target_size > *target_bytes) { + // Convert the short jump to a near jump. + // + // 0f 8x xx xx xx xx = Jcc rel32off + unsigned short jmpcode = ((0x80 | (source[0] & 0xf)) << 8) | 0x0f; + memcpy(reinterpret_cast<void*>(target), + reinterpret_cast<void*>(&jmpcode), 2); + memcpy(reinterpret_cast<void*>(target + 2), + reinterpret_cast<void*>(&fixup_jump_offset), 4); + } + + return SIDESTEP_SUCCESS; +} + +SideStepError PreamblePatcher::PatchShortJump( + unsigned char* source, + unsigned int instruction_size, + unsigned char* target, + unsigned int* target_bytes, + unsigned int target_size) { + // note: rel8 offset is _signed_. Thus we need signed char here. 
+ unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]); + unsigned char* stub_jump_from = target + 5; + __int64 fixup_jump_offset = original_jump_dest - stub_jump_from; + if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) { + SIDESTEP_ASSERT(false && + "Unable to fix up short jump because target" + " is too far away."); + return SIDESTEP_JUMP_INSTRUCTION; + } + + *target_bytes = 5; + if (target_size > *target_bytes) { + // Convert the short jump to a near jump. + // + // e9 xx xx xx xx = jmp rel32off + target[0] = 0xe9; + memcpy(reinterpret_cast<void*>(target + 1), + reinterpret_cast<void*>(&fixup_jump_offset), 4); + } + + return SIDESTEP_SUCCESS; +} + +SideStepError PreamblePatcher::PatchNearJumpOrCall( + unsigned char* source, + unsigned int instruction_size, + unsigned char* target, + unsigned int* target_bytes, + unsigned int target_size) { + SIDESTEP_ASSERT(instruction_size == 5 || instruction_size == 6); + unsigned int jmp_offset_in_instruction = instruction_size == 5 ? 1 : 2; + unsigned char* original_jump_dest = reinterpret_cast<unsigned char *>( + reinterpret_cast<__int64>(source + instruction_size) + + *(reinterpret_cast<int*>(source + jmp_offset_in_instruction))); + unsigned char* stub_jump_from = target + instruction_size; + __int64 fixup_jump_offset = original_jump_dest - stub_jump_from; + if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) { + SIDESTEP_ASSERT(false && + "Unable to fix up near jump because target" + " is too far away."); + return SIDESTEP_JUMP_INSTRUCTION; + } + + if ((fixup_jump_offset < SCHAR_MAX && fixup_jump_offset > SCHAR_MIN)) { + *target_bytes = 2; + if (target_size > *target_bytes) { + // If the new offset is in range, use a short jump instead of a near jump. 
+ if (source[0] == ASM_JCC32REL_0 && + (source[1] & ASM_JCC32REL_1_MASK) == ASM_JCC32REL_1_MASK) { + unsigned short jmpcode = (static_cast<unsigned char>( + fixup_jump_offset) << 8) | (0x70 | (source[1] & 0xf)); + memcpy(reinterpret_cast<void*>(target), + reinterpret_cast<void*>(&jmpcode), + 2); + } else { + target[0] = ASM_JMP8REL; + target[1] = static_cast<unsigned char>(fixup_jump_offset); + } + } + } else { + *target_bytes = instruction_size; + if (target_size > *target_bytes) { + memcpy(reinterpret_cast<void*>(target), + reinterpret_cast<void*>(source), + jmp_offset_in_instruction); + memcpy(reinterpret_cast<void*>(target + jmp_offset_in_instruction), + reinterpret_cast<void*>(&fixup_jump_offset), + 4); + } + } + + return SIDESTEP_SUCCESS; +} + +SideStepError PreamblePatcher::PatchMovWithDisplacement( + unsigned char* source, + unsigned int instruction_size, + unsigned char* target, + unsigned int* target_bytes, + unsigned int target_size) { + SIDESTEP_ASSERT(instruction_size == 7); + const int mov_offset_in_instruction = 3; // 0x48 0x8b 0x0d <offset> + unsigned char* original_mov_dest = reinterpret_cast<unsigned char*>( + reinterpret_cast<__int64>(source + instruction_size) + + *(reinterpret_cast<int*>(source + mov_offset_in_instruction))); + unsigned char* stub_mov_from = target + instruction_size; + __int64 fixup_mov_offset = original_mov_dest - stub_mov_from; + if (fixup_mov_offset > INT_MAX || fixup_mov_offset < INT_MIN) { + SIDESTEP_ASSERT(false && + "Unable to fix up near MOV because target is too far away."); + return SIDESTEP_UNEXPECTED; + } + *target_bytes = instruction_size; + if (target_size > *target_bytes) { + memcpy(reinterpret_cast<void*>(target), + reinterpret_cast<void*>(source), + mov_offset_in_instruction); + memcpy(reinterpret_cast<void*>(target + mov_offset_in_instruction), + reinterpret_cast<void*>(&fixup_mov_offset), + 4); + } + return SIDESTEP_SUCCESS; +} + +}; // namespace sidestep diff --git 
a/src/third_party/gperftools-2.7/src/windows/preamble_patcher.h b/src/third_party/gperftools-2.7/src/windows/preamble_patcher.h new file mode 100644 index 00000000000..76f158a19a1 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/preamble_patcher.h @@ -0,0 +1,620 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Joi Sigurdsson + * Author: Scott Francis + * + * Definition of PreamblePatcher + */ + +#ifndef GOOGLE_PERFTOOLS_PREAMBLE_PATCHER_H_ +#define GOOGLE_PERFTOOLS_PREAMBLE_PATCHER_H_ + +#include "config.h" +#include <windows.h> + +// compatibility shim +#include "base/logging.h" +#define SIDESTEP_ASSERT(cond) RAW_DCHECK(cond, #cond) +#define SIDESTEP_LOG(msg) RAW_VLOG(1, msg) + +// Maximum size of the preamble stub. We overwrite at least the first 5 +// bytes of the function. Considering the worst case scenario, we need 4 +// bytes + the max instruction size + 5 more bytes for our jump back to +// the original code. With that in mind, 32 is a good number :) +#ifdef _M_X64 +// In 64-bit mode we may need more room. In 64-bit mode all jumps must be +// within +/-2GB of RIP. Because of this limitation we may need to use a +// trampoline to jump to the replacement function if it is further than 2GB +// away from the target. The trampoline is 14 bytes. +// +// So 4 bytes + max instruction size (17 bytes) + 5 bytes to jump back to the +// original code + trampoline size. 64 bytes is a nice number :-) +#define MAX_PREAMBLE_STUB_SIZE (64) +#else +#define MAX_PREAMBLE_STUB_SIZE (32) +#endif + +// Determines if this is a 64-bit binary. 
+#ifdef _M_X64 +static const bool kIs64BitBinary = true; +#else +static const bool kIs64BitBinary = false; +#endif + +namespace sidestep { + +// Possible results of patching/unpatching +enum SideStepError { + SIDESTEP_SUCCESS = 0, + SIDESTEP_INVALID_PARAMETER, + SIDESTEP_INSUFFICIENT_BUFFER, + SIDESTEP_JUMP_INSTRUCTION, + SIDESTEP_FUNCTION_TOO_SMALL, + SIDESTEP_UNSUPPORTED_INSTRUCTION, + SIDESTEP_NO_SUCH_MODULE, + SIDESTEP_NO_SUCH_FUNCTION, + SIDESTEP_ACCESS_DENIED, + SIDESTEP_UNEXPECTED, +}; + +#define SIDESTEP_TO_HRESULT(error) \ + MAKE_HRESULT(SEVERITY_ERROR, FACILITY_NULL, error) + +class DeleteUnsignedCharArray; + +// Implements a patching mechanism that overwrites the first few bytes of +// a function preamble with a jump to our hook function, which is then +// able to call the original function via a specially-made preamble-stub +// that imitates the action of the original preamble. +// +// NOTE: This patching mechanism should currently only be used for +// non-production code, e.g. unit tests, because it is not threadsafe. +// See the TODO in preamble_patcher_with_stub.cc for instructions on what +// we need to do before using it in production code; it's fairly simple +// but unnecessary for now since we only intend to use it in unit tests. +// +// To patch a function, use either of the typesafe Patch() methods. You +// can unpatch a function using Unpatch(). +// +// Typical usage goes something like this: +// @code +// typedef int (*MyTypesafeFuncPtr)(int x); +// MyTypesafeFuncPtr original_func_stub; +// int MyTypesafeFunc(int x) { return x + 1; } +// int HookMyTypesafeFunc(int x) { return 1 + original_func_stub(x); } +// +// void MyPatchInitializingFunction() { +// original_func_stub = PreamblePatcher::Patch( +// MyTypesafeFunc, HookMyTypesafeFunc); +// if (!original_func_stub) { +// // ... error handling ... +// } +// +// // ... continue - you have patched the function successfully ... 
+// } +// @endcode +// +// Note that there are a number of ways that this method of patching can +// fail. The most common are: +// - If there is a jump (jxx) instruction in the first 5 bytes of +// the function being patched, we cannot patch it because in the +// current implementation we do not know how to rewrite relative +// jumps after relocating them to the preamble-stub. Note that +// if you really really need to patch a function like this, it +// would be possible to add this functionality (but at some cost). +// - If there is a return (ret) instruction in the first 5 bytes +// we cannot patch the function because it may not be long enough +// for the jmp instruction we use to inject our patch. +// - If there is another thread currently executing within the bytes +// that are copied to the preamble stub, it will crash in an undefined +// way. +// +// If you get any other error than the above, you're either pointing the +// patcher at an invalid instruction (e.g. into the middle of a multi- +// byte instruction, or not at memory containing executable instructions) +// or, there may be a bug in the disassembler we use to find +// instruction boundaries. +// +// NOTE: In optimized builds, when you have very trivial functions that +// the compiler can reason do not have side effects, the compiler may +// reuse the result of calling the function with a given parameter, which +// may mean if you patch the function in between your patch will never get +// invoked. See preamble_patcher_test.cc for an example. +class PERFTOOLS_DLL_DECL PreamblePatcher { + public: + + // This is a typesafe version of RawPatch(), identical in all other + // ways than it takes a template parameter indicating the type of the + // function being patched. + // + // @param T The type of the function you are patching. 
Usually + // you will establish this type using a typedef, as in the following + // example: + // @code + // typedef BOOL (WINAPI *MessageBoxPtr)(HWND, LPCTSTR, LPCTSTR, UINT); + // MessageBoxPtr original = NULL; + // PreamblePatcher::Patch(MessageBox, Hook_MessageBox, &original); + // @endcode + template <class T> + static SideStepError Patch(T target_function, + T replacement_function, + T* original_function_stub) { + // NOTE: casting from a function to a pointer is contra the C++ + // spec. It's not safe on IA64, but is on i386. We use + // a C-style cast here to emphasize this is not legal C++. + return RawPatch((void*)(target_function), + (void*)(replacement_function), + (void**)(original_function_stub)); + } + + // Patches a named function imported from the named module using + // preamble patching. Uses RawPatch() to do the actual patching + // work. + // + // @param T The type of the function you are patching. Must + // exactly match the function you specify using module_name and + // function_name. + // + // @param module_name The name of the module from which the function + // is being imported. Note that the patch will fail if this module + // has not already been loaded into the current process. + // + // @param function_name The name of the function you wish to patch. + // + // @param replacement_function Your replacement function which + // will be called whenever code tries to call the original function. + // + // @param original_function_stub Pointer to memory that should receive a + // pointer that can be used (e.g. in the replacement function) to call the + // original function, or NULL to indicate failure. + // + // @return One of the EnSideStepError error codes; only SIDESTEP_SUCCESS + // indicates success. 
+ template <class T> + static SideStepError Patch(LPCTSTR module_name, + LPCSTR function_name, + T replacement_function, + T* original_function_stub) { + SIDESTEP_ASSERT(module_name && function_name); + if (!module_name || !function_name) { + SIDESTEP_ASSERT(false && + "You must specify a module name and function name."); + return SIDESTEP_INVALID_PARAMETER; + } + HMODULE module = ::GetModuleHandle(module_name); + SIDESTEP_ASSERT(module != NULL); + if (!module) { + SIDESTEP_ASSERT(false && "Invalid module name."); + return SIDESTEP_NO_SUCH_MODULE; + } + FARPROC existing_function = ::GetProcAddress(module, function_name); + if (!existing_function) { + SIDESTEP_ASSERT( + false && "Did not find any function with that name in the module."); + return SIDESTEP_NO_SUCH_FUNCTION; + } + // NOTE: casting from a function to a pointer is contra the C++ + // spec. It's not safe on IA64, but is on i386. We use + // a C-style cast here to emphasize this is not legal C++. + return RawPatch((void*)existing_function, (void*)replacement_function, + (void**)(original_function_stub)); + } + + // Patches a function by overwriting its first few bytes with + // a jump to a different function. This is the "worker" function + // for each of the typesafe Patch() functions. In most cases, + // it is preferable to use the Patch() functions rather than + // this one as they do more checking at compile time. + // + // @param target_function A pointer to the function that should be + // patched. + // + // @param replacement_function A pointer to the function that should + // replace the target function. The replacement function must have + // exactly the same calling convention and parameters as the original + // function. + // + // @param original_function_stub Pointer to memory that should receive a + // pointer that can be used (e.g. in the replacement function) to call the + // original function, or NULL to indicate failure. 
+ // + // @param original_function_stub Pointer to memory that should receive a + // pointer that can be used (e.g. in the replacement function) to call the + // original function, or NULL to indicate failure. + // + // @return One of the EnSideStepError error codes; only SIDESTEP_SUCCESS + // indicates success. + // + // @note The preamble-stub (the memory pointed to by + // *original_function_stub) is allocated on the heap, and (in + // production binaries) never destroyed, resulting in a memory leak. This + // will be the case until we implement safe unpatching of a method. + // However, it is quite difficult to unpatch a method (because other + // threads in the process may be using it) so we are leaving it for now. + // See however UnsafeUnpatch, which can be used for binaries where you + // know only one thread is running, e.g. unit tests. + static SideStepError RawPatch(void* target_function, + void* replacement_function, + void** original_function_stub); + + // Unpatches target_function and deletes the stub that previously could be + // used to call the original version of the function. + // + // DELETES the stub that is passed to the function. + // + // @param target_function Pointer to the target function which was + // previously patched, i.e. a pointer which value should match the value + // of the symbol prior to patching it. + // + // @param replacement_function Pointer to the function target_function + // was patched to. + // + // @param original_function_stub Pointer to the stub returned when + // patching, that could be used to call the original version of the + // patched function. This function will also delete the stub, which after + // unpatching is useless. + // + // If your original call was + // Patch(VirtualAlloc, MyVirtualAlloc, &origptr) + // then to undo it you would call + // Unpatch(VirtualAlloc, MyVirtualAlloc, origptr); + // + // @return One of the EnSideStepError error codes; only SIDESTEP_SUCCESS + // indicates success. 
+ static SideStepError Unpatch(void* target_function, + void* replacement_function, + void* original_function_stub); + + // A helper routine when patching, which follows jmp instructions at + // function addresses, to get to the "actual" function contents. + // This allows us to identify two functions that are at different + // addresses but actually resolve to the same code. + // + // @param target_function Pointer to a function. + // + // @return Either target_function (the input parameter), or if + // target_function's body consists entirely of a JMP instruction, + // the address it JMPs to (or more precisely, the address at the end + // of a chain of JMPs). + template <class T> + static T ResolveTarget(T target_function) { + return (T)ResolveTargetImpl((unsigned char*)target_function, NULL); + } + + // Allocates a block of memory of size MAX_PREAMBLE_STUB_SIZE that is as + // close (within 2GB) as possible to target. This is done to ensure that + // we can perform a relative jump from target to a trampoline if the + // replacement function is > +-2GB from target. This means that we only need + // to patch 5 bytes in the target function. + // + // @param target Pointer to target function. + // + // @return Returns a block of memory of size MAX_PREAMBLE_STUB_SIZE that can + // be used to store a function preamble block. + static unsigned char* AllocPreambleBlockNear(void* target); + + // Frees a block allocated by AllocPreambleBlockNear. + // + // @param block Block that was returned by AllocPreambleBlockNear. 
+ static void FreePreambleBlock(unsigned char* block); + + private: + friend class DeleteUnsignedCharArray; + + // Used to store data allocated for preamble stubs + struct PreamblePage { + unsigned int magic_; + PreamblePage* next_; + // This member points to a linked list of free blocks within the page + // or NULL if at the end + void* free_; + }; + + // In 64-bit mode, the replacement function must be within 2GB of the original + // target in order to only require 5 bytes for the function patch. To meet + // this requirement we're creating an allocator within this class to + // allocate blocks that are within 2GB of a given target. This member is the + // head of a linked list of pages used to allocate blocks that are within + // 2GB of the target. + static PreamblePage* preamble_pages_; + + // Page granularity + static long granularity_; + + // Page size + static long pagesize_; + + // Determines if the patcher has been initialized. + static bool initialized_; + + // Used to initialize static members. + static void Initialize(); + + // Patches a function by overwriting its first few bytes with + // a jump to a different function. This is similar to the RawPatch + // function except that it uses the stub allocated by the caller + // instead of allocating it. + // + // We call VirtualProtect to make the + // target function writable at least for the duration of the call. + // + // @param target_function A pointer to the function that should be + // patched. + // + // @param replacement_function A pointer to the function that should + // replace the target function. The replacement function must have + // exactly the same calling convention and parameters as the original + // function. + // + // @param preamble_stub A pointer to a buffer where the preamble stub + // should be copied. The size of the buffer should be sufficient to + // hold the preamble bytes. 
+ // + // @param stub_size Size in bytes of the buffer allocated for the + // preamble_stub + // + // @param bytes_needed Pointer to a variable that receives the minimum + // number of bytes required for the stub. Can be set to NULL if you're + // not interested. + // + // @return An error code indicating the result of patching. + static SideStepError RawPatchWithStubAndProtections( + void* target_function, + void* replacement_function, + unsigned char* preamble_stub, + unsigned long stub_size, + unsigned long* bytes_needed); + + // A helper function used by RawPatchWithStubAndProtections -- it + // does everything but the VirtualProtect work. Defined in + // preamble_patcher_with_stub.cc. + // + // @param target_function A pointer to the function that should be + // patched. + // + // @param replacement_function A pointer to the function that should + // replace the target function. The replacement function must have + // exactly the same calling convention and parameters as the original + // function. + // + // @param preamble_stub A pointer to a buffer where the preamble stub + // should be copied. The size of the buffer should be sufficient to + // hold the preamble bytes. + // + // @param stub_size Size in bytes of the buffer allocated for the + // preamble_stub + // + // @param bytes_needed Pointer to a variable that receives the minimum + // number of bytes required for the stub. Can be set to NULL if you're + // not interested. + // + // @return An error code indicating the result of patching. + static SideStepError RawPatchWithStub(void* target_function, + void* replacement_function, + unsigned char* preamble_stub, + unsigned long stub_size, + unsigned long* bytes_needed); + + + // A helper routine when patching, which follows jmp instructions at + // function addresses, to get to the "actual" function contents. + // This allows us to identify two functions that are at different + // addresses but actually resolve to the same code. 
+ // + // @param target_function Pointer to a function. + // + // @param stop_before If, when following JMP instructions from + // target_function, we get to the address stop, we return + // immediately, the address that jumps to stop_before. + // + // @param stop_before_trampoline When following JMP instructions from + // target_function, stop before a trampoline is detected. See comment in + // PreamblePatcher::RawPatchWithStub for more information. This parameter + // has no effect in 32-bit mode. + // + // @return Either target_function (the input parameter), or if + // target_function's body consists entirely of a JMP instruction, + // the address it JMPs to (or more precisely, the address at the end + // of a chain of JMPs). + static void* ResolveTargetImpl(unsigned char* target_function, + unsigned char* stop_before, + bool stop_before_trampoline = false); + + // Helper routine that attempts to allocate a page as close (within 2GB) + // as possible to target. + // + // @param target Pointer to target function. + // + // @return Returns an address that is within 2GB of target. + static void* AllocPageNear(void* target); + + // Helper routine that determines if a target instruction is a short + // conditional jump. + // + // @param target Pointer to instruction. + // + // @param instruction_size Size of the instruction in bytes. + // + // @return Returns true if the instruction is a short conditional jump. + static bool IsShortConditionalJump(unsigned char* target, + unsigned int instruction_size); + + static bool IsShortJump(unsigned char *target, unsigned int instruction_size); + + // Helper routine that determines if a target instruction is a near + // conditional jump. + // + // @param target Pointer to instruction. + // + // @param instruction_size Size of the instruction in bytes. + // + // @return Returns true if the instruction is a near conditional jump. 
+ static bool IsNearConditionalJump(unsigned char* target, + unsigned int instruction_size); + + // Helper routine that determines if a target instruction is a near + // relative jump. + // + // @param target Pointer to instruction. + // + // @param instruction_size Size of the instruction in bytes. + // + // @return Returns true if the instruction is a near absolute jump. + static bool IsNearRelativeJump(unsigned char* target, + unsigned int instruction_size); + + // Helper routine that determines if a target instruction is a near + // absolute call. + // + // @param target Pointer to instruction. + // + // @param instruction_size Size of the instruction in bytes. + // + // @return Returns true if the instruction is a near absolute call. + static bool IsNearAbsoluteCall(unsigned char* target, + unsigned int instruction_size); + + // Helper routine that determines if a target instruction is a near + // absolute call. + // + // @param target Pointer to instruction. + // + // @param instruction_size Size of the instruction in bytes. + // + // @return Returns true if the instruction is a near absolute call. + static bool IsNearRelativeCall(unsigned char* target, + unsigned int instruction_size); + + // Helper routine that determines if a target instruction is a 64-bit MOV + // that uses a RIP-relative displacement. + // + // @param target Pointer to instruction. + // + // @param instruction_size Size of the instruction in bytes. + // + // @return Returns true if the instruction is a MOV with displacement. + static bool IsMovWithDisplacement(unsigned char* target, + unsigned int instruction_size); + + // Helper routine that converts a short conditional jump instruction + // to a near conditional jump in a target buffer. Note that the target + // buffer must be within 2GB of the source for the near jump to work. + // + // A short conditional jump instruction is in the format: + // 7x xx = Jcc rel8off + // + // @param source Pointer to instruction. 
+ // + // @param instruction_size Size of the instruction. + // + // @param target Target buffer to write the new instruction. + // + // @param target_bytes Pointer to a buffer that contains the size + // of the target instruction, in bytes. + // + // @param target_size Size of the target buffer. + // + // @return Returns SIDESTEP_SUCCESS if successful, otherwise an error. + static SideStepError PatchShortConditionalJump(unsigned char* source, + unsigned int instruction_size, + unsigned char* target, + unsigned int* target_bytes, + unsigned int target_size); + + static SideStepError PatchShortJump(unsigned char* source, + unsigned int instruction_size, + unsigned char* target, + unsigned int* target_bytes, + unsigned int target_size); + + // Helper routine that converts an instruction that will convert various + // jump-like instructions to corresponding instructions in the target buffer. + // What this routine does is fix up the relative offsets contained in jump + // instructions to point back to the original target routine. Like with + // PatchShortConditionalJump, the target buffer must be within 2GB of the + // source. + // + // We currently handle the following instructions: + // + // E9 xx xx xx xx = JMP rel32off + // 0F 8x xx xx xx xx = Jcc rel32off + // FF /2 xx xx xx xx = CALL reg/mem32/mem64 + // E8 xx xx xx xx = CALL rel32off + // + // It should not be hard to update this function to support other + // instructions that jump to relative targets. + // + // @param source Pointer to instruction. + // + // @param instruction_size Size of the instruction. + // + // @param target Target buffer to write the new instruction. + // + // @param target_bytes Pointer to a buffer that contains the size + // of the target instruction, in bytes. + // + // @param target_size Size of the target buffer. + // + // @return Returns SIDESTEP_SUCCESS if successful, otherwise an error. 
+ static SideStepError PatchNearJumpOrCall(unsigned char* source, + unsigned int instruction_size, + unsigned char* target, + unsigned int* target_bytes, + unsigned int target_size); + + // Helper routine that patches a 64-bit MOV instruction with a RIP-relative + // displacement. The target buffer must be within 2GB of the source. + // + // 48 8B 0D XX XX XX XX = MOV rel32off + // + // @param source Pointer to instruction. + // + // @param instruction_size Size of the instruction. + // + // @param target Target buffer to write the new instruction. + // + // @param target_bytes Pointer to a buffer that contains the size + // of the target instruction, in bytes. + // + // @param target_size Size of the target buffer. + // + // @return Returns SIDESTEP_SUCCESS if successful, otherwise an error. + static SideStepError PatchMovWithDisplacement(unsigned char* source, + unsigned int instruction_size, + unsigned char* target, + unsigned int* target_bytes, + unsigned int target_size); +}; + +}; // namespace sidestep + +#endif // GOOGLE_PERFTOOLS_PREAMBLE_PATCHER_H_ diff --git a/src/third_party/gperftools-2.7/src/windows/preamble_patcher_test.cc b/src/third_party/gperftools-2.7/src/windows/preamble_patcher_test.cc new file mode 100644 index 00000000000..e4605c6fb86 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/preamble_patcher_test.cc @@ -0,0 +1,368 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2011, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * Author: Scott Francis + * + * Unit tests for PreamblePatcher + */ + +#include "config_for_unittests.h" +#include "preamble_patcher.h" +#include "mini_disassembler.h" +#pragma warning(push) +#pragma warning(disable:4553) +#include "auto_testing_hook.h" +#pragma warning(pop) + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#include <tchar.h> + +// Turning off all optimizations for this file, since the official build's +// "Whole program optimization" seems to cause the TestPatchUsingDynamicStub +// test to crash with an access violation. We debugged this and found +// that the optimized access a register that is changed by a call to the hook +// function. 
+#pragma optimize("", off) + +// A convenience macro to avoid a lot of casting in the tests. +// I tried to make this a templated function, but windows complained: +// error C2782: 'sidestep::SideStepError `anonymous-namespace'::Unpatch(T,T,T *)' : template parameter 'T' is ambiguous +// could be 'int (int)' +// or 'int (__cdecl *)(int)' +// My life isn't long enough to try to figure out how to fix this. +#define UNPATCH(target_function, replacement_function, original_function_stub) \ + sidestep::PreamblePatcher::Unpatch((void*)(target_function), \ + (void*)(replacement_function), \ + (void*)(original_function)) + +namespace { + +// Function for testing - this is what we patch +// +// NOTE: Because of the way the compiler optimizes this function in +// release builds, we need to use a different input value every time we +// call it within a function, otherwise the compiler will just reuse the +// last calculated incremented value. +int __declspec(noinline) IncrementNumber(int i) { +#ifdef _M_X64 + __int64 i2 = i + 1; + return (int) i2; +#else + return i + 1; +#endif +} + +extern "C" int TooShortFunction(int); + +extern "C" int JumpShortCondFunction(int); + +extern "C" int JumpNearCondFunction(int); + +extern "C" int JumpAbsoluteFunction(int); + +extern "C" int CallNearRelativeFunction(int); + +typedef int (*IncrementingFunc)(int); +IncrementingFunc original_function = NULL; + +int HookIncrementNumber(int i) { + SIDESTEP_ASSERT(original_function != NULL); + int incremented_once = original_function(i); + return incremented_once + 1; +} + +// For the AutoTestingHook test, we can't use original_function, because +// all that is encapsulated. +// This function "increments" by 10, just to set it apart from the other +// functions. 
+int __declspec(noinline) AutoHookIncrementNumber(int i) { + return i + 10; +} + +}; // namespace + +namespace sidestep { + +bool TestDisassembler() { + unsigned int instruction_size = 0; + sidestep::MiniDisassembler disassembler; + void * target = reinterpret_cast<unsigned char *>(IncrementNumber); + void * new_target = PreamblePatcher::ResolveTarget(target); + if (target != new_target) + target = new_target; + + while (1) { + sidestep::InstructionType instructionType = disassembler.Disassemble( + reinterpret_cast<unsigned char *>(target) + instruction_size, + instruction_size); + if (sidestep::IT_RETURN == instructionType) { + return true; + } + } +} + +bool TestPatchWithLongJump() { + original_function = NULL; + void *p = ::VirtualAlloc(reinterpret_cast<void *>(0x0000020000000000), 4096, + MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE); + SIDESTEP_EXPECT_TRUE(p != NULL); + memset(p, 0xcc, 4096); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch(IncrementNumber, + (IncrementingFunc) p, + &original_function)); + SIDESTEP_ASSERT((*original_function)(1) == 2); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(IncrementNumber, + (IncrementingFunc)p, + original_function)); + ::VirtualFree(p, 0, MEM_RELEASE); + return true; +} + +bool TestPatchWithPreambleShortCondJump() { + original_function = NULL; + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch(JumpShortCondFunction, + HookIncrementNumber, + &original_function)); + (*original_function)(1); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(JumpShortCondFunction, + (void*)HookIncrementNumber, + original_function)); + return true; +} + +bool TestPatchWithPreambleNearRelativeCondJump() { + original_function = NULL; + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch(JumpNearCondFunction, + HookIncrementNumber, + &original_function)); + (*original_function)(0); + 
(*original_function)(1); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(JumpNearCondFunction, + HookIncrementNumber, + original_function)); + return true; +} + +bool TestPatchWithPreambleAbsoluteJump() { + original_function = NULL; + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch(JumpAbsoluteFunction, + HookIncrementNumber, + &original_function)); + (*original_function)(0); + (*original_function)(1); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(JumpAbsoluteFunction, + HookIncrementNumber, + original_function)); + return true; +} + +bool TestPatchWithPreambleNearRelativeCall() { + original_function = NULL; + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch( + CallNearRelativeFunction, + HookIncrementNumber, + &original_function)); + (*original_function)(0); + (*original_function)(1); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(CallNearRelativeFunction, + HookIncrementNumber, + original_function)); + return true; +} + +bool TestPatchUsingDynamicStub() { + original_function = NULL; + SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2); + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch(IncrementNumber, + HookIncrementNumber, + &original_function)); + SIDESTEP_EXPECT_TRUE(original_function); + SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 4); + SIDESTEP_EXPECT_TRUE(original_function(3) == 4); + + // Clearbox test to see that the function has been patched. + sidestep::MiniDisassembler disassembler; + unsigned int instruction_size = 0; + SIDESTEP_EXPECT_TRUE(sidestep::IT_JUMP == disassembler.Disassemble( + reinterpret_cast<unsigned char*>(IncrementNumber), + instruction_size)); + + // Since we patched IncrementNumber, its first statement is a + // jmp to the hook function. So verify that we now can not patch + // IncrementNumber because it starts with a jump. 
+#if 0 + IncrementingFunc dummy = NULL; + // TODO(joi@chromium.org): restore this test once flag is added to + // disable JMP following + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_JUMP_INSTRUCTION == + sidestep::PreamblePatcher::Patch(IncrementNumber, + HookIncrementNumber, + &dummy)); + + // This test disabled because code in preamble_patcher_with_stub.cc + // asserts before returning the error code -- so there is no way + // to get an error code here, in debug build. + dummy = NULL; + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_FUNCTION_TOO_SMALL == + sidestep::PreamblePatcher::Patch(TooShortFunction, + HookIncrementNumber, + &dummy)); +#endif + + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(IncrementNumber, + HookIncrementNumber, + original_function)); + return true; +} + +bool PatchThenUnpatch() { + original_function = NULL; + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + sidestep::PreamblePatcher::Patch(IncrementNumber, + HookIncrementNumber, + &original_function)); + SIDESTEP_EXPECT_TRUE(original_function); + SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 3); + SIDESTEP_EXPECT_TRUE(original_function(2) == 3); + + SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS == + UNPATCH(IncrementNumber, + HookIncrementNumber, + original_function)); + original_function = NULL; + SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4); + + return true; +} + +bool AutoTestingHookTest() { + SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2); + + // Inner scope, so we can test what happens when the AutoTestingHook + // goes out of scope + { + AutoTestingHook hook = MakeTestingHook(IncrementNumber, + AutoHookIncrementNumber); + (void) hook; + SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 12); + } + SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4); + + return true; +} + +bool AutoTestingHookInContainerTest() { + SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2); + + // Inner scope, so we can test what happens when the AutoTestingHook + // goes out of scope + { + AutoTestingHookHolder 
hook(MakeTestingHookHolder(IncrementNumber, + AutoHookIncrementNumber)); + (void) hook; + SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 12); + } + SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4); + + return true; +} + +bool TestPreambleAllocation() { + __int64 diff = 0; + void* p1 = reinterpret_cast<void*>(0x110000000); + void* p2 = reinterpret_cast<void*>(0x810000000); + unsigned char* b1 = PreamblePatcher::AllocPreambleBlockNear(p1); + SIDESTEP_EXPECT_TRUE(b1 != NULL); + diff = reinterpret_cast<__int64>(p1) - reinterpret_cast<__int64>(b1); + // Ensure blocks are within 2GB + SIDESTEP_EXPECT_TRUE(diff <= INT_MAX && diff >= INT_MIN); + unsigned char* b2 = PreamblePatcher::AllocPreambleBlockNear(p2); + SIDESTEP_EXPECT_TRUE(b2 != NULL); + diff = reinterpret_cast<__int64>(p2) - reinterpret_cast<__int64>(b2); + SIDESTEP_EXPECT_TRUE(diff <= INT_MAX && diff >= INT_MIN); + + // Ensure we're reusing free blocks + unsigned char* b3 = b1; + unsigned char* b4 = b2; + PreamblePatcher::FreePreambleBlock(b1); + PreamblePatcher::FreePreambleBlock(b2); + b1 = PreamblePatcher::AllocPreambleBlockNear(p1); + SIDESTEP_EXPECT_TRUE(b1 == b3); + b2 = PreamblePatcher::AllocPreambleBlockNear(p2); + SIDESTEP_EXPECT_TRUE(b2 == b4); + PreamblePatcher::FreePreambleBlock(b1); + PreamblePatcher::FreePreambleBlock(b2); + + return true; +} + +bool UnitTests() { + return TestPatchWithPreambleNearRelativeCall() && + TestPatchWithPreambleAbsoluteJump() && + TestPatchWithPreambleNearRelativeCondJump() && + TestPatchWithPreambleShortCondJump() && + TestDisassembler() && TestPatchWithLongJump() && + TestPatchUsingDynamicStub() && PatchThenUnpatch() && + AutoTestingHookTest() && AutoTestingHookInContainerTest() && + TestPreambleAllocation(); +} + +}; // namespace sidestep + +int safe_vsnprintf(char *str, size_t size, const char *format, va_list ap) { + if (size == 0) // not even room for a \0? 
+ return -1; // not what C99 says to do, but what windows does + str[size-1] = '\0'; + return _vsnprintf(str, size-1, format, ap); +} + +int _tmain(int argc, _TCHAR* argv[]) +{ + bool ret = sidestep::UnitTests(); + printf("%s\n", ret ? "PASS" : "FAIL"); + return ret ? 0 : -1; +} + +#pragma optimize("", on) diff --git a/src/third_party/gperftools-2.7/src/windows/preamble_patcher_with_stub.cc b/src/third_party/gperftools-2.7/src/windows/preamble_patcher_with_stub.cc new file mode 100644 index 00000000000..23f9d3a0823 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/preamble_patcher_with_stub.cc @@ -0,0 +1,302 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * Author: Scott Francis + * + * Implementation of PreamblePatcher + */ + +#include "preamble_patcher.h" + +#include "mini_disassembler.h" + +// Definitions of assembly statements we need +#define ASM_JMP32REL 0xE9 +#define ASM_INT3 0xCC +#define ASM_NOP 0x90 +// X64 opcodes +#define ASM_MOVRAX_IMM 0xB8 +#define ASM_REXW 0x48 +#define ASM_JMP 0xFF +#define ASM_JMP_RAX 0xE0 +#define ASM_PUSH 0x68 +#define ASM_RET 0xC3 + +namespace sidestep { + +SideStepError PreamblePatcher::RawPatchWithStub( + void* target_function, + void* replacement_function, + unsigned char* preamble_stub, + unsigned long stub_size, + unsigned long* bytes_needed) { + if ((NULL == target_function) || + (NULL == replacement_function) || + (NULL == preamble_stub)) { + SIDESTEP_ASSERT(false && + "Invalid parameters - either pTargetFunction or " + "pReplacementFunction or pPreambleStub were NULL."); + return SIDESTEP_INVALID_PARAMETER; + } + + // TODO(V7:joi) Siggi and I just had a discussion and decided that both + // patching and unpatching are actually unsafe. We also discussed a + // method of making it safe, which is to freeze all other threads in the + // process, check their thread context to see if their eip is currently + // inside the block of instructions we need to copy to the stub, and if so + // wait a bit and try again, then unfreeze all threads once we've patched. 
+ // Not implementing this for now since we're only using SideStep for unit + // testing, but if we ever use it for production code this is what we + // should do. + // + // NOTE: Stoyan suggests we can write 8 or even 10 bytes atomically using + // FPU instructions, and on newer processors we could use cmpxchg8b or + // cmpxchg16b. So it might be possible to do the patching/unpatching + // atomically and avoid having to freeze other threads. Note though, that + // doing it atomically does not help if one of the other threads happens + // to have its eip in the middle of the bytes you change while you change + // them. + unsigned char* target = reinterpret_cast<unsigned char*>(target_function); + unsigned int required_trampoline_bytes = 0; + const unsigned int kRequiredStubJumpBytes = 5; + const unsigned int kRequiredTargetPatchBytes = 5; + + // Initialize the stub with INT3's just in case. + if (stub_size) { + memset(preamble_stub, 0xcc, stub_size); + } + if (kIs64BitBinary) { + // In 64-bit mode JMP instructions are always relative to RIP. If the + // replacement - target offset is > 2GB, we can't JMP to the replacement + // function. In this case, we're going to use a trampoline - that is, + // we're going to do a relative jump to a small chunk of code in the stub + // that will then do the absolute jump to the replacement function. By + // doing this, we only need to patch 5 bytes in the target function, as + // opposed to patching 12 bytes if we were to do an absolute jump. + // + // Note that the first byte of the trampoline is a NOP instruction. This + // is used as a trampoline signature that will be detected when unpatching + // the function. 
+ // + // jmp <trampoline> + // + // trampoline: + // nop + // mov rax, <replacement_function> + // jmp rax + // + __int64 replacement_target_offset = reinterpret_cast<__int64>( + replacement_function) - reinterpret_cast<__int64>(target) - 5; + if (replacement_target_offset > INT_MAX + || replacement_target_offset < INT_MIN) { + // The stub needs to be within 2GB of the target for the trampoline to + // work! + __int64 trampoline_offset = reinterpret_cast<__int64>(preamble_stub) + - reinterpret_cast<__int64>(target) - 5; + if (trampoline_offset > INT_MAX || trampoline_offset < INT_MIN) { + // We're screwed. + SIDESTEP_ASSERT(false + && "Preamble stub is too far from target to patch."); + return SIDESTEP_UNEXPECTED; + } + required_trampoline_bytes = 13; + } + } + + // Let's disassemble the preamble of the target function to see if we can + // patch, and to see how much of the preamble we need to take. We need 5 + // bytes for our jmp instruction, so let's find the minimum number of + // instructions to get 5 bytes. 
+ MiniDisassembler disassembler; + unsigned int preamble_bytes = 0; + unsigned int stub_bytes = 0; + while (preamble_bytes < kRequiredTargetPatchBytes) { + unsigned int cur_bytes = 0; + InstructionType instruction_type = + disassembler.Disassemble(target + preamble_bytes, cur_bytes); + if (IT_JUMP == instruction_type) { + unsigned int jump_bytes = 0; + SideStepError jump_ret = SIDESTEP_JUMP_INSTRUCTION; + if (IsShortConditionalJump(target + preamble_bytes, cur_bytes)) { + jump_ret = PatchShortConditionalJump(target + preamble_bytes, cur_bytes, + preamble_stub + stub_bytes, + &jump_bytes, + stub_size - stub_bytes); + } else if (IsShortJump(target + preamble_bytes, cur_bytes)) { + jump_ret = PatchShortJump(target + preamble_bytes, cur_bytes, + preamble_stub + stub_bytes, + &jump_bytes, + stub_size - stub_bytes); + } else if (IsNearConditionalJump(target + preamble_bytes, cur_bytes) || + IsNearRelativeJump(target + preamble_bytes, cur_bytes) || + IsNearAbsoluteCall(target + preamble_bytes, cur_bytes) || + IsNearRelativeCall(target + preamble_bytes, cur_bytes)) { + jump_ret = PatchNearJumpOrCall(target + preamble_bytes, cur_bytes, + preamble_stub + stub_bytes, &jump_bytes, + stub_size - stub_bytes); + } + if (jump_ret != SIDESTEP_SUCCESS) { + SIDESTEP_ASSERT(false && + "Unable to patch because there is an unhandled branch " + "instruction in the initial preamble bytes."); + return SIDESTEP_JUMP_INSTRUCTION; + } + stub_bytes += jump_bytes; + } else if (IT_RETURN == instruction_type) { + SIDESTEP_ASSERT(false && + "Unable to patch because function is too short"); + return SIDESTEP_FUNCTION_TOO_SMALL; + } else if (IT_GENERIC == instruction_type) { + if (IsMovWithDisplacement(target + preamble_bytes, cur_bytes)) { + unsigned int mov_bytes = 0; + if (PatchMovWithDisplacement(target + preamble_bytes, cur_bytes, + preamble_stub + stub_bytes, &mov_bytes, + stub_size - stub_bytes) + != SIDESTEP_SUCCESS) { + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + stub_bytes += mov_bytes; 
+ } else { + memcpy(reinterpret_cast<void*>(preamble_stub + stub_bytes), + reinterpret_cast<void*>(target + preamble_bytes), cur_bytes); + stub_bytes += cur_bytes; + } + } else { + SIDESTEP_ASSERT(false && + "Disassembler encountered unsupported instruction " + "(either unused or unknown"); + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + preamble_bytes += cur_bytes; + } + + if (NULL != bytes_needed) + *bytes_needed = stub_bytes + kRequiredStubJumpBytes + + required_trampoline_bytes; + + // Inv: cbPreamble is the number of bytes (at least 5) that we need to take + // from the preamble to have whole instructions that are 5 bytes or more + // in size total. The size of the stub required is cbPreamble + + // kRequiredStubJumpBytes (5) + required_trampoline_bytes (0 or 13) + if (stub_bytes + kRequiredStubJumpBytes + required_trampoline_bytes + > stub_size) { + SIDESTEP_ASSERT(false); + return SIDESTEP_INSUFFICIENT_BUFFER; + } + + // Now, make a jmp instruction to the rest of the target function (minus the + // preamble bytes we moved into the stub) and copy it into our preamble-stub. 
+ // find address to jump to, relative to next address after jmp instruction +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4244) +#endif + int relative_offset_to_target_rest + = ((reinterpret_cast<unsigned char*>(target) + preamble_bytes) - + (preamble_stub + stub_bytes + kRequiredStubJumpBytes)); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + // jmp (Jump near, relative, displacement relative to next instruction) + preamble_stub[stub_bytes] = ASM_JMP32REL; + // copy the address + memcpy(reinterpret_cast<void*>(preamble_stub + stub_bytes + 1), + reinterpret_cast<void*>(&relative_offset_to_target_rest), 4); + + if (kIs64BitBinary && required_trampoline_bytes != 0) { + // Construct the trampoline + unsigned int trampoline_pos = stub_bytes + kRequiredStubJumpBytes; + preamble_stub[trampoline_pos] = ASM_NOP; + preamble_stub[trampoline_pos + 1] = ASM_REXW; + preamble_stub[trampoline_pos + 2] = ASM_MOVRAX_IMM; + memcpy(reinterpret_cast<void*>(preamble_stub + trampoline_pos + 3), + reinterpret_cast<void*>(&replacement_function), + sizeof(void *)); + preamble_stub[trampoline_pos + 11] = ASM_JMP; + preamble_stub[trampoline_pos + 12] = ASM_JMP_RAX; + + // Now update replacement_function to point to the trampoline + replacement_function = preamble_stub + trampoline_pos; + } + + // Inv: preamble_stub points to assembly code that will execute the + // original function by first executing the first cbPreamble bytes of the + // preamble, then jumping to the rest of the function. + + // Overwrite the first 5 bytes of the target function with a jump to our + // replacement function. + // (Jump near, relative, displacement relative to next instruction) + target[0] = ASM_JMP32REL; + + // Find offset from instruction after jmp, to the replacement function. 
+#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4244) +#endif + int offset_to_replacement_function = + reinterpret_cast<unsigned char*>(replacement_function) - + reinterpret_cast<unsigned char*>(target) - 5; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + // complete the jmp instruction + memcpy(reinterpret_cast<void*>(target + 1), + reinterpret_cast<void*>(&offset_to_replacement_function), 4); + + // Set any remaining bytes that were moved to the preamble-stub to INT3 so + // as not to cause confusion (otherwise you might see some strange + // instructions if you look at the disassembly, or even invalid + // instructions). Also, by doing this, we will break into the debugger if + // some code calls into this portion of the code. If this happens, it + // means that this function cannot be patched using this patcher without + // further thought. + if (preamble_bytes > kRequiredTargetPatchBytes) { + memset(reinterpret_cast<void*>(target + kRequiredTargetPatchBytes), + ASM_INT3, preamble_bytes - kRequiredTargetPatchBytes); + } + + // Inv: The memory pointed to by target_function now points to a relative + // jump instruction that jumps over to the preamble_stub. The preamble + // stub contains the first stub_size bytes of the original target + // function's preamble code, followed by a relative jump back to the next + // instruction after the first cbPreamble bytes. + // + // In 64-bit mode the memory pointed to by target_function *may* point to a + // relative jump instruction that jumps to a trampoline which will then + // perform an absolute jump to the replacement function. The preamble stub + // still contains the original target function's preamble code, followed by a + // jump back to the instructions after the first preamble bytes. 
+ // + return SIDESTEP_SUCCESS; +} + +}; // namespace sidestep diff --git a/src/third_party/gperftools-2.7/src/windows/shortproc.asm b/src/third_party/gperftools-2.7/src/windows/shortproc.asm new file mode 100644 index 00000000000..7e8e3d78395 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/shortproc.asm @@ -0,0 +1,169 @@ +; Copyright (c) 2011, Google Inc. +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions are +; met: +; +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above +; copyright notice, this list of conditions and the following disclaimer +; in the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Google Inc. nor the names of its +; contributors may be used to endorse or promote products derived from +; this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +; +; --- +; Author: Scott Francis +; +; Unit tests for PreamblePatcher
+
+.MODEL small
+
+.CODE
+
+TooShortFunction PROC
+ ret
+TooShortFunction ENDP
+
+JumpShortCondFunction PROC
+ test cl, 1
+ jnz jumpspot
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+ int 3
+jumpspot:
+ nop
+ nop
+ nop
+ nop
+ mov rax, 1
+ ret
+JumpShortCondFunction ENDP
+
+JumpNearCondFunction PROC
+ test cl, 1
+ jnz jumpspot
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+jumpspot:
+ nop
+ nop
+ mov rax, 1
+ ret
+JumpNearCondFunction ENDP
+
+JumpAbsoluteFunction PROC
+ test cl, 1
+ jmp jumpspot
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+jumpspot:
+ nop
+ nop
+ mov rax, 1
+ ret
+JumpAbsoluteFunction ENDP
+
+CallNearRelativeFunction PROC
+ test cl, 1
+ call TooShortFunction
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ mov rdx, 0ffff1111H
+ nop
+ nop
+ nop
+ ret
+CallNearRelativeFunction ENDP
+
+END
diff --git a/src/third_party/gperftools-2.7/src/windows/system-alloc.cc b/src/third_party/gperftools-2.7/src/windows/system-alloc.cc new file mode 100644 index 00000000000..ea1f17d95d7 --- /dev/null +++ b/src/third_party/gperftools-2.7/src/windows/system-alloc.cc @@ -0,0 +1,204 @@ +// Copyright (c) 2013, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Petr Hosek + +#ifndef _WIN32 +# error You should only be including windows/system-alloc.cc in a windows environment! 
+#endif + +#include <config.h> +#include <windows.h> +#include <algorithm> // std::min +#include <gperftools/malloc_extension.h> +#include "base/logging.h" +#include "base/spinlock.h" +#include "internal_logging.h" +#include "system-alloc.h" + +static SpinLock spinlock(SpinLock::LINKER_INITIALIZED); + +// The current system allocator declaration +SysAllocator* tcmalloc_sys_alloc = NULL; +// Number of bytes taken from system. +size_t TCMalloc_SystemTaken = 0; + +class VirtualSysAllocator : public SysAllocator { +public: + VirtualSysAllocator() : SysAllocator() { + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); +}; +static char virtual_space[sizeof(VirtualSysAllocator)]; + +// This is mostly like MmapSysAllocator::Alloc, except it does these weird +// munmap's in the middle of the page, which is forbidden in windows. +void* VirtualSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { + // Align on the pagesize boundary + const int pagesize = getpagesize(); + if (alignment < pagesize) alignment = pagesize; + size = ((size + alignment - 1) / alignment) * alignment; + + // Report the total number of bytes the OS actually delivered. This might be + // greater than |size| because of alignment concerns. The full size is + // necessary so that adjacent spans can be coalesced. + // TODO(antonm): proper processing of alignments + // in actual_size and decommitting. + if (actual_size) { + *actual_size = size; + } + + // We currently do not support alignments larger than the pagesize or + // alignments that are not multiples of the pagesize after being floored. + // If this ability is needed it can be done by the caller (assuming it knows + // the page size). + assert(alignment <= pagesize); + + void* result = VirtualAlloc(0, size, + MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE); + if (result == NULL) + return NULL; + + // If the result is not aligned memory fragmentation will result which can + // lead to pathological memory use. 
+ assert((reinterpret_cast<uintptr_t>(result) & (alignment - 1)) == 0); + + return result; +} + +#ifdef _MSC_VER + +extern "C" SysAllocator* tc_get_sysalloc_override(SysAllocator *def); +extern "C" SysAllocator* tc_get_sysalloc_default(SysAllocator *def) +{ + return def; +} + +#if defined(_M_IX86) +#pragma comment(linker, "/alternatename:_tc_get_sysalloc_override=_tc_get_sysalloc_default") +#elif defined(_M_X64) +#pragma comment(linker, "/alternatename:tc_get_sysalloc_override=tc_get_sysalloc_default") +#endif + +#else // !_MSC_VER + +extern "C" ATTRIBUTE_NOINLINE +SysAllocator* tc_get_sysalloc_override(SysAllocator *def) +{ + return def; +} + +#endif + +static bool system_alloc_inited = false; +void InitSystemAllocators(void) { + VirtualSysAllocator *alloc = new (virtual_space) VirtualSysAllocator(); + tcmalloc_sys_alloc = tc_get_sysalloc_override(alloc); +} + +extern PERFTOOLS_DLL_DECL +void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, + size_t alignment) { + SpinLockHolder lock_holder(&spinlock); + + if (!system_alloc_inited) { + InitSystemAllocators(); + system_alloc_inited = true; + } + + void* result = tcmalloc_sys_alloc->Alloc(size, actual_size, alignment); + if (result != NULL) { + if (actual_size) { + TCMalloc_SystemTaken += *actual_size; + } else { + TCMalloc_SystemTaken += size; + } + } + return result; +} + +extern PERFTOOLS_DLL_DECL +bool TCMalloc_SystemRelease(void* start, size_t length) { + if (VirtualFree(start, length, MEM_DECOMMIT)) + return true; + + // The decommit may fail if the memory region consists of allocations + // from more than one call to VirtualAlloc. In this case, fall back to + // using VirtualQuery to retrieve the allocation boundaries and decommit + // them each individually. 
+ + char* ptr = static_cast<char*>(start); + char* end = ptr + length; + MEMORY_BASIC_INFORMATION info; + while (ptr < end) { + size_t resultSize = VirtualQuery(ptr, &info, sizeof(info)); + assert(resultSize == sizeof(info)); + size_t decommitSize = std::min<size_t>(info.RegionSize, end - ptr); + BOOL success = VirtualFree(ptr, decommitSize, MEM_DECOMMIT); + assert(success == TRUE); + ptr += decommitSize; + } + + return true; +} + +extern PERFTOOLS_DLL_DECL +void TCMalloc_SystemCommit(void* start, size_t length) { + if (VirtualAlloc(start, length, MEM_COMMIT, PAGE_READWRITE) == start) + return; + + // The commit may fail if the memory region consists of allocations + // from more than one call to VirtualAlloc. In this case, fall back to + // using VirtualQuery to retrieve the allocation boundaries and commit them + // each individually. + + char* ptr = static_cast<char*>(start); + char* end = ptr + length; + MEMORY_BASIC_INFORMATION info; + while (ptr < end) { + size_t resultSize = VirtualQuery(ptr, &info, sizeof(info)); + assert(resultSize == sizeof(info)); + + size_t commitSize = std::min<size_t>(info.RegionSize, end - ptr); + void* newAddress = VirtualAlloc(ptr, commitSize, MEM_COMMIT, + PAGE_READWRITE); + assert(newAddress == ptr); + ptr += commitSize; + } +} + +bool RegisterSystemAllocator(SysAllocator *allocator, int priority) { + return false; // we don't allow registration on windows, right now +} + +void DumpSystemAllocatorStats(TCMalloc_Printer* printer) { + // We don't dump stats on windows, right now +} diff --git a/src/third_party/scripts/gperftools_get_sources.sh b/src/third_party/scripts/gperftools_get_sources.sh index 6b3b8723407..ede6971ceb6 100644..100755 --- a/src/third_party/scripts/gperftools_get_sources.sh +++ b/src/third_party/scripts/gperftools_get_sources.sh @@ -20,7 +20,7 @@ if [ "$#" -ne 0 ]; then exit 1 fi -VERSION=2.5 +VERSION=2.7 NAME=gperftools TARBALL=$NAME-$VERSION.tar.gz TARBALL_DIR=$NAME-$VERSION @@ -62,8 +62,7 @@ cd $TEMP_DIR # 
Adjust config.h, See note 2 at top of file mkdir $DEST_DIR/build_$TARGET_UNAME || true -sed "s/.*MALLOC_HOOK_MAYBE_VOLATILE.*/\/* #undef MALLOC_HOOK_MAYBE_VOLATILE *\//" < src/config.h \ - > $DEST_DIR/build_$TARGET_UNAME/config.h +cp src/config.h $DEST_DIR/build_$TARGET_UNAME/config.h # Generate tcmalloc.h # See note 3 at top of file diff --git a/src/third_party/wiredtiger/SConscript b/src/third_party/wiredtiger/SConscript index ff1fde21b10..c315166275e 100644 --- a/src/third_party/wiredtiger/SConscript +++ b/src/third_party/wiredtiger/SConscript @@ -52,7 +52,7 @@ if env.TargetOSIs('windows'): env.Append(CFLAGS=[ "/wd4090" # Ignore warning about mismatched const qualifiers ]) - if env['MONGO_ALLOCATOR'] == 'tcmalloc': + if env['MONGO_ALLOCATOR'] in ['tcmalloc', 'tcmalloc-experimental']: env.InjectThirdPartyIncludePaths(libraries=['gperftools']) env.Append(CPPDEFINES=['HAVE_LIBTCMALLOC']) elif env.TargetOSIs('darwin'): |