38 files changed, 2004 insertions, 502 deletions
@@ -1,3 +1,13 @@ +Tue Mar 18 14:30:44 2008 Google Inc. <opensource@google.com> + + * google-perftools: version 0.96 release + * major atomicops rewrite; fixed atomic ops code for linux/ppc (vchen) + * nix the stacktrace library; now build structure is simpler (csilvers) + * Speed up heap-checker, and reduce extraneous logging (maxim) + * Improve itimer code for NPTL case (cgd) + * Add source code annotations for use by valgrind, etc (kcc) + * PORTING: Fix high resolution timers for Mac OS X (adlr) + Tue Feb 19 12:01:31 2008 Google Inc. <opensource@google.com> * google-perftools: version 0.95.1 release (bugfix release) @@ -79,7 +79,7 @@ Perftools has been tested on the following systems: Windows XP, Visual Studio 2005 (VC++ 8) (x86) Windows XP, MinGW 5.1.3 (x86) -It works in its full generality on the Linux x86 and x86_64 systems +It works in its full generality on the Linux systems tested (though see 64-bit notes above). Portions of perftools work on the other systems. The basic memory-allocation library, tcmalloc_minimal, works on all systems. The cpu-profiler also works @@ -125,20 +125,14 @@ above, by linking in libtcmalloc_minimal. I have not tested other *BSD systems, but they are probably similar. -** Linux/PPC: - - I've tested on a PowerPC Linux box using qemu against Debian Etch - (4.0). Most of the tests pass. The heap-checker unittest does not - pass for reasons which are not yet clear but seem to be related to - the clone() system call. Heap checking may work properly for - single-threaded programs, though I haven't tested that. - ** Mac OS X: I've tested OS X 10.5 [Leopard], OS X 10.4 [Tiger] and OS X 10.3 - [Panther] on both intel (x86) and PowerPC systems. For Panther/ppc + [Panther] on both intel (x86) and PowerPC systems. For Panther systems, perftools does not work at all: it depends on a header - file, OSAtomic.h, which is new in 10.4. + file, OSAtomic.h, which is new in 10.4. (It's possible to get the + code working for Panther/i386 without too much work; if you're + interested in exploring this, drop an e-mail.) For the other seven systems, the binaries and libraries that successfully build are exactly the same as for FreeBSD. 
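The "major atomicops rewrite" noted above replaces the old pointer-sized AtomicWord primitives with explicitly sized Atomic32/Atomic64 operations in a new base::subtle namespace, named for their barrier semantics (NoBarrier_, Acquire_, Release_, Barrier_). A minimal sketch of how client code uses the reworked interface, assuming the src/base/atomicops.h dispatch header referenced in the build files below; the SpinLock wrapper itself is illustrative and not part of this patch:

#include "base/atomicops.h"

class SpinLock {
 public:
  SpinLock() : state_(0) {}
  void Lock() {
    // Acquire semantics: reads and writes in the critical section cannot
    // be reordered above the compare-and-swap. The call returns the value
    // it found at the address, so seeing 0 means we took the lock.
    while (base::subtle::Acquire_CompareAndSwap(&state_, 0, 1) != 0) {
      // Spin; a production lock would back off or yield here.
    }
  }
  void Unlock() {
    // Release semantics: reads and writes in the critical section cannot
    // be reordered below this store.
    base::subtle::Release_Store(&state_, 0);
  }
 private:
  Atomic32 state_;  // an explicitly 32-bit type in every port changed below
};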
diff --git a/Makefile.am b/Makefile.am index 6ac4748..e13086f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -81,9 +81,11 @@ dist_doc_DATA += doc/index.html doc/designstyle.css # This is a 'convenience library' -- it's not actually installed or anything LOGGING_INCLUDES = src/base/logging.h \ src/base/commandlineflags.h \ - src/base/basictypes.h + src/base/basictypes.h \ + src/base/dynamic_annotations.h noinst_LTLIBRARIES += liblogging.la liblogging_la_SOURCES = src/base/logging.cc \ + src/base/dynamic_annotations.cc \ $(LOGGING_INCLUDES) SYSINFO_INCLUDES = src/base/sysinfo.h \ @@ -153,7 +155,7 @@ low_level_alloc_unittest_SOURCES = src/base/low_level_alloc.cc \ src/malloc_hook.cc \ src/tests/low_level_alloc_unittest.cc \ $(LOW_LEVEL_ALLOC_UNITTEST_INCLUDES) -low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) libstacktrace.la +low_level_alloc_unittest_LDADD = libstacktrace.la if !MINGW TESTS += atomicops_unittest @@ -181,11 +183,9 @@ STACKTRACE_INCLUDES = $(S_STACKTRACE_INCLUDES) $(SG_STACKTRACE_INCLUDES) googleinclude_HEADERS += $(SG_STACKTRACE_INCLUDES) ### Making the library -lib_LTLIBRARIES += libstacktrace.la +noinst_LTLIBRARIES += libstacktrace.la libstacktrace_la_SOURCES = src/stacktrace.cc \ $(STACKTRACE_INCLUDES) -# TODO(csilvers): only add these two things when stacktrace.cc would -# #include "stacktrace_libunwind-inl.h" libstacktrace_la_LIBADD = $(UNWIND_LIBS) $(LIBSPINLOCK) STACKTRACE_SYMBOLS = '(GetStackTrace)' libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS) @@ -257,19 +257,9 @@ libtcmalloc_minimal_la_SOURCES = src/internal_logging.cc \ libtcmalloc_minimal_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) \ - libstacktrace.la $(LIBSPINLOCK) - -# Whenever we link in tcmalloc_minimal, we also need to link in -# libstacktrace.so (we also need libspinlock and liblogging, but those -# are created as .a's, not .so's). libtool should do this for us, via -# the LIBADD above. But on some systems, -rpath doesn't work -# properly, and whatever libtool does fails. So we just manually link -# in -lstacktrace whenever linking in -ltcmalloc_minimal. -# (Note this isn't a problem for an *installed* tcmalloc, because then -# everything lives in /usr/lib or /usr/local/lib, which is on the -# linker search path, so the value of -rpath doesn't matter.) -# Remember tcmalloc should always be linked in last! -LIBTCMALLOC_MINIMAL = libstacktrace.la libtcmalloc_minimal.la + libstacktrace.la + +LIBTCMALLOC_MINIMAL = libtcmalloc_minimal.la ### Unittests @@ -495,12 +485,9 @@ libtcmalloc_la_SOURCES = src/internal_logging.cc \ libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) \ - libstacktrace.la $(LIBSPINLOCK) + libstacktrace.la -# See discussion above (under LIBTCMALLOC_MINIMAL) for why we do this. -# Basically it's to work around systems where --rpath doesn't work right. -# Remember tcmalloc should always be linked in last! -LIBTCMALLOC = libstacktrace.la libtcmalloc.la +LIBTCMALLOC = libtcmalloc.la ### Unittests @@ -518,6 +505,7 @@ tcmalloc_unittest_LDADD = $(LIBTCMALLOC) liblogging.la $(PTHREAD_LIBS) # This makes sure it's safe to link in both tcmalloc and tcmalloc_minimal. # (One would never do this on purpose, but perhaps by accident...)
+# We also link in libprofiler to make sure that works too TESTS += tcmalloc_both_unittest tcmalloc_both_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \ src/tests/testutil.h src/tests/testutil.cc \ @@ -525,7 +513,7 @@ tcmalloc_both_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \ tcmalloc_both_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) tcmalloc_both_unittest_LDFLAGS = $(PTHREAD_CFLAGS) tcmalloc_both_unittest_LDADD = $(LIBTCMALLOC) $(LIBTCMALLOC_MINIMAL) \ - liblogging.la $(PTHREAD_LIBS) + libprofiler.la liblogging.la $(PTHREAD_LIBS) TESTS += tcmalloc_large_unittest tcmalloc_large_unittest_SOURCES = src/tests/tcmalloc_large_unittest.cc @@ -619,7 +607,7 @@ lib_LTLIBRARIES += libprofiler.la libprofiler_la_SOURCES = src/profiler.cc \ src/profiledata.cc \ $(CPU_PROFILER_INCLUDES) -libprofiler_la_LIBADD = $(LIBSPINLOCK) libstacktrace.la +libprofiler_la_LIBADD = libstacktrace.la # We have to include ProfileData for profiledata_unittest CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStop|ProfilerEnable|ProfilerDisable|ProfilerFlush|ProfilerRegisterThread|ProfileData)' libprofiler_la_LDFLAGS = -export-symbols-regex $(CPU_PROFILER_SYMBOLS) diff --git a/Makefile.in b/Makefile.in index f47672a..888586b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -78,6 +78,7 @@ EXTRA_PROGRAMS = ptmalloc_unittest1$(EXEEXT) \ # This makes sure it's safe to link in both tcmalloc and tcmalloc_minimal. # (One would never do this on purpose, but perhaps by accident...) +# We also link in libprofiler to make sure that works too ### Unittests @MINGW_FALSE@am__append_12 = tcmalloc_unittest tcmalloc_both_unittest \ @@ -147,14 +148,10 @@ libLTLIBRARIES_INSTALL = $(INSTALL) LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES) liblogging_la_LIBADD = am__objects_1 = -am_liblogging_la_OBJECTS = logging.lo $(am__objects_1) +am_liblogging_la_OBJECTS = logging.lo dynamic_annotations.lo \ + $(am__objects_1) liblogging_la_OBJECTS = $(am_liblogging_la_OBJECTS) -@MINGW_FALSE@am__DEPENDENCIES_1 = libspinlock.la libsysinfo.la \ -@MINGW_FALSE@ liblogging.la -@MINGW_TRUE@am__DEPENDENCIES_1 = libwindows.la libsysinfo.la \ -@MINGW_TRUE@ liblogging.la -@MINGW_FALSE@libprofiler_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \ -@MINGW_FALSE@ libstacktrace.la +@MINGW_FALSE@libprofiler_la_DEPENDENCIES = libstacktrace.la am__libprofiler_la_SOURCES_DIST = src/profiler.cc src/profiledata.cc \ src/profiledata.h src/getpc.h src/base/basictypes.h \ src/base/commandlineflags.h src/base/googleinit.h \ @@ -163,7 +160,8 @@ am__libprofiler_la_SOURCES_DIST = src/profiler.cc src/profiledata.cc \ src/base/atomicops-internals-macosx.h \ src/base/atomicops-internals-linuxppc.h \ src/base/atomicops-internals-x86-msvc.h \ - src/base/atomicops-internals-x86.h src/google/profiler.h \ + src/base/atomicops-internals-x86.h \ + src/base/dynamic_annotations.h src/google/profiler.h \ src/google/stacktrace.h @MINGW_FALSE@am__objects_2 = $(am__objects_1) $(am__objects_1) @MINGW_FALSE@am__objects_3 = $(am__objects_2) $(am__objects_1) @@ -182,17 +180,21 @@ am__libspinlock_la_SOURCES_DIST = src/base/spinlock.cc \ @MINGW_FALSE@ atomicops-internals-x86.lo $(am__objects_1) libspinlock_la_OBJECTS = $(am_libspinlock_la_OBJECTS) @MINGW_FALSE@am_libspinlock_la_rpath = -am__DEPENDENCIES_2 = -libstacktrace_la_DEPENDENCIES = $(am__DEPENDENCIES_2) \ - $(am__DEPENDENCIES_1) +am__DEPENDENCIES_1 = +@MINGW_FALSE@am__DEPENDENCIES_2 = libspinlock.la libsysinfo.la \ +@MINGW_FALSE@ liblogging.la +@MINGW_TRUE@am__DEPENDENCIES_2 = libwindows.la libsysinfo.la \ +@MINGW_TRUE@ liblogging.la 
+libstacktrace_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_2) am__objects_4 = $(am__objects_1) $(am__objects_1) am_libstacktrace_la_OBJECTS = stacktrace.lo $(am__objects_4) libstacktrace_la_OBJECTS = $(am_libstacktrace_la_OBJECTS) libsysinfo_la_LIBADD = am_libsysinfo_la_OBJECTS = sysinfo.lo $(am__objects_1) libsysinfo_la_OBJECTS = $(am_libsysinfo_la_OBJECTS) -@MINGW_FALSE@libtcmalloc_la_DEPENDENCIES = $(am__DEPENDENCIES_2) \ -@MINGW_FALSE@ libstacktrace.la $(am__DEPENDENCIES_1) +@MINGW_FALSE@libtcmalloc_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \ +@MINGW_FALSE@ libstacktrace.la am__libtcmalloc_la_SOURCES_DIST = src/internal_logging.cc \ src/system-alloc.cc src/memfs_malloc.cc src/tcmalloc.cc \ src/malloc_hook.cc src/malloc_extension.cc \ @@ -212,9 +214,10 @@ am__libtcmalloc_la_SOURCES_DIST = src/internal_logging.cc \ src/base/atomicops-internals-linuxppc.h \ src/base/atomicops-internals-x86-msvc.h \ src/base/atomicops-internals-x86.h src/base/logging.h \ - src/google/malloc_hook.h src/google/malloc_extension.h \ - src/google/heap-profiler.h src/google/heap-checker.h \ - src/google/stacktrace.h src/heap-checker-bcad.cc + src/base/dynamic_annotations.h src/google/malloc_hook.h \ + src/google/malloc_extension.h src/google/heap-profiler.h \ + src/google/heap-checker.h src/google/stacktrace.h \ + src/heap-checker-bcad.cc @MINGW_FALSE@am__objects_5 = libtcmalloc_la-system-alloc.lo @MINGW_FALSE@am__objects_6 = libtcmalloc_la-maybe_threads.lo @MINGW_FALSE@am_libtcmalloc_la_OBJECTS = \ @@ -234,8 +237,8 @@ am__libtcmalloc_la_SOURCES_DIST = src/internal_logging.cc \ @MINGW_FALSE@ libtcmalloc_la-heap-checker-bcad.lo libtcmalloc_la_OBJECTS = $(am_libtcmalloc_la_OBJECTS) @MINGW_FALSE@am_libtcmalloc_la_rpath = -rpath $(libdir) -libtcmalloc_minimal_la_DEPENDENCIES = $(am__DEPENDENCIES_2) \ - libstacktrace.la $(am__DEPENDENCIES_1) +libtcmalloc_minimal_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \ + libstacktrace.la am__libtcmalloc_minimal_la_SOURCES_DIST = src/internal_logging.cc \ src/system-alloc.cc src/memfs_malloc.cc src/tcmalloc.cc \ src/malloc_hook.cc src/malloc_extension.cc \ @@ -310,17 +313,18 @@ am__atomicops_unittest_SOURCES_DIST = src/tests/atomicops_unittest.cc \ src/base/atomicops.h src/base/atomicops-internals-macosx.h \ src/base/atomicops-internals-x86-msvc.h \ src/base/atomicops-internals-x86.h src/base/logging.h \ - src/base/commandlineflags.h src/base/basictypes.h + src/base/commandlineflags.h src/base/basictypes.h \ + src/base/dynamic_annotations.h @MINGW_FALSE@am__objects_11 = $(am__objects_1) @MINGW_FALSE@am_atomicops_unittest_OBJECTS = \ @MINGW_FALSE@ atomicops_unittest.$(OBJEXT) $(am__objects_11) atomicops_unittest_OBJECTS = $(am_atomicops_unittest_OBJECTS) -@MINGW_FALSE@atomicops_unittest_DEPENDENCIES = $(am__DEPENDENCIES_1) +@MINGW_FALSE@atomicops_unittest_DEPENDENCIES = $(am__DEPENDENCIES_2) am_frag_unittest_OBJECTS = frag_unittest-frag_unittest.$(OBJEXT) frag_unittest_OBJECTS = $(am_frag_unittest_OBJECTS) -am__DEPENDENCIES_3 = libstacktrace.la libtcmalloc_minimal.la +am__DEPENDENCIES_3 = libtcmalloc_minimal.la frag_unittest_DEPENDENCIES = $(am__DEPENDENCIES_3) \ - $(am__DEPENDENCIES_2) + $(am__DEPENDENCIES_1) am__getpc_test_SOURCES_DIST = src/tests/getpc_test.cc src/getpc.h @MINGW_FALSE@am_getpc_test_OBJECTS = getpc_test.$(OBJEXT) getpc_test_OBJECTS = $(am_getpc_test_OBJECTS) @@ -335,13 +339,14 @@ am__heap_checker_unittest_SOURCES_DIST = \ src/tests/heap-checker_unittest.cc src/config_for_unittests.h \ src/memory_region_map.h src/base/commandlineflags.h \ 
src/base/googleinit.h src/google/heap-checker.h \ - src/base/logging.h src/base/basictypes.h + src/base/logging.h src/base/basictypes.h \ + src/base/dynamic_annotations.h @MINGW_FALSE@am_heap_checker_unittest_OBJECTS = heap_checker_unittest-heap-checker_unittest.$(OBJEXT) \ @MINGW_FALSE@ $(am__objects_11) heap_checker_unittest_OBJECTS = $(am_heap_checker_unittest_OBJECTS) -@MINGW_FALSE@am__DEPENDENCIES_4 = libstacktrace.la libtcmalloc.la +@MINGW_FALSE@am__DEPENDENCIES_4 = libtcmalloc.la @MINGW_FALSE@heap_checker_unittest_DEPENDENCIES = \ -@MINGW_FALSE@ $(am__DEPENDENCIES_2) liblogging.la \ +@MINGW_FALSE@ $(am__DEPENDENCIES_1) liblogging.la \ @MINGW_FALSE@ $(am__DEPENDENCIES_4) am__heap_checker_unittest_sh_SOURCES_DIST = \ src/tests/heap-checker_unittest.sh @@ -356,7 +361,7 @@ am__heap_profiler_unittest_SOURCES_DIST = \ @MINGW_FALSE@ $(am__objects_1) heap_profiler_unittest_OBJECTS = $(am_heap_profiler_unittest_OBJECTS) @MINGW_FALSE@heap_profiler_unittest_DEPENDENCIES = \ -@MINGW_FALSE@ $(am__DEPENDENCIES_4) $(am__DEPENDENCIES_2) +@MINGW_FALSE@ $(am__DEPENDENCIES_4) $(am__DEPENDENCIES_1) am__heap_profiler_unittest_sh_SOURCES_DIST = \ src/tests/heap-profiler_unittest.sh am_heap_profiler_unittest_sh_OBJECTS = @@ -372,20 +377,19 @@ am__low_level_alloc_unittest_SOURCES_DIST = \ src/base/atomicops-internals-linuxppc.h \ src/base/atomicops-internals-x86-msvc.h \ src/base/atomicops-internals-x86.h src/base/logging.h \ - src/base/commandlineflags.h + src/base/commandlineflags.h src/base/dynamic_annotations.h am_low_level_alloc_unittest_OBJECTS = low_level_alloc.$(OBJEXT) \ malloc_hook.$(OBJEXT) low_level_alloc_unittest.$(OBJEXT) \ $(am__objects_4) low_level_alloc_unittest_OBJECTS = \ $(am_low_level_alloc_unittest_OBJECTS) -low_level_alloc_unittest_DEPENDENCIES = $(am__DEPENDENCIES_1) \ - libstacktrace.la +low_level_alloc_unittest_DEPENDENCIES = libstacktrace.la am_markidle_unittest_OBJECTS = \ markidle_unittest-markidle_unittest.$(OBJEXT) \ markidle_unittest-testutil.$(OBJEXT) markidle_unittest_OBJECTS = $(am_markidle_unittest_OBJECTS) markidle_unittest_DEPENDENCIES = $(am__DEPENDENCIES_3) \ - $(am__DEPENDENCIES_2) + $(am__DEPENDENCIES_1) am_maybe_threads_unittest_sh_OBJECTS = maybe_threads_unittest_sh_OBJECTS = \ $(am_maybe_threads_unittest_sh_OBJECTS) @@ -394,7 +398,7 @@ am_memalign_unittest_OBJECTS = \ memalign_unittest-memalign_unittest.$(OBJEXT) memalign_unittest_OBJECTS = $(am_memalign_unittest_OBJECTS) memalign_unittest_DEPENDENCIES = $(am__DEPENDENCIES_3) \ - $(am__DEPENDENCIES_2) + $(am__DEPENDENCIES_1) am_packed_cache_test_OBJECTS = packed-cache_test.$(OBJEXT) packed_cache_test_OBJECTS = $(am_packed_cache_test_OBJECTS) packed_cache_test_LDADD = $(LDADD) @@ -437,7 +441,7 @@ am__profiler3_unittest_SOURCES_DIST = src/tests/profiler_unittest.cc \ @MINGW_FALSE@am_profiler3_unittest_OBJECTS = $(am__objects_14) profiler3_unittest_OBJECTS = $(am_profiler3_unittest_OBJECTS) @MINGW_FALSE@profiler3_unittest_DEPENDENCIES = $(am__DEPENDENCIES_5) \ -@MINGW_FALSE@ $(am__DEPENDENCIES_2) +@MINGW_FALSE@ $(am__DEPENDENCIES_1) am__profiler4_unittest_SOURCES_DIST = src/tests/profiler_unittest.cc \ src/tests/testutil.h src/tests/testutil.cc \ src/config_for_unittests.h src/google/profiler.h @@ -455,11 +459,11 @@ profiler_unittest_sh_LDADD = $(LDADD) am_ptmalloc_unittest1_OBJECTS = ptmalloc_unittest1-t-test1.$(OBJEXT) \ $(am__objects_1) ptmalloc_unittest1_OBJECTS = $(am_ptmalloc_unittest1_OBJECTS) -ptmalloc_unittest1_DEPENDENCIES = $(am__DEPENDENCIES_2) +ptmalloc_unittest1_DEPENDENCIES = $(am__DEPENDENCIES_1) 
am_ptmalloc_unittest2_OBJECTS = ptmalloc_unittest2-t-test2.$(OBJEXT) \ $(am__objects_1) ptmalloc_unittest2_OBJECTS = $(am_ptmalloc_unittest2_OBJECTS) -ptmalloc_unittest2_DEPENDENCIES = $(am__DEPENDENCIES_2) +ptmalloc_unittest2_DEPENDENCIES = $(am__DEPENDENCIES_1) am__objects_16 = $(am__objects_4) $(am__objects_1) am_stacktrace_unittest_OBJECTS = stacktrace_unittest.$(OBJEXT) \ $(am__objects_16) @@ -470,7 +474,7 @@ am__system_alloc_unittest_SOURCES_DIST = src/config_for_unittests.h \ @MINGW_FALSE@am_system_alloc_unittest_OBJECTS = system_alloc_unittest-system-alloc_unittest.$(OBJEXT) system_alloc_unittest_OBJECTS = $(am_system_alloc_unittest_OBJECTS) @MINGW_FALSE@system_alloc_unittest_DEPENDENCIES = \ -@MINGW_FALSE@ $(am__DEPENDENCIES_3) $(am__DEPENDENCIES_2) +@MINGW_FALSE@ $(am__DEPENDENCIES_3) $(am__DEPENDENCIES_1) am__tcmalloc_both_unittest_SOURCES_DIST = \ src/tests/tcmalloc_unittest.cc src/tests/testutil.h \ src/tests/testutil.cc src/config_for_unittests.h \ @@ -481,19 +485,20 @@ am__tcmalloc_both_unittest_SOURCES_DIST = \ tcmalloc_both_unittest_OBJECTS = $(am_tcmalloc_both_unittest_OBJECTS) @MINGW_FALSE@tcmalloc_both_unittest_DEPENDENCIES = \ @MINGW_FALSE@ $(am__DEPENDENCIES_4) $(am__DEPENDENCIES_3) \ -@MINGW_FALSE@ liblogging.la $(am__DEPENDENCIES_2) +@MINGW_FALSE@ libprofiler.la liblogging.la \ +@MINGW_FALSE@ $(am__DEPENDENCIES_1) am__tcmalloc_large_unittest_SOURCES_DIST = \ src/tests/tcmalloc_large_unittest.cc @MINGW_FALSE@am_tcmalloc_large_unittest_OBJECTS = tcmalloc_large_unittest-tcmalloc_large_unittest.$(OBJEXT) tcmalloc_large_unittest_OBJECTS = \ $(am_tcmalloc_large_unittest_OBJECTS) @MINGW_FALSE@tcmalloc_large_unittest_DEPENDENCIES = \ -@MINGW_FALSE@ $(am__DEPENDENCIES_4) $(am__DEPENDENCIES_2) +@MINGW_FALSE@ $(am__DEPENDENCIES_4) $(am__DEPENDENCIES_1) am_tcmalloc_minimal_large_unittest_OBJECTS = tcmalloc_minimal_large_unittest-tcmalloc_large_unittest.$(OBJEXT) tcmalloc_minimal_large_unittest_OBJECTS = \ $(am_tcmalloc_minimal_large_unittest_OBJECTS) tcmalloc_minimal_large_unittest_DEPENDENCIES = $(am__DEPENDENCIES_3) \ - $(am__DEPENDENCIES_2) + $(am__DEPENDENCIES_1) am__tcmalloc_minimal_unittest_SOURCES_DIST = \ src/tests/tcmalloc_unittest.cc src/tests/testutil.h \ src/tests/testutil.cc src/config_for_unittests.h \ @@ -504,7 +509,7 @@ am_tcmalloc_minimal_unittest_OBJECTS = \ tcmalloc_minimal_unittest_OBJECTS = \ $(am_tcmalloc_minimal_unittest_OBJECTS) tcmalloc_minimal_unittest_DEPENDENCIES = $(am__DEPENDENCIES_3) \ - liblogging.la $(am__DEPENDENCIES_2) + liblogging.la $(am__DEPENDENCIES_1) am__tcmalloc_unittest_SOURCES_DIST = src/tests/tcmalloc_unittest.cc \ src/tcmalloc.h src/tests/testutil.h src/tests/testutil.cc \ src/config_for_unittests.h src/google/malloc_extension.h @@ -514,14 +519,14 @@ am__tcmalloc_unittest_SOURCES_DIST = src/tests/tcmalloc_unittest.cc \ @MINGW_FALSE@ $(am__objects_1) tcmalloc_unittest_OBJECTS = $(am_tcmalloc_unittest_OBJECTS) @MINGW_FALSE@tcmalloc_unittest_DEPENDENCIES = $(am__DEPENDENCIES_4) \ -@MINGW_FALSE@ liblogging.la $(am__DEPENDENCIES_2) +@MINGW_FALSE@ liblogging.la $(am__DEPENDENCIES_1) am_thread_dealloc_unittest_OBJECTS = \ thread_dealloc_unittest-thread_dealloc_unittest.$(OBJEXT) \ thread_dealloc_unittest-testutil.$(OBJEXT) thread_dealloc_unittest_OBJECTS = \ $(am_thread_dealloc_unittest_OBJECTS) thread_dealloc_unittest_DEPENDENCIES = $(am__DEPENDENCIES_3) \ - $(am__DEPENDENCIES_2) + $(am__DEPENDENCIES_1) binSCRIPT_INSTALL = $(INSTALL_SCRIPT) SCRIPTS = $(bin_SCRIPTS) $(noinst_SCRIPTS) DEFAULT_INCLUDES = -I. 
-I$(srcdir) -I$(top_builddir)/src @@ -836,13 +841,12 @@ dist_doc_DATA = AUTHORS COPYING ChangeLog INSTALL NEWS README \ # We'll add to this later, on a library-by-library basis ### Making the library +lib_LTLIBRARIES = libtcmalloc_minimal.la $(am__append_11) +# This is for 'convenience libraries' -- basically just a container for sources ### Making the library -lib_LTLIBRARIES = libstacktrace.la libtcmalloc_minimal.la \ - $(am__append_11) -# This is for 'convenience libraries' -- basically just a container for sources noinst_LTLIBRARIES = liblogging.la libsysinfo.la $(am__append_4) \ - $(am__append_5) + $(am__append_5) libstacktrace.la WINDOWS_PROJECTS = google-perftools.sln \ vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj \ vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj \ @@ -896,9 +900,11 @@ noinst_SCRIPTS = $(maybe_threads_unittest_sh_SOURCES) $(am__append_13) # This is a 'convenience library' -- it's not actually installed or anything LOGGING_INCLUDES = src/base/logging.h \ src/base/commandlineflags.h \ - src/base/basictypes.h + src/base/basictypes.h \ + src/base/dynamic_annotations.h liblogging_la_SOURCES = src/base/logging.cc \ + src/base/dynamic_annotations.cc \ $(LOGGING_INCLUDES) SYSINFO_INCLUDES = src/base/sysinfo.h \ @@ -963,7 +969,7 @@ low_level_alloc_unittest_SOURCES = src/base/low_level_alloc.cc \ src/tests/low_level_alloc_unittest.cc \ $(LOW_LEVEL_ALLOC_UNITTEST_INCLUDES) -low_level_alloc_unittest_LDADD = $(LIBSPINLOCK) libstacktrace.la +low_level_alloc_unittest_LDADD = libstacktrace.la @MINGW_FALSE@ATOMICOPS_UNITTEST_INCLUDES = src/base/atomicops.h \ @MINGW_FALSE@ src/base/atomicops-internals-macosx.h \ @MINGW_FALSE@ src/base/atomicops-internals-x86-msvc.h \ @@ -989,8 +995,6 @@ STACKTRACE_INCLUDES = $(S_STACKTRACE_INCLUDES) $(SG_STACKTRACE_INCLUDES) libstacktrace_la_SOURCES = src/stacktrace.cc \ $(STACKTRACE_INCLUDES) -# TODO(csilvers): only add these two things when stacktrace.cc would -# #include "stacktrace_libunwind-inl.h" libstacktrace_la_LIBADD = $(UNWIND_LIBS) $(LIBSPINLOCK) STACKTRACE_SYMBOLS = '(GetStackTrace)' libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS) @@ -1042,20 +1046,9 @@ libtcmalloc_minimal_la_SOURCES = src/internal_logging.cc \ libtcmalloc_minimal_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) libtcmalloc_minimal_la_LIBADD = $(PTHREAD_LIBS) \ - libstacktrace.la $(LIBSPINLOCK) - - -# Whenever we link in tcmalloc_minimal, we also need to link in -# libstacktrace.so (we also need libspinlock and liblogging, but those -# are created as .a's, not .so's). libtool should do this for us, via -# the LIBADD above. But on some systems, -rpath doesn't work -# properly, and whatever libtool does fails. So we just manually link -# in -lstacktrace whenever linking in -ltcmalloc_minimal. -# (Note this isn't a problem for an *installed* tcmalloc, because then -# everything lives in /usr/lib or /usr/local/lib, which is on the -# linker search path, so the value of -rpath doesn't matter.) -# Remember tcmalloc should always be linked in last! 
-LIBTCMALLOC_MINIMAL = libstacktrace.la libtcmalloc_minimal.la + libstacktrace.la + +LIBTCMALLOC_MINIMAL = libtcmalloc_minimal.la tcmalloc_minimal_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \ src/tests/testutil.h src/tests/testutil.cc \ $(TCMALLOC_UNITTEST_INCLUDES) @@ -1185,13 +1178,9 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS) @MINGW_FALSE@libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) @MINGW_FALSE@libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) @MINGW_FALSE@libtcmalloc_la_LIBADD = $(PTHREAD_LIBS) \ -@MINGW_FALSE@ libstacktrace.la $(LIBSPINLOCK) - +@MINGW_FALSE@ libstacktrace.la -# See discussion above (under LIBTCMALLOC_MINIMAL) for why we do this. -# Basically it's to work around systems where --rpath doesn't work right. -# Remember tcmalloc should always be linked in last! -@MINGW_FALSE@LIBTCMALLOC = libstacktrace.la libtcmalloc.la +@MINGW_FALSE@LIBTCMALLOC = libtcmalloc.la @MINGW_FALSE@TCMALLOC_UNITTEST_INCLUDES = src/config_for_unittests.h \ @MINGW_FALSE@ src/google/malloc_extension.h @@ -1210,7 +1199,7 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS) @MINGW_FALSE@tcmalloc_both_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) @MINGW_FALSE@tcmalloc_both_unittest_LDFLAGS = $(PTHREAD_CFLAGS) @MINGW_FALSE@tcmalloc_both_unittest_LDADD = $(LIBTCMALLOC) $(LIBTCMALLOC_MINIMAL) \ -@MINGW_FALSE@ liblogging.la $(PTHREAD_LIBS) +@MINGW_FALSE@ libprofiler.la liblogging.la $(PTHREAD_LIBS) @MINGW_FALSE@tcmalloc_large_unittest_SOURCES = src/tests/tcmalloc_large_unittest.cc @MINGW_FALSE@tcmalloc_large_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) @@ -1267,7 +1256,7 @@ ptmalloc_unittest2_LDADD = $(PTHREAD_LIBS) @MINGW_FALSE@ src/profiledata.cc \ @MINGW_FALSE@ $(CPU_PROFILER_INCLUDES) -@MINGW_FALSE@libprofiler_la_LIBADD = $(LIBSPINLOCK) libstacktrace.la +@MINGW_FALSE@libprofiler_la_LIBADD = libstacktrace.la # We have to include ProfileData for profiledata_unittest @MINGW_FALSE@CPU_PROFILER_SYMBOLS = '(ProfilerStart|ProfilerStop|ProfilerEnable|ProfilerDisable|ProfilerFlush|ProfilerRegisterThread|ProfileData)' @MINGW_FALSE@libprofiler_la_LDFLAGS = -export-symbols-regex $(CPU_PROFILER_SYMBOLS) @@ -1413,7 +1402,7 @@ libprofiler.la: $(libprofiler_la_OBJECTS) $(libprofiler_la_DEPENDENCIES) libspinlock.la: $(libspinlock_la_OBJECTS) $(libspinlock_la_DEPENDENCIES) $(CXXLINK) $(am_libspinlock_la_rpath) $(libspinlock_la_LDFLAGS) $(libspinlock_la_OBJECTS) $(libspinlock_la_LIBADD) $(LIBS) libstacktrace.la: $(libstacktrace_la_OBJECTS) $(libstacktrace_la_DEPENDENCIES) - $(CXXLINK) -rpath $(libdir) $(libstacktrace_la_LDFLAGS) $(libstacktrace_la_OBJECTS) $(libstacktrace_la_LIBADD) $(LIBS) + $(CXXLINK) $(libstacktrace_la_LDFLAGS) $(libstacktrace_la_OBJECTS) $(libstacktrace_la_LIBADD) $(LIBS) libsysinfo.la: $(libsysinfo_la_OBJECTS) $(libsysinfo_la_DEPENDENCIES) $(CXXLINK) $(libsysinfo_la_LDFLAGS) $(libsysinfo_la_OBJECTS) $(libsysinfo_la_LIBADD) $(LIBS) libtcmalloc.la: $(libtcmalloc_la_OBJECTS) $(libtcmalloc_la_DEPENDENCIES) @@ -1545,6 +1534,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/addressmap_unittest-addressmap_unittest.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomicops-internals-x86.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomicops_unittest.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dynamic_annotations.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/frag_unittest-frag_unittest.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getpc_test.Po@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/heap_checker_unittest-heap-checker_unittest.Po@am__quote@ @@ -1706,6 +1696,13 @@ logging.lo: src/base/logging.cc @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o logging.lo `test -f 'src/base/logging.cc' || echo '$(srcdir)/'`src/base/logging.cc +dynamic_annotations.lo: src/base/dynamic_annotations.cc +@am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT dynamic_annotations.lo -MD -MP -MF "$(DEPDIR)/dynamic_annotations.Tpo" -c -o dynamic_annotations.lo `test -f 'src/base/dynamic_annotations.cc' || echo '$(srcdir)/'`src/base/dynamic_annotations.cc; \ +@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/dynamic_annotations.Tpo" "$(DEPDIR)/dynamic_annotations.Plo"; else rm -f "$(DEPDIR)/dynamic_annotations.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='src/base/dynamic_annotations.cc' object='dynamic_annotations.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o dynamic_annotations.lo `test -f 'src/base/dynamic_annotations.cc' || echo '$(srcdir)/'`src/base/dynamic_annotations.cc + profiler.lo: src/profiler.cc @am__fastdepCXX_TRUE@ if $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT profiler.lo -MD -MP -MF "$(DEPDIR)/profiler.Tpo" -c -o profiler.lo `test -f 'src/profiler.cc' || echo '$(srcdir)/'`src/profiler.cc; \ @am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/profiler.Tpo" "$(DEPDIR)/profiler.Plo"; else rm -f "$(DEPDIR)/profiler.Tpo"; exit 1; fi @@ -41,8 +41,7 @@ CPU PROFILER STACKTRACE -1) Document and advertise libstacktrace -2) Remove dependency on linux/x86 +1) Remove dependency on linux/x86 --- -4 April 2007 +11 March 2008 @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.59 for google-perftools 0.95.1. +# Generated by GNU Autoconf 2.59 for google-perftools 0.96. # # Report bugs to <opensource@google.com>. # @@ -423,8 +423,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='google-perftools' PACKAGE_TARNAME='google-perftools' -PACKAGE_VERSION='0.95.1' -PACKAGE_STRING='google-perftools 0.95.1' +PACKAGE_VERSION='0.96' +PACKAGE_STRING='google-perftools 0.96' PACKAGE_BUGREPORT='opensource@google.com' ac_unique_file="README" @@ -954,7 +954,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures google-perftools 0.95.1 to adapt to many kinds of systems. +\`configure' configures google-perftools 0.96 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1021,7 +1021,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of google-perftools 0.95.1:";; + short | recursive ) echo "Configuration of google-perftools 0.96:";; esac cat <<\_ACEOF @@ -1162,7 +1162,7 @@ fi test -n "$ac_init_help" && exit 0 if $ac_init_version; then cat <<\_ACEOF -google-perftools configure 0.95.1 +google-perftools configure 0.96 generated by GNU Autoconf 2.59 Copyright (C) 2003 Free Software Foundation, Inc. @@ -1176,7 +1176,7 @@ cat >&5 <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by google-perftools $as_me 0.95.1, which was +It was created by google-perftools $as_me 0.96, which was generated by GNU Autoconf 2.59. Invocation command line was $ $0 $@ @@ -1904,7 +1904,7 @@ fi # Define the identity of the package. PACKAGE='google-perftools' - VERSION='0.95.1' + VERSION='0.96' cat >>confdefs.h <<_ACEOF @@ -24092,7 +24092,7 @@ _ASBOX } >&5 cat >&5 <<_CSEOF -This file was extended by google-perftools $as_me 0.95.1, which was +This file was extended by google-perftools $as_me 0.96, which was generated by GNU Autoconf 2.59. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -24155,7 +24155,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -google-perftools config.status 0.95.1 +google-perftools config.status 0.96 configured by $0, generated by GNU Autoconf 2.59, with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" diff --git a/configure.ac b/configure.ac index 291170e..2249f62 100644 --- a/configure.ac +++ b/configure.ac @@ -4,7 +4,7 @@ # make sure we're interpreted by some minimal autoconf AC_PREREQ(2.57) -AC_INIT(google-perftools, 0.95.1, opensource@google.com) +AC_INIT(google-perftools, 0.96, opensource@google.com) # The argument here is just something that should be in the current directory # (for sanity checking) AC_CONFIG_SRCDIR(README) diff --git a/doc/cpuprofile.html b/doc/cpuprofile.html index d30a8aa..c3ba2de 100644 --- a/doc/cpuprofile.html +++ b/doc/cpuprofile.html @@ -30,7 +30,9 @@ profiler data file format is documented separately, <p>To install the CPU profiler into your executable, add <code>-lprofiler</code> to the link-time step for your executable. (It's also probably possible to add in the profiler at run-time using -<code>LD_PRELOAD</code>, but this isn't necessarily recommended.)</p> +<code>LD_PRELOAD</code>, e.g. +<code>% env LD_PRELOAD="/usr/lib/libprofiler.so" <binary></code>, +but this isn't necessarily recommended.)</p> <p>This does <i>not</i> turn on CPU profiling; it just inserts the code. For that reason, it's practical to just always link @@ -78,6 +80,7 @@ environment variables.</p> <tr valign=top> <td><code>CPUPROFILE_FREQUENCY=<i>x</i></code></td> + <td>default: 100</td> <td> How many interrupts/second the cpu-profiler samples. </td> diff --git a/doc/heapprofile.html b/doc/heapprofile.html index b2d500c..c3b9aa2 100644 --- a/doc/heapprofile.html +++ b/doc/heapprofile.html @@ -41,7 +41,7 @@ application, running the code, and analyzing the output.</p> Also, while we don't necessarily recommend this form of usage, it's possible to add in the profiler at run-time using <code>LD_PRELOAD</code>: -<pre>% env LD_PRELOAD="/usr/lib/libtcmalloc.so" <binary></pre> +<pre>% env LD_PRELOAD="/usr/lib/libtcmalloc.so" <binary></pre> <p>This does <i>not</i> turn on heap profiling; it just inserts the code. 
For that reason, it's practical to just always link diff --git a/packages/deb/changelog b/packages/deb/changelog index 167d6c0..ae3a422 100644 --- a/packages/deb/changelog +++ b/packages/deb/changelog @@ -1,3 +1,9 @@ +google-perftools (0.96-1) unstable; urgency=low + + * New upstream release. + + -- Google Inc. <opensource@google.com> Tue, 18 Mar 2008 14:30:44 -0700 + google-perftools (0.95-1) unstable; urgency=low * New upstream release. diff --git a/packages/rpm/rpm.spec b/packages/rpm/rpm.spec index 15c7e63..2c16b7c 100644 --- a/packages/rpm/rpm.spec +++ b/packages/rpm/rpm.spec @@ -66,8 +66,6 @@ rm -rf $RPM_BUILD_ROOT %doc doc/cpuprofile.html doc/cpuprofile-fileformat.html %doc doc/pprof-test-big.gif doc/pprof-test.gif doc/pprof-vsnprintf-big.gif doc/pprof-vsnprintf.gif -%{prefix}/lib/libstacktrace.so.0 -%{prefix}/lib/libstacktrace.so.0.0.0 %{prefix}/lib/libtcmalloc.so.0 %{prefix}/lib/libtcmalloc.so.0.0.0 %{prefix}/lib/libtcmalloc_minimal.so.0 @@ -81,9 +79,6 @@ rm -rf $RPM_BUILD_ROOT %defattr(-,root,root) %{prefix}/include/google -%{prefix}/lib/libstacktrace.a -%{prefix}/lib/libstacktrace.la -%{prefix}/lib/libstacktrace.so %{prefix}/lib/libtcmalloc.a %{prefix}/lib/libtcmalloc.la %{prefix}/lib/libtcmalloc.so diff --git a/src/addressmap-inl.h b/src/addressmap-inl.h index a8cbb77..e1ce1bf 100644 --- a/src/addressmap-inl.h +++ b/src/addressmap-inl.h @@ -91,6 +91,14 @@ #include <sys/types.h> // our last best hope #endif +// This class is thread-unsafe -- that is, instances of this class can +// not be accessed concurrently by multiple threads -- because the +// callback function for Iterate() may mutate contained values. If the +// callback functions you pass do not mutate their Value* argument, +// AddressMap can be treated as thread-compatible -- that is, it's +// safe for multiple threads to call "const" methods on this class, +// but not safe for one thread to call const methods on this class +// while another thread is calling non-const methods on the class. template <class Value> class AddressMap { public: @@ -200,6 +208,8 @@ class AddressMap { // Find cluster object for specified address. If not found // and "create" is true, create the object. If not found // and "create" is false, return NULL. + // + // This method is bitwise-const if create is false. Cluster* FindCluster(Number address, bool create) { // Look in hashtable const Number cluster_id = address >> (kBlockBits + kClusterBits); diff --git a/src/base/atomicops-internals-linuxppc.h b/src/base/atomicops-internals-linuxppc.h index 6ddc5a8..09d227a 100644 --- a/src/base/atomicops-internals-linuxppc.h +++ b/src/base/atomicops-internals-linuxppc.h @@ -37,6 +37,10 @@ #ifndef BASE_ATOMICOPS_INTERNALS_LINUXPPC_H__ #define BASE_ATOMICOPS_INTERNALS_LINUXPPC_H__ +// int32_t and intptr_t seems to be equal on ppc-linux +// There are no Atomic64 implementations in this file. +typedef int32_t Atomic32; + #define LWSYNC_ON_SMP #define PPC405_ERR77(a, b) #define ISYNC_ON_SMP @@ -44,23 +48,38 @@ /* Adapted from atomic_add in asm-powerpc/atomic.h */ inline int32_t OSAtomicAdd32(int32_t amount, int32_t *value) { - int t; + int32_t t; + __asm__ __volatile__( +"1: lwarx %0,0,%3 # atomic_add\n\ + add %0,%2,%0\n" + PPC405_ERR77(0,%3) +" stwcx. %0,0,%3 \n\ + bne- 1b" + : "=&r" (t), "+m" (*value) + : "r" (amount), "r" (value) + : "cc", "memory"); + return t; +} + +inline int32_t OSAtomicAdd32Barrier(int32_t amount, int32_t *value) { + int32_t t; __asm__ __volatile__( "1: lwarx %0,0,%3 # atomic_add\n\ add %0,%2,%0\n" PPC405_ERR77(0,%3) " stwcx. 
%0,0,%3 \n\ bne- 1b" + ISYNC_ON_SMP : "=&r" (t), "+m" (*value) : "r" (amount), "r" (value) - : "cc"); - return *value; + : "cc", "memory"); + return t; } /* Adapted from __cmpxchg_u32 in asm-powerpc/atomic.h */ inline bool OSAtomicCompareAndSwap32(int32_t old_value, int32_t new_value, int32_t *value) { - unsigned int prev; + int32_t prev; __asm__ __volatile__ ( LWSYNC_ON_SMP "1: lwarx %0,0,%2 # __cmpxchg_u32\n\ @@ -69,20 +88,19 @@ inline bool OSAtomicCompareAndSwap32(int32_t old_value, int32_t new_value, PPC405_ERR77(0,%2) " stwcx. %4,0,%2\n\ bne- 1b" - ISYNC_ON_SMP "\n\ 2:" : "=&r" (prev), "+m" (*value) : "r" (value), "r" (old_value), "r" (new_value) : "cc", "memory"); - return true; + return prev == old_value; } /* Adapted from __cmpxchg_u32 in asm-powerpc/atomic.h */ inline int32_t OSAtomicCompareAndSwap32Barrier(int32_t old_value, int32_t new_value, int32_t *value) { - unsigned int prev; + int32_t prev; __asm__ __volatile__ ( LWSYNC_ON_SMP "1: lwarx %0,0,%2 # __cmpxchg_u32\n\ @@ -97,32 +115,27 @@ inline int32_t OSAtomicCompareAndSwap32Barrier(int32_t old_value, : "=&r" (prev), "+m" (*value) : "r" (value), "r" (old_value), "r" (new_value) : "cc", "memory"); - return true; + return prev == old_value; } +namespace base { +namespace subtle { + +typedef int64_t Atomic64; // Defined but unused + inline void MemoryBarrier() { // TODO } -// int32_t and intptr_t seems to be equal on ppc-linux -// therefore we have no extra Atomic32 function versions. -typedef int32_t Atomic32; -typedef intptr_t AtomicWord; - -#define OSAtomicCastIntPtr(p) \ - reinterpret_cast<int32_t *>(const_cast<AtomicWord *>(p)) -#define OSAtomicCompareAndSwapIntPtr OSAtomicCompareAndSwap32 -#define OSAtomicAddIntPtr OSAtomicAdd32 -#define OSAtomicCompareAndSwapIntPtrBarrier OSAtomicCompareAndSwap32Barrier +// 32-bit Versions. 
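The return-value changes in OSAtomicCompareAndSwap32 and its Barrier variant above (return prev == old_value instead of return true) are the substance of the linux/ppc fix noted in the ChangeLog: the old code reported success even when the stwcx. store-conditional lost a race, so retry loops built on top of it could exit without their update ever landing. A hedged, illustrative caller (not from the patch, modeled on the exchange loop that follows) shows why the result matters:

#include <stdint.h>

// Atomically OR a mask into *value using the patched CAS above.
int32_t AtomicFetchOr(int32_t *value, int32_t mask) {
  int32_t old_value;
  do {
    old_value = *value;
    // With the old unconditional "return true", a lost race would fall
    // out of this loop with the OR silently dropped; reporting failure
    // forces a retry until the swap really commits.
  } while (!OSAtomicCompareAndSwap32(old_value, old_value | mask, value));
  return old_value;
}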
- -inline AtomicWord CompareAndSwap(volatile AtomicWord *ptr, - AtomicWord old_value, - AtomicWord new_value) { - AtomicWord prev_value; +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; do { - if (OSAtomicCompareAndSwapIntPtr(old_value, new_value, - OSAtomicCastIntPtr(ptr))) { + if (OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))) { return old_value; } prev_value = *ptr; @@ -130,28 +143,33 @@ inline AtomicWord CompareAndSwap(volatile AtomicWord *ptr, return prev_value; } -inline AtomicWord AtomicExchange(volatile AtomicWord *ptr, - AtomicWord new_value) { - AtomicWord old_value; +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; do { old_value = *ptr; - } while (!OSAtomicCompareAndSwapIntPtr(old_value, new_value, - OSAtomicCastIntPtr(ptr))); + } while (!OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))); return old_value; } +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr, + Atomic32 increment) { + return OSAtomicAdd32(increment, const_cast<Atomic32*>(ptr)); +} -inline AtomicWord AtomicIncrement(volatile AtomicWord *ptr, AtomicWord increment) { - return OSAtomicAddIntPtr(increment, OSAtomicCastIntPtr(ptr)); +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32 *ptr, + Atomic32 increment) { + return OSAtomicAdd32Barrier(increment, const_cast<Atomic32*>(ptr)); } -inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord *ptr, - AtomicWord old_value, - AtomicWord new_value) { - AtomicWord prev_value; +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; do { - if (OSAtomicCompareAndSwapIntPtrBarrier(old_value, new_value, - OSAtomicCastIntPtr(ptr))) { + if (OSAtomicCompareAndSwap32Barrier(old_value, new_value, + const_cast<Atomic32*>(ptr))) { return old_value; } prev_value = *ptr; @@ -159,35 +177,49 @@ inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord *ptr, return prev_value; } -inline AtomicWord Release_CompareAndSwap(volatile AtomicWord *ptr, - AtomicWord old_value, - AtomicWord new_value) { +inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { // The ppc interface does not distinguish between Acquire and // Release memory barriers; they are equivalent. return Acquire_CompareAndSwap(ptr, old_value, new_value); } +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} -inline void Acquire_Store(volatile AtomicWord *ptr, AtomicWord value) { +inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) { *ptr = value; MemoryBarrier(); } -inline void Release_Store(volatile AtomicWord *ptr, AtomicWord value) { +inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) { MemoryBarrier(); *ptr = value; } -inline AtomicWord Acquire_Load(volatile const AtomicWord *ptr) { - AtomicWord value = *ptr; +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) { + Atomic32 value = *ptr; MemoryBarrier(); return value; } -inline AtomicWord Release_Load(volatile const AtomicWord *ptr) { +inline Atomic32 Release_Load(volatile const Atomic32 *ptr) { MemoryBarrier(); return *ptr; } +} // namespace base::subtle +} // namespace base +// NOTE(vchen): The following is also deprecated. 
New callers should use +// the base::subtle namespace. +inline void MemoryBarrier() { + base::subtle::MemoryBarrier(); +} #endif // BASE_ATOMICOPS_INTERNALS_LINUXPPC_H__ diff --git a/src/base/atomicops-internals-macosx.h b/src/base/atomicops-internals-macosx.h index 9d3a486..18c2d3c 100644 --- a/src/base/atomicops-internals-macosx.h +++ b/src/base/atomicops-internals-macosx.h @@ -39,35 +39,86 @@ #define BASE_ATOMICOPS_INTERNALS_MACOSX_H__ typedef int32_t Atomic32; -typedef intptr_t AtomicWord; + +// MacOS uses long for intptr_t, AtomicWord and Atomic32 are always different +// on the Mac, even when they are the same size. Similarly, on __ppc64__, +// AtomicWord and Atomic64 are always different. Thus, we need explicit +// casting. +#ifdef __LP64__ +#define AtomicWordCastType base::subtle::Atomic64 +#else +#define AtomicWordCastType Atomic32 +#endif + +#if defined(__LP64__) || defined(__i386__) +#define BASE_HAS_ATOMIC64 1 // Use only in tests and base/atomic* +#endif #include <libkern/OSAtomic.h> -#ifdef __LP64__ // Indicates 64-bit pointers under OS -#define OSAtomicCastIntPtr(p) \ - reinterpret_cast<int64_t *>(const_cast<AtomicWord *>(p)) -#define OSAtomicCompareAndSwapIntPtr OSAtomicCompareAndSwap64 -#define OSAtomicAddIntPtr OSAtomicAdd64 -#define OSAtomicCompareAndSwapIntPtrBarrier OSAtomicCompareAndSwap64Barrier -#else -#define OSAtomicCastIntPtr(p) \ - reinterpret_cast<int32_t *>(const_cast<AtomicWord *>(p)) -#define OSAtomicCompareAndSwapIntPtr OSAtomicCompareAndSwap32 -#define OSAtomicAddIntPtr OSAtomicAdd32 -#define OSAtomicCompareAndSwapIntPtrBarrier OSAtomicCompareAndSwap32Barrier +#if !defined(__LP64__) && defined(__ppc__) + +// The Mac 64-bit OSAtomic implementations are not available for 32-bit PowerPC, +// while the underlying assembly instructions are available only on some +// implementations of PowerPC. + +// The following inline functions will fail with the error message at compile +// time ONLY IF they are called. So it is safe to use this header if user +// code only calls AtomicWord and Atomic32 operations. +// +// NOTE(vchen): Implementation notes to implement the atomic ops below may +// be found in "PowerPC Virtual Environment Architecture, Book II, +// Version 2.02", January 28, 2005, Appendix B, page 46. Unfortunately, +// extra care must be taken to ensure data are properly 8-byte aligned, and +// that data are returned correctly according to Mac OS X ABI specs. + +inline int64_t OSAtomicCompareAndSwap64( + int64_t oldValue, int64_t newValue, int64_t *theValue) { + __asm__ __volatile__( + "_OSAtomicCompareAndSwap64_not_supported_for_32_bit_ppc\n\t"); + return 0; +} + +inline int64_t OSAtomicAdd64(int64_t theAmount, int64_t *theValue) { + __asm__ __volatile__( + "_OSAtomicAdd64_not_supported_for_32_bit_ppc\n\t"); + return 0; +} + +inline int64_t OSAtomicCompareAndSwap64Barrier( + int64_t oldValue, int64_t newValue, int64_t *theValue) { + int64_t prev = OSAtomicCompareAndSwap64(oldValue, newValue, theValue); + OSMemoryBarrier(); + return prev; +} + +inline int64_t OSAtomicAdd64Barrier( + int64_t theAmount, int64_t *theValue) { + int64_t new_val = OSAtomicAdd64(theAmount, theValue); + OSMemoryBarrier(); + return new_val; +} #endif + +namespace base { +namespace subtle { + +typedef int64_t Atomic64; + inline void MemoryBarrier() { OSMemoryBarrier(); } -inline AtomicWord CompareAndSwap(volatile AtomicWord *ptr, - AtomicWord old_value, - AtomicWord new_value) { - AtomicWord prev_value; +// 32-bit Versions.
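The _not_supported_for_32_bit_ppc stubs above lean on a property of C++ inline functions: their bodies are only emitted (and the bogus instruction mnemonic only reaches the assembler) if some caller actually uses them, so the build breaks only for code that invokes a 64-bit op on ppc32. The same trick in isolation, as a sketch with hypothetical names:

// Compiles and links cleanly as long as nothing calls Unsupported64BitAdd;
// any caller triggers an assembler error naming the missing operation.
inline long long Unsupported64BitAdd(long long amount, long long *value) {
  __asm__ __volatile__(
      "_Unsupported64BitAdd_not_available_on_this_cpu\n\t");
  return 0;
}

int main() {
  return 0;  // never references the stub, so the bad mnemonic is never assembled
}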
+ +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; do { - if (OSAtomicCompareAndSwapIntPtr(old_value, new_value, - OSAtomicCastIntPtr(ptr))) { + if (OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))) { return old_value; } prev_value = *ptr; @@ -75,28 +126,33 @@ inline AtomicWord CompareAndSwap(volatile AtomicWord *ptr, return prev_value; } -inline AtomicWord AtomicExchange(volatile AtomicWord *ptr, - AtomicWord new_value) { - AtomicWord old_value; +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; do { old_value = *ptr; - } while (!OSAtomicCompareAndSwapIntPtr(old_value, new_value, - OSAtomicCastIntPtr(ptr))); + } while (!OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))); return old_value; } +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr, + Atomic32 increment) { + return OSAtomicAdd32(increment, const_cast<Atomic32*>(ptr)); +} -inline AtomicWord AtomicIncrement(volatile AtomicWord *ptr, AtomicWord increment) { - return OSAtomicAddIntPtr(increment, OSAtomicCastIntPtr(ptr)); +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32 *ptr, + Atomic32 increment) { + return OSAtomicAdd32Barrier(increment, const_cast<Atomic32*>(ptr)); } -inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord *ptr, - AtomicWord old_value, - AtomicWord new_value) { - AtomicWord prev_value; +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; do { - if (OSAtomicCompareAndSwapIntPtrBarrier(old_value, new_value, - OSAtomicCastIntPtr(ptr))) { + if (OSAtomicCompareAndSwap32Barrier(old_value, new_value, + const_cast<Atomic32*>(ptr))) { return old_value; } prev_value = *ptr; @@ -104,48 +160,50 @@ inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord *ptr, return prev_value; } -inline AtomicWord Release_CompareAndSwap(volatile AtomicWord *ptr, - AtomicWord old_value, - AtomicWord new_value) { - // The lib kern interface does not distinguish between - // Acquire and Release memory barriers; they are equivalent. +inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { return Acquire_CompareAndSwap(ptr, old_value, new_value); } +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} -inline void Acquire_Store(volatile AtomicWord *ptr, AtomicWord value) { +inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) { *ptr = value; MemoryBarrier(); } -inline void Release_Store(volatile AtomicWord *ptr, AtomicWord value) { +inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) { MemoryBarrier(); *ptr = value; } -inline AtomicWord Acquire_Load(volatile const AtomicWord *ptr) { - AtomicWord value = *ptr; +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) { + Atomic32 value = *ptr; MemoryBarrier(); return value; } -inline AtomicWord Release_Load(volatile const AtomicWord *ptr) { +inline Atomic32 Release_Load(volatile const Atomic32 *ptr) { MemoryBarrier(); return *ptr; } +// 64-bit version -// MacOS uses long for intptr_t, AtomicWord and Atomic32 are always different -// on the Mac, even when they are the same size. Thus, we always provide -// Atomic32 versions. 
- -inline Atomic32 CompareAndSwap(volatile Atomic32 *ptr, - Atomic32 old_value, - Atomic32 new_value) { - Atomic32 prev_value; +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; do { - if (OSAtomicCompareAndSwap32(old_value, new_value, - const_cast<Atomic32*>(ptr))) { + if (OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))) { return old_value; } prev_value = *ptr; @@ -153,27 +211,33 @@ inline Atomic32 CompareAndSwap(volatile Atomic32 *ptr, return prev_value; } -inline Atomic32 AtomicExchange(volatile Atomic32 *ptr, - Atomic32 new_value) { - Atomic32 old_value; +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; do { old_value = *ptr; - } while (!OSAtomicCompareAndSwap32(old_value, new_value, - const_cast<Atomic32*>(ptr))); + } while (!OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))); return old_value; } -inline Atomic32 AtomicIncrement(volatile Atomic32 *ptr, Atomic32 increment) { - return OSAtomicAdd32(increment, const_cast<Atomic32*>(ptr)); +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr, + Atomic64 increment) { + return OSAtomicAdd64(increment, const_cast<Atomic64*>(ptr)); } -inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr, - Atomic32 old_value, - Atomic32 new_value) { - Atomic32 prev_value; +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64 *ptr, + Atomic64 increment) { + return OSAtomicAdd64Barrier(increment, const_cast<Atomic64*>(ptr)); +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; do { - if (OSAtomicCompareAndSwap32Barrier(old_value, new_value, - const_cast<Atomic32*>(ptr))) { + if (OSAtomicCompareAndSwap64Barrier(old_value, new_value, + const_cast<Atomic64*>(ptr))) { return old_value; } prev_value = *ptr; @@ -181,32 +245,116 @@ inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr, return prev_value; } -inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr, - Atomic32 old_value, - Atomic32 new_value) { +inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + // The lib kern interface does not distinguish between + // Acquire and Release memory barriers; they are equivalent. 
return Acquire_CompareAndSwap(ptr, old_value, new_value); } +#ifdef __LP64__ -inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) { +// 64-bit implementation on 64-bit platform + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { *ptr = value; MemoryBarrier(); } -inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) { +inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { MemoryBarrier(); *ptr = value; } -inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) { - Atomic32 value = *ptr; +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { + Atomic64 value = *ptr; MemoryBarrier(); return value; } -inline Atomic32 Release_Load(volatile const Atomic32 *ptr) { +inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { MemoryBarrier(); return *ptr; } +#else + +// 64-bit implementation on 32-bit platform + +#if defined(__ppc__) + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + __asm__ __volatile__( + "_NoBarrier_Store_not_supported_for_32_bit_ppc\n\t"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + __asm__ __volatile__( + "_NoBarrier_Load_not_supported_for_32_bit_ppc\n\t"); + return 0; +} + +#elif defined(__i386__) + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Reset FP registers + : "=m" (*ptr) + : "m" (value) + : "memory", "%mm0"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + Atomic64 value; + __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Reset FP registers + : "=m" (value) + : "m" (*ptr) + : "memory", "%mm0"); + return value; +} +#endif + + +inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { + NoBarrier_Store(ptr, value); + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { + MemoryBarrier(); + NoBarrier_Store(ptr, value); +} + +inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { + Atomic64 value = NoBarrier_Load(ptr); + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { + MemoryBarrier(); + return NoBarrier_Load(ptr); +} +#endif // __LP64__ + +} // namespace base::subtle +} // namespace base + +// NOTE(vchen): The following is also deprecated. New callers should use +// the base::subtle namespace. 
+inline void MemoryBarrier() {
+  base::subtle::MemoryBarrier();
+}
 
 #endif  // BASE_ATOMICOPS_INTERNALS_MACOSX_H__
diff --git a/src/base/atomicops-internals-x86-msvc.h b/src/base/atomicops-internals-x86-msvc.h
index cce120c..4fc2d6e 100644
--- a/src/base/atomicops-internals-x86-msvc.h
+++ b/src/base/atomicops-internals-x86-msvc.h
@@ -39,78 +39,153 @@
 #define BASE_ATOMICOPS_INTERNALS_X86_MSVC_H__
 
 #include "base/basictypes.h"  // For COMPILE_ASSERT
 
-typedef intptr_t AtomicWord;
-#ifdef _WIN64
-typedef LONG Atomic32;
-#else
-typedef AtomicWord Atomic32;
+typedef int32 Atomic32;
+
+#if defined(_WIN64)
+#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
 #endif
 
-COMPILE_ASSERT(sizeof(AtomicWord) == sizeof(PVOID), atomic_word_is_atomic);
+namespace base {
+namespace subtle {
 
-inline AtomicWord CompareAndSwap(volatile AtomicWord* ptr,
-                                 AtomicWord old_value,
-                                 AtomicWord new_value) {
-  PVOID result = InterlockedCompareExchangePointer(
-    reinterpret_cast<volatile PVOID*>(ptr),
-    reinterpret_cast<PVOID>(new_value), reinterpret_cast<PVOID>(old_value));
-  return reinterpret_cast<AtomicWord>(result);
-}
+typedef int64 Atomic64;
 
-inline AtomicWord AtomicExchange(volatile AtomicWord* ptr,
-                                 AtomicWord new_value) {
-  PVOID result = InterlockedExchangePointer(
-    const_cast<PVOID*>(reinterpret_cast<volatile PVOID*>(ptr)),
-    reinterpret_cast<PVOID>(new_value));
-  return reinterpret_cast<AtomicWord>(result);
-}
+// 32-bit low-level operations on any platform
 
-#ifdef _WIN64
-inline Atomic32 AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) {
-  // InterlockedExchangeAdd returns *ptr before being incremented
-  // and we must return nonzero iff *ptr is nonzero after being
-  // incremented.
-  return InterlockedExchangeAdd(ptr, increment) + increment;
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  LONG result = InterlockedCompareExchange(
+      reinterpret_cast<volatile LONG*>(ptr),
+      static_cast<LONG>(new_value),
+      static_cast<LONG>(old_value));
+  return static_cast<Atomic32>(result);
 }
 
-inline AtomicWord AtomicIncrement(volatile AtomicWord* ptr, AtomicWord increment) {
-  return InterlockedExchangeAdd64(
-      reinterpret_cast<volatile LONGLONG*>(ptr),
-      static_cast<LONGLONG>(increment)) + increment;
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  LONG result = InterlockedExchange(
+      reinterpret_cast<volatile LONG*>(ptr),
+      static_cast<LONG>(new_value));
+  return static_cast<Atomic32>(result);
 }
-#else
-inline AtomicWord AtomicIncrement(volatile AtomicWord* ptr, AtomicWord increment) {
-  return InterlockedExchangeAdd(
+
+inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+                                        Atomic32 increment) {
+  return InterlockedExchangeAdd(
       reinterpret_cast<volatile LONG*>(ptr),
       static_cast<LONG>(increment)) + increment;
 }
-#endif
 
-inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
-                                         AtomicWord old_value,
-                                         AtomicWord new_value) {
-  return CompareAndSwap(ptr, old_value, new_value);
+inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+                                          Atomic32 increment) {
+  return Barrier_AtomicIncrement(ptr, increment);
 }
 
-inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr,
-                                         AtomicWord old_value,
-                                         AtomicWord new_value) {
-  return CompareAndSwap(ptr, old_value, new_value);
-}
+}  // namespace base::subtle
+}  // namespace base
 
 // In msvc8/vs2005, winnt.h already contains a definition for MemoryBarrier.
+// Define it outside the namespace.
 #if !(defined(_MSC_VER) && _MSC_VER >= 1400)
 inline void MemoryBarrier() {
-  AtomicWord value = 0;
-  AtomicExchange(&value, 0);  // acts as a barrier
+  Atomic32 value = 0;
+  base::subtle::NoBarrier_AtomicExchange(&value, 0);
+  // actually acts as a barrier in this implementation
 }
 #endif
 
-inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) {
-  AtomicExchange(ptr, value);
+namespace base {
+namespace subtle {
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 }
 
-inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  NoBarrier_AtomicExchange(ptr, value);
+  // acts as a barrier in this implementation
+}
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;  // works w/o barrier for current Intel chips as of June 2005
+  // See comments in Atomic64 version of Release_Store() below.
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  Atomic32 value = *ptr;
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+// 64-bit operations
+
+#if defined(_WIN64)
+
+// 64-bit low-level operations on 64-bit platform.
+
+COMPILE_ASSERT(sizeof(Atomic64) == sizeof(PVOID), atomic_word_is_atomic);
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  PVOID result = InterlockedCompareExchangePointer(
+    reinterpret_cast<volatile PVOID*>(ptr),
+    reinterpret_cast<PVOID>(new_value), reinterpret_cast<PVOID>(old_value));
+  return reinterpret_cast<Atomic64>(result);
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  PVOID result = InterlockedExchangePointer(
+    const_cast<PVOID*>(reinterpret_cast<volatile PVOID*>(ptr)),
+    reinterpret_cast<PVOID>(new_value));
+  return reinterpret_cast<Atomic64>(result);
+}
+
+inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
+                                        Atomic64 increment) {
+  return InterlockedExchangeAdd64(
+      reinterpret_cast<volatile LONGLONG*>(ptr),
+      static_cast<LONGLONG>(increment)) + increment;
+}
+
+inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+                                          Atomic64 increment) {
+  return Barrier_AtomicIncrement(ptr, increment);
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NoBarrier_AtomicExchange(ptr, value);
+  // acts as a barrier in this implementation
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
   *ptr = value;  // works w/o barrier for current Intel chips as of June 2005
   // When new chips come out, check:
@@ -121,15 +196,179 @@ inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
   //  http://developer.intel.com/design/pentium4/manuals/index_new.htm
 }
 
-inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) {
-  AtomicWord value = *ptr;
-  MemoryBarrier();
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = *ptr;
   return value;
 }
 
-inline AtomicWord Release_Load(volatile const AtomicWord* ptr) {
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
   MemoryBarrier();
   return *ptr;
 }
+
+#else  // defined(_WIN64)
+
+// 64-bit low-level operations on 32-bit platform
+
+// TBD(vchen): The GNU assembly below must be converted to MSVC inline
+// assembly.
+
+#include <stdio.h>
+#include <stdlib.h>
+
+inline void NotImplementedFatalError(const char *function_name) {
+  fprintf(stderr, "64-bit %s() not implemented on this platform\n",
+          function_name);
+  abort();
+}
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+#if 0  // Not implemented
+  Atomic64 prev;
+  __asm__ __volatile__("movl (%3), %%ebx\n\t"    // Move 64-bit new_value into
+                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
+                       "lock; cmpxchg8b %1\n\t"  // If edx:eax (old_value) same
+                       : "=A" (prev)             // as contents of ptr:
+                       : "m" (*ptr),             //   ecx:ebx => ptr
+                         "0" (old_value),        // else:
+                         "r" (&new_value)        //   old *ptr => edx:eax
+                       : "memory", "%ebx", "%ecx");
+  return prev;
+#else
+  NotImplementedFatalError("NoBarrier_CompareAndSwap");
+  return 0;
+#endif
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+#if 0  // Not implemented
+  __asm__ __volatile__(
+      "movl (%2), %%ebx\n\t"    // Move 64-bit new_value into
+      "movl 4(%2), %%ecx\n\t"   // ecx:ebx
+      "0:\n\t"
+      "movl %1, %%eax\n\t"      // Read contents of ptr into
+      "movl 4%1, %%edx\n\t"     // edx:eax
+      "lock; cmpxchg8b %1\n\t"  // Attempt cmpxchg; if *ptr
+      "jnz 0b\n\t"              // is no longer edx:eax, loop
+      : "=A" (new_value)
+      : "m" (*ptr),
+        "r" (&new_value)
+      : "memory", "%ebx", "%ecx");
+  return new_value;  // Now it's the previous value.
+#else
+  NotImplementedFatalError("NoBarrier_AtomicExchange");
+  return 0;
+#endif
+}
+
+inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+                                          Atomic64 increment) {
+#if 0  // Not implemented
+  Atomic64 temp = increment;
+  __asm__ __volatile__(
+      "0:\n\t"
+      "movl (%3), %%ebx\n\t"    // Move 64-bit increment into
+      "movl 4(%3), %%ecx\n\t"   // ecx:ebx
+      "movl (%2), %%eax\n\t"    // Read contents of ptr into
+      "movl 4(%2), %%edx\n\t"   // edx:eax
+      "add %%eax, %%ebx\n\t"    // sum => ecx:ebx
+      "adc %%edx, %%ecx\n\t"    // edx:eax still has old *ptr
+      "lock; cmpxchg8b (%2)\n\t"// Attempt cmpxchg; if *ptr
+      "jnz 0b\n\t"              // is no longer edx:eax, loop
+      : "=A"(temp), "+m"(*ptr)
+      : "D" (ptr), "S" (&increment)
+      : "memory", "%ebx", "%ecx");
+  // temp now contains the previous value of *ptr
+  return temp + increment;
+#else
+  NotImplementedFatalError("NoBarrier_AtomicIncrement");
+  return 0;
+#endif
+}
+
+inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
+                                        Atomic64 increment) {
+#if 0  // Not implemented
+  Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment);
+  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
+    __asm__ __volatile__("lfence" : : : "memory");
+  }
+  return new_val;
+#else
+  NotImplementedFatalError("Barrier_AtomicIncrement");
+  return 0;
+#endif
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+#if 0  // Not implemented
+  __asm {
+    mov mm0, value;  // Use mmx reg for 64-bit atomic moves
+    mov ptr, mm0;
+    emms;            // Empty mmx state to enable FP registers
+  }
+#else
+  NotImplementedFatalError("NoBarrier_Store");
+#endif
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NoBarrier_AtomicExchange(ptr, value);
+  // acts as a barrier in this implementation
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NoBarrier_Store(ptr, value);
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+#if 0  // Not implemented
+  Atomic64 value;
+  __asm {
+    mov mm0, ptr;    // Use mmx reg for 64-bit atomic moves
+    mov value, mm0;
+    emms;            // Empty mmx state to enable FP registers
+  }
+  return value;
+#else
+  NotImplementedFatalError("NoBarrier_Load");
+  return 0;
+#endif
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = NoBarrier_Load(ptr);
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+  MemoryBarrier();
+  return NoBarrier_Load(ptr);
+}
+
+#endif  // defined(_WIN64)
+
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+}  // namespace base::subtle
+}  // namespace base
+
 #endif  // BASE_ATOMICOPS_INTERNALS_X86_MSVC_H__
diff --git a/src/base/atomicops-internals-x86.cc b/src/base/atomicops-internals-x86.cc
index 9d61fd7..1a6c24f 100644
--- a/src/base/atomicops-internals-x86.cc
+++ b/src/base/atomicops-internals-x86.cc
@@ -40,6 +40,12 @@
 #include "base/logging.h"
 #include <string.h>
 
+// This file only makes sense with atomicops-internals-x86.h -- it
+// depends on structs that are defined in that file.  If atomicops.h
+// doesn't sub-include that file, then we aren't needed, and shouldn't
+// try to do anything.
+#ifdef BASE_ATOMICOPS_INTERNALS_X86_H__
+
 // Inline cpuid instruction.  In PIC compilations, %ebx contains the address
 // of the global offset table.  To avoid breaking such executables, this code
 // must preserve that register's value across cpuid instructions.
@@ -117,3 +123,5 @@ REGISTER_MODULE_INITIALIZER(atomicops_x86, {
 });
 
 #endif
+
+#endif  /* ifdef BASE_ATOMICOPS_INTERNALS_X86_H__ */
diff --git a/src/base/atomicops-internals-x86.h b/src/base/atomicops-internals-x86.h
index db3d4d2..68839cc 100644
--- a/src/base/atomicops-internals-x86.h
+++ b/src/base/atomicops-internals-x86.h
@@ -38,17 +38,13 @@
 #ifndef BASE_ATOMICOPS_INTERNALS_X86_H__
 #define BASE_ATOMICOPS_INTERNALS_X86_H__
 
-typedef intptr_t AtomicWord;
 typedef int32_t Atomic32;
 
+#define BASE_HAS_ATOMIC64 1  // Use only in tests and base/atomic*
+
+
+// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it
+// already matches Atomic32 or Atomic64, depending on the platform.
 
-// There are a couple places we need to specialize opcodes to account for the
-// different AtomicWord sizes on x86_64 and 32-bit platforms.
-// This macro is undefined after its last use, below.
-#if defined(__x86_64__)
-#define ATOMICOPS_WORD_SUFFIX "q"
-#else
-#define ATOMICOPS_WORD_SUFFIX "l"
-#endif
 
 // This struct is not part of the public API of this module; clients may not
 // use it.
@@ -62,63 +58,89 @@ struct AtomicOps_x86CPUFeatureStruct {
 };
 extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;
 
-inline AtomicWord CompareAndSwap(volatile AtomicWord* ptr,
-                                 AtomicWord old_value,
-                                 AtomicWord new_value) {
-  AtomicWord prev;
-  __asm__ __volatile__("lock; cmpxchg" ATOMICOPS_WORD_SUFFIX " %1,%2"
+
+#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
+
+
+namespace base {
+namespace subtle {
+
+typedef int64_t Atomic64;
+
+// 32-bit low-level operations on any platform.
+
+inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                         Atomic32 old_value,
+                                         Atomic32 new_value) {
+  Atomic32 prev;
+  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                        : "=a" (prev)
                        : "q" (new_value), "m" (*ptr), "0" (old_value)
                        : "memory");
   return prev;
 }
 
-inline AtomicWord AtomicExchange(volatile AtomicWord* ptr,
-                                 AtomicWord new_value) {
-  __asm__ __volatile__("xchg" ATOMICOPS_WORD_SUFFIX " %1,%0"  // The lock prefix
-                       : "=r" (new_value)                     // is implicit for
-                       : "m" (*ptr), "0" (new_value)          // xchg.
+inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
+                                         Atomic32 new_value) {
+  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
+                       : "=r" (new_value)
+                       : "m" (*ptr), "0" (new_value)
                        : "memory");
   return new_value;  // Now it's the previous value.
 }
 
-inline AtomicWord AtomicIncrement(volatile AtomicWord* ptr, AtomicWord increment) {
-  AtomicWord temp = increment;
-  __asm__ __volatile__("lock; xadd" ATOMICOPS_WORD_SUFFIX " %0,%1"
+inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+                                          Atomic32 increment) {
+  Atomic32 temp = increment;
+  __asm__ __volatile__("lock; xaddl %0,%1"
                        : "+r" (temp), "+m" (*ptr)
                        : : "memory");
-  // temp now contains the previous value of *ptr
+  // temp now holds the old value of *ptr
   return temp + increment;
 }
 
-#undef ATOMICOPS_WORD_SUFFIX
-
+inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+                                        Atomic32 increment) {
+  Atomic32 temp = increment;
+  __asm__ __volatile__("lock; xaddl %0,%1"
+                       : "+r" (temp), "+m" (*ptr)
+                       : : "memory");
+  // temp now holds the old value of *ptr
+  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
+    __asm__ __volatile__("lfence" : : : "memory");
+  }
+  return temp + increment;
+}
 
-inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
-                                         AtomicWord old_value,
-                                         AtomicWord new_value) {
-  AtomicWord x = CompareAndSwap(ptr, old_value, new_value);
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
   if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
     __asm__ __volatile__("lfence" : : : "memory");
   }
   return x;
 }
 
-inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr,
-                                         AtomicWord old_value,
-                                         AtomicWord new_value) {
-  return CompareAndSwap(ptr, old_value, new_value);
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 }
 
-#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
 
 #if defined(__x86_64__)
 
+// 64-bit implementations of memory barrier can be simpler, because
+// "mfence" is guaranteed to exist.
 inline void MemoryBarrier() {
   __asm__ __volatile__("mfence" : : : "memory");
 }
 
-inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) {
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
   *ptr = value;
   MemoryBarrier();
 }
@@ -129,24 +151,103 @@ inline void MemoryBarrier() {
   if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
     __asm__ __volatile__("mfence" : : : "memory");
   } else {  // mfence is faster but not present on PIII
-    AtomicWord x = 0;
-    AtomicExchange(&x, 0);
+    Atomic32 x = 0;
+    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
   }
 }
 
-inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) {
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
   if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
     *ptr = value;
     __asm__ __volatile__("mfence" : : : "memory");
   } else {
-    AtomicExchange(ptr, value);
+    NoBarrier_AtomicExchange(ptr, value);
+                          // acts as a barrier on PIII
   }
 }
-
 #endif
 
-inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  ATOMICOPS_COMPILER_BARRIER();
+  *ptr = value;  // An x86 store acts as a release barrier.
+  // See comments in Atomic64 version of Release_Store(), below.
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
+  // See comments in Atomic64 version of Release_Store(), below.
+  ATOMICOPS_COMPILER_BARRIER();
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+#if defined(__x86_64__)
+
+// 64-bit low-level operations on 64-bit platform.
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  Atomic64 prev;
+  __asm__ __volatile__("lock; cmpxchgq %1,%2"
+                       : "=a" (prev)
+                       : "q" (new_value), "m" (*ptr), "0" (old_value)
+                       : "memory");
+  return prev;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
+                       : "=r" (new_value)
+                       : "m" (*ptr), "0" (new_value)
+                       : "memory");
+  return new_value;  // Now it's the previous value.
+}
+
+inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+                                          Atomic64 increment) {
+  Atomic64 temp = increment;
+  __asm__ __volatile__("lock; xaddq %0,%1"
+                       : "+r" (temp), "+m" (*ptr)
+                       : : "memory");
+  // temp now contains the previous value of *ptr
+  return temp + increment;
+}
+
+inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
+                                        Atomic64 increment) {
+  Atomic64 temp = increment;
+  __asm__ __volatile__("lock; xaddq %0,%1"
+                       : "+r" (temp), "+m" (*ptr)
+                       : : "memory");
+  // temp now contains the previous value of *ptr
+  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
+    __asm__ __volatile__("lfence" : : : "memory");
+  }
+  return temp + increment;
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
   ATOMICOPS_COMPILER_BARRIER();
+  *ptr = value;  // An x86 store acts as a release barrier
                  // for current AMD/Intel chips as of Jan 2008.
                  // See also Acquire_Load(), below.
@@ -165,94 +266,155 @@ inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
   // either flushing cache lines or non-temporal cache hints.
 }
 
-inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) {
-  AtomicWord value = *ptr;  // An x86 load acts as a acquire barrier,
-                            // for current AMD/Intel chips as of Jan 2008.
-                            // See also Release_Store(), above.
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
+                          // for current AMD/Intel chips as of Jan 2008.
+                          // See also Release_Store(), above.
   ATOMICOPS_COMPILER_BARRIER();
   return value;
 }
 
-inline AtomicWord Release_Load(volatile const AtomicWord* ptr) {
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
   MemoryBarrier();
   return *ptr;
 }
 
-// When Atomic32 and AtomicWord are different types, we need to copy
-// the preceding methods for Atomic32.
-
-#ifndef INT32_EQUALS_INTPTR
-
-inline Atomic32 CompareAndSwap(volatile Atomic32* ptr,
-                               Atomic32 old_value,
-                               Atomic32 new_value) {
-  Atomic32 prev;
-  __asm__ __volatile__("lock; cmpxchgl %1,%2"
-                       : "=a" (prev)
-                       : "q" (new_value), "m" (*ptr), "0" (old_value)
-                       : "memory");
+#else  // defined(__x86_64__)
+
+// 64-bit low-level operations on 32-bit platform.
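[Editorial sketch, not part of the diff: why callers must use the Load/Store
routines for Atomic64 on a 32-bit build.  A plain 64-bit assignment compiles
to two 32-bit moves, so another thread can observe a half-written value; the
routines below instead use a single 8-byte movq (or lock; cmpxchg8b).  "flag"
is a hypothetical caller-side variable used only for illustration.]

    base::subtle::Atomic64 flag = 0;
    base::subtle::NoBarrier_Store(&flag, 0x100000001LL);   // one atomic movq
    base::subtle::Atomic64 v = base::subtle::NoBarrier_Load(&flag);
    // By contrast, "flag = 0x100000001LL;" is two 32-bit stores here,
    // and a concurrent reader could see 0x100000000 or 0x1.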
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  Atomic64 prev;
+  __asm__ __volatile__("movl (%3), %%ebx\n\t"    // Move 64-bit new_value into
+                       "movl 4(%3), %%ecx\n\t"   // ecx:ebx
+                       "lock; cmpxchg8b %1\n\t"  // If edx:eax (old_value) same
+                       : "=A" (prev)             // as contents of ptr:
+                       : "m" (*ptr),             //   ecx:ebx => ptr
+                         "0" (old_value),        // else:
+                         "r" (&new_value)        //   old *ptr => edx:eax
+                       : "memory", "%ebx", "%ecx");
   return prev;
 }
 
-inline Atomic32 AtomicExchange(volatile Atomic32* ptr,
-                               Atomic32 new_value) {
-  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
-                       : "=r" (new_value)
-                       : "m" (*ptr), "0" (new_value)
-                       : "memory");
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  __asm__ __volatile__(
+      "movl (%2), %%ebx\n\t"    // Move 64-bit new_value into
+      "movl 4(%2), %%ecx\n\t"   // ecx:ebx
+      "0:\n\t"
+      "movl %1, %%eax\n\t"      // Read contents of ptr into
+      "movl 4%1, %%edx\n\t"     // edx:eax
+      "lock; cmpxchg8b %1\n\t"  // Attempt cmpxchg; if *ptr
+      "jnz 0b\n\t"              // is no longer edx:eax, loop
+      : "=&A" (new_value)
+      : "m" (*ptr),
+        "r" (&new_value)
+      : "memory", "%ebx", "%ecx");
   return new_value;  // Now it's the previous value.
 }
 
-inline Atomic32 AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) {
-  Atomic32 temp = increment;
-  __asm__ __volatile__("lock; xaddl %0,%1"
-                       : "+r" (temp), "+m" (*ptr)
-                       : : "memory");
-  // temp now holds the old value of *ptr
+inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+                                          Atomic64 increment) {
+  Atomic64 temp = increment;
+  __asm__ __volatile__(
+      "0:\n\t"
+      "movl (%3), %%ebx\n\t"    // Move 64-bit increment into
+      "movl 4(%3), %%ecx\n\t"   // ecx:ebx
+      "movl (%2), %%eax\n\t"    // Read contents of ptr into
+      "movl 4(%2), %%edx\n\t"   // edx:eax
+      "add %%eax, %%ebx\n\t"    // sum => ecx:ebx
+      "adc %%edx, %%ecx\n\t"    // edx:eax still has old *ptr
+      "lock; cmpxchg8b (%2)\n\t"// Attempt cmpxchg; if *ptr
+      "jnz 0b\n\t"              // is no longer edx:eax, loop
+      : "=A"(temp), "+m"(*ptr)
+      : "D" (ptr), "S" (&increment)
+      : "memory", "%ebx", "%ecx");
+  // temp now contains the previous value of *ptr
   return temp + increment;
 }
 
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  Atomic32 x = CompareAndSwap(ptr, old_value, new_value);
+inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
+                                        Atomic64 increment) {
+  Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment);
   if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
     __asm__ __volatile__("lfence" : : : "memory");
   }
-  return x;
+  return new_val;
 }
 
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return CompareAndSwap(ptr, old_value, new_value);
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
+                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
+                       "emms\n\t"            // Empty mmx state/Reset FP regs
+                       : "=m" (*ptr)
+                       : "m" (value)
+                       : "memory", "%mm0");
 }
 
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NoBarrier_Store(ptr, value);
   MemoryBarrier();
 }
 
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
   ATOMICOPS_COMPILER_BARRIER();
-  *ptr = value;  // An x86 store acts as a release barrier.
-  // See comments in AtomicWord version of Release_Store(), above.
+  NoBarrier_Store(ptr, value);
 }
 
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
-  Atomic32 value = *ptr;  // An x86 load acts as a acquire barrier.
-  // See comments in AtomicWord version of Release_Store(), above.
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  Atomic64 value;
+  __asm__ __volatile__("movq %1, %%mm0\n\t"  // Use mmx reg for 64-bit atomic
+                       "movq %%mm0, %0\n\t"  // moves (ptr could be read-only)
+                       "emms\n\t"            // Empty mmx state/Reset FP regs
+                       : "=m" (value)
+                       : "m" (*ptr)
+                       : "%mm0");            // Do not mark mem as clobbered
+  return value;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = NoBarrier_Load(ptr);
   ATOMICOPS_COMPILER_BARRIER();
   return value;
 }
 
-inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
   MemoryBarrier();
-  return *ptr;
+  return NoBarrier_Load(ptr);
+}
+
+#endif  // defined(__x86_64__)
+
+
+inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
+    __asm__ __volatile__("lfence" : : : "memory");
+  }
+  return x;
 }
 
-#endif  /* INT32_EQUALS_INTPTR */
+inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+                                       Atomic64 old_value,
+                                       Atomic64 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+}  // namespace base::subtle
+}  // namespace base
 
 #undef ATOMICOPS_COMPILER_BARRIER
 
+// NOTE(vchen): The following is also deprecated.  New callers should use
+// the base::subtle namespace.
+inline void MemoryBarrier() {
+  base::subtle::MemoryBarrier();
+}
 
 #endif  // BASE_ATOMICOPS_INTERNALS_X86_H__
diff --git a/src/base/atomicops.h b/src/base/atomicops.h
index 81c365f..834f518 100644
--- a/src/base/atomicops.h
+++ b/src/base/atomicops.h
@@ -31,10 +31,38 @@
  * Author: Sanjay Ghemawat
  */
 
+// For atomic operations on statistics counters, see atomic_stats_counter.h.
+// For atomic operations on sequence numbers, see atomic_sequence_num.h.
+// For atomic operations on reference counts, see atomic_refcount.h.
+
 // Some fast atomic operations -- typically with machine-dependent
 // implementations.  This file may need editing as Google code is
 // ported to different architectures.
 
+// The routines exported by this module are subtle.  If you use them, even if
+// you get the code right, it will depend on careful reasoning about atomicity
+// and memory ordering; it will be less readable, and harder to maintain.  If
+// you plan to use these routines, you should have a good reason, such as solid
+// evidence that performance would otherwise suffer, or there being no
+// alternative.  You should assume only properties explicitly guaranteed by the
+// specifications in this file.  You are almost certainly _not_ writing code
+// just for the x86; if you assume x86 semantics, x86 hardware bugs and
+// implementations on other architectures will cause your code to break.  If
+// you do not know what you are doing, avoid these routines, and use a Mutex.
+//
+// It is incorrect to make direct assignments to/from an atomic variable.
+// You should use one of the Load or Store routines.  The NoBarrier
+// versions are provided when no barriers are needed:
+//   NoBarrier_Store()
+//   NoBarrier_Load()
+// Although there is currently no compiler enforcement, you are encouraged
+// to use these.  Moreover, if you choose to use the base::subtle::Atomic64
+// type, you MUST use one of the Load or Store routines to get correct
+// behavior on 32-bit platforms.
+//
+// The intent is eventually to put all of these routines in namespace
+// base::subtle.
+
 #ifndef THREAD_ATOMICOPS_H__
 #define THREAD_ATOMICOPS_H__
 
@@ -43,18 +71,25 @@
 
 // ------------------------------------------------------------------------
 // Include the platform specific implementations of the types
-// and operations listed below.
+// and operations listed below.  Implementations are to provide Atomic32
+// and Atomic64 operations.  If there is a mismatch between intptr_t and
+// the Atomic32 or Atomic64 types for a platform, the platform-specific
+// header should define the macro AtomicWordCastType in a clause similar
+// to the following:
+//   #if ...pointers are 64 bits...
+//   # define AtomicWordCastType base::subtle::Atomic64
+//   #else
+//   # define AtomicWordCastType Atomic32
+//   #endif
 // TODO(csilvers): figure out ARCH_PIII/ARCH_K8 (perhaps via ./configure?)
 // ------------------------------------------------------------------------
 
-// macosx.h should work correctly for Darwin/x86 as well, but the
-// x86.h version works fine as well, so we'll go with that.
 // TODO(csilvers): match piii, not just __i386.  Also, match k8
-#if defined(__MACH__) && defined(__APPLE__) && defined(__ppc__)
+#if defined(__MACH__) && defined(__APPLE__)
 #include "base/atomicops-internals-macosx.h"
 #elif defined(__GNUC__) && (defined(__i386) || defined(ARCH_K8))
 #include "base/atomicops-internals-x86.h"
-#elif defined(__i386) && defined(MSVC)
+#elif defined(__i386) && defined(_MSC_VER)
 #include "base/atomicops-internals-x86-msvc.h"
 #elif defined(__linux__) && defined(__PPC__)
 #include "base/atomicops-internals-linuxppc.h"
@@ -66,21 +101,19 @@
 #include "base/atomicops-internals-x86.h"
 #endif
 
-// ------------------------------------------------------------------------
-// Commented out type definitions and method declarations for documentation
-// of the interface provided by this module.
-// ------------------------------------------------------------------------
-
-#if 0
-
 // Signed type that can hold a pointer and supports the atomic ops below, as
 // well as atomic loads and stores.  Instances must be naturally-aligned.
 typedef intptr_t AtomicWord;
 
-// Signed 32-bit type that supports the atomic ops below, as well as atomic
-// loads and stores.  Instances must be naturally aligned.  This type differs
-// from AtomicWord in 64-bit binaries where AtomicWord is 64-bits.
-typedef int32_t Atomic32;
+#ifdef AtomicWordCastType
+// ------------------------------------------------------------------------
+// This section is needed only when explicit type casting is required to
+// cast AtomicWord to one of the basic atomic types (Atomic64 or Atomic32).
+// It also serves to document the AtomicWord interface.
+// ------------------------------------------------------------------------
+
+namespace base {
+namespace subtle {
 
 // Atomically execute:
 //      result = *ptr;
@@ -92,18 +125,36 @@ typedef int32_t Atomic32;
 //      if (result == old_value) *ptr = new_value;
 //      return result;
 //
 // Always return the old value of "*ptr"
 //
 // This routine implies no memory barriers.
-AtomicWord CompareAndSwap(volatile AtomicWord* ptr,
-                          AtomicWord old_value,
-                          AtomicWord new_value);
+inline AtomicWord NoBarrier_CompareAndSwap(volatile AtomicWord* ptr,
+                                           AtomicWord old_value,
+                                           AtomicWord new_value) {
+  return NoBarrier_CompareAndSwap(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr),
+      old_value, new_value);
+}
 
 // Atomically store new_value into *ptr, returning the previous value held in
 // *ptr.  This routine implies no memory barriers.
-AtomicWord AtomicExchange(volatile AtomicWord* ptr, AtomicWord new_value);
+inline AtomicWord NoBarrier_AtomicExchange(volatile AtomicWord* ptr,
+                                           AtomicWord new_value) {
+  return NoBarrier_AtomicExchange(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
+}
 
 // Atomically increment *ptr by "increment".  Returns the new value of
 // *ptr with the increment applied.  This routine implies no memory
 // barriers.
-AtomicWord AtomicIncrement(volatile AtomicWord* ptr, AtomicWord increment);
+inline AtomicWord NoBarrier_AtomicIncrement(volatile AtomicWord* ptr,
+                                            AtomicWord increment) {
+  return NoBarrier_AtomicIncrement(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), increment);
+}
+
+inline AtomicWord Barrier_AtomicIncrement(volatile AtomicWord* ptr,
+                                          AtomicWord increment) {
+  return Barrier_AtomicIncrement(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), increment);
+}
 
 // ------------------------------------------------------------------------
 // These following lower-level operations are typically useful only to people
@@ -112,38 +163,221 @@ AtomicWord AtomicIncrement(volatile AtomicWord* ptr, AtomicWord increment);
 // implementing higher-level synchronization operations like spinlocks,
 // mutexes, and condition-variables.  They combine CompareAndSwap(), a load, or
 // a store with appropriate memory-ordering instructions.  "Acquire" operations
 // ensure that no later memory access can be reordered ahead of the operation.
 // "Release" operations ensure that no previous memory access can be reordered
-// after the operation.
+// after the operation.  "Barrier" operations have both "Acquire" and "Release"
+// semantics.  A MemoryBarrier() has "Barrier" semantics, but does no memory
+// access.
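[Editorial sketch, not part of the diff: a minimal publish/consume pairing to
make the acquire/release contract above concrete.  "payload" and "ready" are
hypothetical caller-side variables, not part of this interface.]

    int payload = 0;         // plain data, published via "ready"
    Atomic32 ready = 0;

    // Publisher thread: Release_Store keeps the payload write from being
    // reordered after the flag write.
    payload = 42;
    base::subtle::Release_Store(&ready, 1);

    // Consumer thread: Acquire_Load keeps the payload read from being
    // reordered before the flag read, so once ready reads as 1 the
    // consumer is guaranteed to observe payload == 42.
    if (base::subtle::Acquire_Load(&ready) == 1) {
      // use payload here
    }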
// ------------------------------------------------------------------------
 
-AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
-                                  AtomicWord old_value,
-                                  AtomicWord new_value);
-AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr,
-                                  AtomicWord old_value,
-                                  AtomicWord new_value);
-void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value);
-void Release_Store(volatile AtomicWord* ptr, AtomicWord value);
-AtomicWord Acquire_Load(volatile const AtomicWord* ptr);
-AtomicWord Release_Load(volatile const AtomicWord* ptr);
+inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
+                                         AtomicWord old_value,
+                                         AtomicWord new_value) {
+  return base::subtle::Acquire_CompareAndSwap(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr),
+      old_value, new_value);
+}
+
+inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr,
+                                         AtomicWord old_value,
+                                         AtomicWord new_value) {
+  return base::subtle::Release_CompareAndSwap(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr),
+      old_value, new_value);
+}
+
+inline void NoBarrier_Store(volatile AtomicWord *ptr, AtomicWord value) {
+  NoBarrier_Store(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), value);
+}
+
+inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) {
+  return base::subtle::Acquire_Store(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), value);
+}
+
+inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
+  return base::subtle::Release_Store(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), value);
+}
+
+inline AtomicWord NoBarrier_Load(volatile const AtomicWord *ptr) {
+  return NoBarrier_Load(
+      reinterpret_cast<volatile const AtomicWordCastType*>(ptr));
+}
+
+inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) {
+  return base::subtle::Acquire_Load(
+      reinterpret_cast<volatile const AtomicWordCastType*>(ptr));
+}
+
+inline AtomicWord Release_Load(volatile const AtomicWord* ptr) {
+  return base::subtle::Release_Load(
+      reinterpret_cast<volatile const AtomicWordCastType*>(ptr));
+}
+
+}  // namespace base::subtle
+}  // namespace base
+#endif  // AtomicWordCastType
+
+// ------------------------------------------------------------------------
+// Commented out type definitions and method declarations for documentation
+// of the interface provided by this module.
+// ------------------------------------------------------------------------
+
+#if 0
+
+// Signed 32-bit type that supports the atomic ops below, as well as atomic
+// loads and stores.  Instances must be naturally aligned.  This type differs
+// from AtomicWord in 64-bit binaries where AtomicWord is 64-bits.
+typedef int32_t Atomic32;
 
 // Corresponding operations on Atomic32
-Atomic32 CompareAndSwap(volatile Atomic32* ptr,
-                        Atomic32 old_value,
-                        Atomic32 new_value);
-Atomic32 AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
-Atomic32 AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment);
+namespace base {
+namespace subtle {
+
+// Signed 64-bit type that supports the atomic ops below, as well as atomic
+// loads and stores.  Instances must be naturally aligned.  This type differs
+// from AtomicWord in 32-bit binaries where AtomicWord is 32-bits.
+typedef int64_t Atomic64;
+
+Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
+                                  Atomic32 old_value,
+                                  Atomic32 new_value);
+Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
+Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment);
+Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+                                 Atomic32 increment);
 Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                 Atomic32 old_value,
                                 Atomic32 new_value);
 Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                 Atomic32 old_value,
                                 Atomic32 new_value);
+void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value);
 void Acquire_Store(volatile Atomic32* ptr, Atomic32 value);
 void Release_Store(volatile Atomic32* ptr, Atomic32 value);
+Atomic32 NoBarrier_Load(volatile const Atomic32* ptr);
 Atomic32 Acquire_Load(volatile const Atomic32* ptr);
 Atomic32 Release_Load(volatile const Atomic32* ptr);
 
+// Corresponding operations on Atomic64
+Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                  Atomic64 old_value,
+                                  Atomic64 new_value);
+Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
+Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment);
+Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment);
+
+Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+                                Atomic64 old_value,
+                                Atomic64 new_value);
+Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
+                                Atomic64 old_value,
+                                Atomic64 new_value);
+void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value);
+void Acquire_Store(volatile Atomic64* ptr, Atomic64 value);
+void Release_Store(volatile Atomic64* ptr, Atomic64 value);
+Atomic64 NoBarrier_Load(volatile const Atomic64* ptr);
+Atomic64 Acquire_Load(volatile const Atomic64* ptr);
+Atomic64 Release_Load(volatile const Atomic64* ptr);
+
+}  // namespace base::subtle
+}  // namespace base
+
 void MemoryBarrier();
 
-#endif
+#endif  // 0
+
+
+// ------------------------------------------------------------------------
+// The following are to be deprecated when all uses have been changed to
+// use the base::subtle namespace.
+// ------------------------------------------------------------------------
+
+#ifdef AtomicWordCastType
+// AtomicWord versions to be deprecated
+inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
+                                         AtomicWord old_value,
+                                         AtomicWord new_value) {
+  return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr,
+                                         AtomicWord old_value,
+                                         AtomicWord new_value) {
+  return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) {
+  return base::subtle::Acquire_Store(ptr, value);
+}
+
+inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) {
+  return base::subtle::Release_Store(ptr, value);
+}
+
+inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) {
+  return base::subtle::Acquire_Load(ptr);
+}
+
+inline AtomicWord Release_Load(volatile const AtomicWord* ptr) {
+  return base::subtle::Release_Load(ptr);
+}
+#endif  // AtomicWordCastType
+
+// 32-bit Acquire/Release operations to be deprecated.
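[Editorial sketch, not part of the diff: migrating a caller off the
deprecated global spellings that follow only changes the qualification.
"counter" is a hypothetical caller-side variable.]

    Atomic32 counter = 0;
    Acquire_Store(&counter, 1);                // deprecated global spelling
    base::subtle::Acquire_Store(&counter, 1);  // preferred spelling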
+ +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); +} +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); +} +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + base::subtle::Acquire_Store(ptr, value); +} +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + return base::subtle::Release_Store(ptr, value); +} +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + return base::subtle::Acquire_Load(ptr); +} +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + return base::subtle::Release_Load(ptr); +} + +#ifdef BASE_HAS_ATOMIC64 + +// 64-bit Acquire/Release operations to be deprecated. + +inline base::subtle::Atomic64 Acquire_CompareAndSwap( + volatile base::subtle::Atomic64* ptr, + base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) { + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); +} +inline base::subtle::Atomic64 Release_CompareAndSwap( + volatile base::subtle::Atomic64* ptr, + base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) { + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); +} +inline void Acquire_Store( + volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { + base::subtle::Acquire_Store(ptr, value); +} +inline void Release_Store( + volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { + return base::subtle::Release_Store(ptr, value); +} +inline base::subtle::Atomic64 Acquire_Load( + volatile const base::subtle::Atomic64* ptr) { + return base::subtle::Acquire_Load(ptr); +} +inline base::subtle::Atomic64 Release_Load( + volatile const base::subtle::Atomic64* ptr) { + return base::subtle::Release_Load(ptr); +} + +#endif // BASE_HAS_ATOMIC64 #endif // THREAD_ATOMICOPS_H__ diff --git a/src/base/basictypes.h b/src/base/basictypes.h index d9d2774..97f96d6 100644 --- a/src/base/basictypes.h +++ b/src/base/basictypes.h @@ -275,4 +275,20 @@ class AssignAttributeStartEnd { #endif // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__ +// The following enum should be used only as a constructor argument to indicate +// that the variable has static storage class, and that the constructor should +// do nothing to its state. It indicates to the reader that it is legal to +// declare a static nistance of the class, provided the constructor is given +// the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a +// static variable that has a constructor or a destructor because invocation +// order is undefined. 
However, IF the type can be initialized by filling with +// zeroes (which the loader does for static variables), AND the destructor also +// does nothing to the storage, then a constructor declared as +// explicit MyClass(base::LinkerInitialized x) {} +// and invoked as +// static MyClass my_variable_name(base::LINKER_INITIALIZED); +namespace base { +enum LinkerInitialized { LINKER_INITIALIZED }; +} + #endif // _BASICTYPES_H_ diff --git a/src/base/cycleclock.h b/src/base/cycleclock.h index 20681cb..a5fecb1 100644 --- a/src/base/cycleclock.h +++ b/src/base/cycleclock.h @@ -50,7 +50,18 @@ struct CycleClock { // This should return the number of cycles since power-on static inline int64 Now() { -#if defined(__i386__) +#if defined(__MACH__) && defined(__APPLE__) + // this goes at the top because we need ALL Macs, regardless + // of architecture, to return the number of "mach time units" + // that have passes since startup. See sysinfo.cc where + // InitializeSystemInfo() sets the supposed cpu clock frequency of macs + // to the number of mach time units per second, not actual + // CPU clock frequency (which can change in the face of CPU + // frequency scaling). also note that when the Mac sleeps, + // this counter pauses; it does not continue counting, nor resets + // to zero. + return mach_absolute_time(); +#elif defined(__i386__) int64 ret; __asm__ volatile ("rdtsc" : "=A" (ret) ); @@ -79,8 +90,6 @@ struct CycleClock { return itc; #elif defined(_MSC_VER) && defined(_M_IX86) _asm rdtsc -#elif defined(__MACH__) && defined(__APPLE__) - return mach_absolute_time(); #else // We could define __alpha here as well, but it only has a 32-bit // timer (good for like 4 seconds), which isn't very useful. diff --git a/src/base/dynamic_annotations.cc b/src/base/dynamic_annotations.cc new file mode 100644 index 0000000..0f441ed --- /dev/null +++ b/src/base/dynamic_annotations.cc @@ -0,0 +1,63 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Kostya Serebryany + */ + +#include "base/dynamic_annotations.h" + +// Each function is empty and called (via a macro) only in debug mode. +// The arguments are captured by dynamic tools at runtime. + +extern "C" void AnnotateRWLockCreate(const char *file, int line, void *lock){} +extern "C" void AnnotateRWLockDestroy(const char *file, int line, void *lock){} +extern "C" void AnnotateRWLockAcquired(const char *file, int line, + void *lock, long is_w){} +extern "C" void AnnotateRWLockReleased(const char *file, int line, + void *lock, long is_w){} +extern "C" void AnnotateCondVarWait(const char *file, int line, void *cv, + void *lock){} +extern "C" void AnnotateCondVarSignal(const char *file, int line, void *cv){} +extern "C" void AnnotateCondVarSignalAll(const char *file, int line, void *cv){} +extern "C" void AnnotatePCQCreate(const char *file, int line, void *pcq){} +extern "C" void AnnotatePCQDestroy(const char *file, int line, void *pcq){} +extern "C" void AnnotatePCQPut(const char *file, int line, void *pcq){} +extern "C" void AnnotatePCQGet(const char *file, int line, void *pcq){} +extern "C" void AnnotateNewMemory(const char *file, int line, void *mem, + long size){} +extern "C" void AnnotateExpectRace(const char *file, int line, void *mem, + const char *description){} +extern "C" void AnnotateBenignRace(const char *file, int line, void *mem, + const char *description){} +extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + void *mu){} +extern "C" void AnnotateTraceMemory(const char *file, int line, + const void *arg){} +extern "C" void AnnotateNoOp(const char *file, int line, const void *arg){} diff --git a/src/base/dynamic_annotations.h b/src/base/dynamic_annotations.h new file mode 100644 index 0000000..d619ffa --- /dev/null +++ b/src/base/dynamic_annotations.h @@ -0,0 +1,188 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Kostya Serebryany + */ + +// This file defines dynamic annotations for use with dynamic analysis +// tool such as valgrind, PIN, etc. +// +// Dynamic annotation is a source code annotation that affects +// the generated code (that is, the annotation is not a comment). +// Each such annotation is attached to a particular +// instruction and/or to a particular object (address) in the program. +// +// The annotations that should be used by users are macros +// (e.g. ANNOTATE_NEW_MEMORY). +// +// Actual implementation of these macros may differ depending on the +// dynamic analysis tool being used. +// +// This file supports the following dynamic analysis tools: +// - None (NDEBUG is defined). +// Macros are defined empty. +// - Helgrind (NDEBUG is not defined). +// Macros are defined as calls to non-inlinable empty functions +// that are intercepted by helgrind. +// +#ifndef _BASE_DYNAMIC_ANNOTATIONS_H__ +#define _BASE_DYNAMIC_ANNOTATIONS_H__ + + +// All the annotation macros are in effect only in debug mode. +#ifndef NDEBUG + + // Report that "lock" has been created. + #define ANNOTATE_RWLOCK_CREATE(lock) \ + AnnotateRWLockCreate(__FILE__, __LINE__, lock) + + // Report that "lock" is about to be destroyed. + #define ANNOTATE_RWLOCK_DESTROY(lock) \ + AnnotateRWLockDestroy(__FILE__, __LINE__, lock) + + // Report that "lock" has been acquired. + // is_w=1 for writer lock, is_w=0 for reader lock. + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ + AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) + + // Report that "lock" is about to be relased. + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ + AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) + + // Report that wait on 'cv' has succeeded and 'lock' is held. + #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ + AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) + + // Report that wait on 'cv' has succeeded. Variant w/o lock. + #define ANNOTATE_CONDVAR_WAIT(cv) \ + AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) + + // Report that we are about to signal on 'cv'. + #define ANNOTATE_CONDVAR_SIGNAL(cv) \ + AnnotateCondVarSignal(__FILE__, __LINE__, cv) + + // Report that we are about to signal_all on 'cv'. + #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ + AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) + + // Report that "pcq" (ProducerConsumerQueue) has been created. + #define ANNOTATE_PCQ_CREATE(pcq) \ + AnnotatePCQCreate(__FILE__, __LINE__, pcq) + + // Report that "pcq" is about to be destroyed. + #define ANNOTATE_PCQ_DESTROY(pcq) \ + AnnotatePCQDestroy(__FILE__, __LINE__, pcq) + + // Report that we are about to put an element into 'pcq'. + #define ANNOTATE_PCQ_PUT(pcq) \ + AnnotatePCQPut(__FILE__, __LINE__, pcq) + + // Report that we've just got an element from 'pcq'. + #define ANNOTATE_PCQ_GET(pcq) \ + AnnotatePCQGet(__FILE__, __LINE__, pcq) + + // Report that a new memory 'mem' of size 'size' has been allocated. + #define ANNOTATE_NEW_MEMORY(mem, size) \ + AnnotateNewMemory(__FILE__, __LINE__, mem, size) + + // Report that we expect a race on 'mem'. + // To use only in unit tests for a race detector. + #define ANNOTATE_EXPECT_RACE(mem, description) \ + AnnotateExpectRace(__FILE__, __LINE__, mem, description) + + // Report that we may have a benign race on 'mem'. + // Insert at the point where 'mem' exists, preferably close to the point + // where the race happens. 
+ #define ANNOTATE_BENIGN_RACE(mem, description) \ + AnnotateBenignRace(__FILE__, __LINE__, mem, description) + + // Report that the mutex 'mu' will be used with LockWhen/Await. + #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ + AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) + + // Request to trace every access to 'arg'. + #define ANNOTATE_TRACE_MEMORY(arg) \ + AnnotateTraceMemory(__FILE__, __LINE__, arg) + + // A no-op. Insert where you like to test the interceptors. + #define ANNOTATE_NO_OP(arg) \ + AnnotateNoOp(__FILE__, __LINE__, arg) + +#else // NDEBUG is defined + + #define ANNOTATE_RWLOCK_CREATE(lock) // empty + #define ANNOTATE_RWLOCK_DESTROY(lock) // empty + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) // empty + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) // empty + #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) // empty + #define ANNOTATE_CONDVAR_WAIT(cv) // empty + #define ANNOTATE_CONDVAR_SIGNAL(cv) // empty + #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) // empty + #define ANNOTATE_PCQ_CREATE(pcq) // empty + #define ANNOTATE_PCQ_DESTROY(pcq) // empty + #define ANNOTATE_PCQ_PUT(pcq) // empty + #define ANNOTATE_PCQ_GET(pcq) // empty + #define ANNOTATE_NEW_MEMORY(mem, size) // empty + #define ANNOTATE_EXPECT_RACE(mem, description) // empty + #define ANNOTATE_BENIGN_RACE(mem, description) // empty + #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) // empty + #define ANNOTATE_TRACE_MEMORY(arg) // empty + #define ANNOTATE_NO_OP(arg) // empty + +#endif // NDEBUG + +// Use the macros above rather than using these functions directly. +extern "C" void AnnotateRWLockCreate(const char *file, int line, void *lock); +extern "C" void AnnotateRWLockDestroy(const char *file, int line, void *lock); +extern "C" void AnnotateRWLockAcquired(const char *file, int line, + void *lock, long is_w); +extern "C" void AnnotateRWLockReleased(const char *file, int line, + void *lock, long is_w); +extern "C" void AnnotateCondVarWait(const char *file, int line, void *cv, + void *lock); +extern "C" void AnnotateCondVarSignal(const char *file, int line, void *cv); +extern "C" void AnnotateCondVarSignalAll(const char *file, int line, void *cv); +extern "C" void AnnotatePCQCreate(const char *file, int line, void *pcq); +extern "C" void AnnotatePCQDestroy(const char *file, int line, void *pcq); +extern "C" void AnnotatePCQPut(const char *file, int line, void *pcq); +extern "C" void AnnotatePCQGet(const char *file, int line, void *pcq); +extern "C" void AnnotateNewMemory(const char *file, int line, void *mem, + long size); +extern "C" void AnnotateExpectRace(const char *file, int line, void *mem, + const char *description); +extern "C" void AnnotateBenignRace(const char *file, int line, void *mem, + const char *description); +extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + void *mu); +extern "C" void AnnotateTraceMemory(const char *file, int line, + const void *arg); +extern "C" void AnnotateNoOp(const char *file, int line, const void *arg); + +#endif // _BASE_DYNAMIC_ANNOTATIONS_H__ diff --git a/src/base/linux_syscall_support.h b/src/base/linux_syscall_support.h index 9972806..979452c 100644 --- a/src/base/linux_syscall_support.h +++ b/src/base/linux_syscall_support.h @@ -1325,9 +1325,9 @@ struct kernel_statfs { } LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { - /* For real-time signals, the kernel does not know how to return from - * a signal handler. Instead, it relies on user space to provide a - * restorer function that calls the rt_sigreturn() system call. 
+ /* On i386, the kernel does not know how to return from a signal + * handler. Instead, it relies on user space to provide a + * restorer function that calls the {rt_,}sigreturn() system call. * Unfortunately, we cannot just reference the glibc version of this * function, as glibc goes out of its way to make it inaccessible. */ @@ -1342,6 +1342,25 @@ struct kernel_statfs { : "i" (__NR_rt_sigreturn)); return res; } + LSS_INLINE void (*LSS_NAME(restore)(void))(void) { + /* On i386, the kernel does not know how to return from a signal + * handler. Instead, it relies on user space to provide a + * restorer function that calls the {rt_,}sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:pop %%eax\n" + "movl %1,%%eax\n" + "int $0x80\n" + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_sigreturn)); + return res; + } #elif defined(__x86_64__) /* There are no known problems with any of the _syscallX() macros * currently shipping for x86_64, but we still need to be able to define @@ -2190,14 +2209,16 @@ struct kernel_statfs { * This function must have a "magic" signature that the "gdb" * (and maybe the kernel?) can recognize. */ - struct kernel_sigaction a; - if (act != NULL) { - a = *act; + if (act != NULL && !(act->sa_flags & SA_RESTORER)) { + struct kernel_sigaction a = *act; a.sa_flags |= SA_RESTORER; a.sa_restorer = LSS_NAME(restore_rt)(); + return LSS_NAME(rt_sigaction)(signum, &a, oldact, + (KERNEL_NSIG+7)/8); + } else { + return LSS_NAME(rt_sigaction)(signum, act, oldact, + (KERNEL_NSIG+7)/8); } - return LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact, - (KERNEL_NSIG+7)/8); } LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) { @@ -2394,8 +2415,11 @@ struct kernel_statfs { * * TODO: Test whether ARM needs a restorer */ - a.sa_flags |= SA_RESTORER; - a.sa_restorer = LSS_NAME(restore_rt)(); + if (!(a.sa_flags & SA_RESTORER)) { + a.sa_flags |= SA_RESTORER; + a.sa_restorer = (a.sa_flags & SA_SIGINFO) + ? LSS_NAME(restore_rt)() : LSS_NAME(restore)(); + } #endif } rc = LSS_NAME(rt_sigaction)(signum, act ? &a : act, oldact, diff --git a/src/base/low_level_alloc.cc b/src/base/low_level_alloc.cc index 7d1459b..95ecf3b 100644 --- a/src/base/low_level_alloc.cc +++ b/src/base/low_level_alloc.cc @@ -37,6 +37,7 @@ // it should not be used when performance is key. #include "base/low_level_alloc.h" +#include "base/dynamic_annotations.h" #include "base/spinlock.h" #include "base/logging.h" #include <google/malloc_hook.h> @@ -400,12 +401,12 @@ void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) { void *new_pages = mmap(0, new_pages_size, PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); RAW_CHECK(new_pages != MAP_FAILED, "mmap error"); + arena->mu.Lock(); s = reinterpret_cast<AllocList *>(new_pages); s->header.size = new_pages_size; // Pretend the block is allocated; call AddToFreelist() to free it. 
  s->header.magic = Magic(kMagicAllocated, &s->header);
  s->header.arena = arena;
- arena->mu.Lock();
  AddToFreelist(&s->levels, arena);  // insert new region into free list
  }
  AllocList *prev[kMaxLevel];
@@ -426,6 +427,7 @@ void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) {
  arena->mu.Unlock();
  result = &s->levels;
  }
+ ANNOTATE_NEW_MEMORY(result, request);
  return result;
}
diff --git a/src/base/spinlock.cc b/src/base/spinlock.cc
index 2e601eb..3cce898 100644
--- a/src/base/spinlock.cc
+++ b/src/base/spinlock.cc
@@ -51,6 +51,9 @@ void SubmitSpinLockProfileData(const void *, int64) {}
 static int adaptive_spin_count = 0;
+const base::LinkerInitialized SpinLock::LINKER_INITIALIZED =
+    base::LINKER_INITIALIZED;
+
 struct SpinLock_InitHelper {
   SpinLock_InitHelper() {
     // On multi-cpu machines, spin for longer before yielding
diff --git a/src/base/spinlock.h b/src/base/spinlock.h
index 0ad8587..2fd5e87 100644
--- a/src/base/spinlock.h
+++ b/src/base/spinlock.h
@@ -36,7 +36,7 @@
 // half the cost of a Mutex because the unlock just does a store instead
 // of a compare-and-swap which is expensive).
-// Spinlock is async signal safe.
+// SpinLock is async signal safe.
 // If used within a signal handler, all lock holders
 // should block the signal even outside the signal handler.
@@ -46,6 +46,7 @@
 #include "config.h"
 #include "base/basictypes.h"
 #include "base/atomicops.h"
+#include "base/dynamic_annotations.h"
 class SpinLock {
  public:
@@ -53,15 +54,14 @@ class SpinLock {
   // Special constructor for use with static SpinLock objects.  E.g.,
   //
-  //    static SpinLock lock(SpinLock::LINKER_INITIALIZED);
+  //    static SpinLock lock(base::LINKER_INITIALIZED);
   //
   // When initialized using this constructor, we depend on the fact
   // that the linker has already initialized the memory appropriately.
   // A SpinLock constructed like this can be freely used from global
   // initializers without worrying about the order in which global
   // initializers run.
-  enum StaticInitializer { LINKER_INITIALIZED };
-  explicit SpinLock(StaticInitializer x) {
+  explicit SpinLock(base::LinkerInitialized x) {
     // Does nothing; lockword_ is already initialized
   }
@@ -69,10 +69,15 @@ class SpinLock {
     if (Acquire_CompareAndSwap(&lockword_, 0, 1) != 0) {
       SlowLock();
     }
+    ANNOTATE_RWLOCK_ACQUIRED(this, 1);
   }
   inline bool TryLock() {
-    return (Acquire_CompareAndSwap(&lockword_, 0, 1) == 0);
+    bool res = (Acquire_CompareAndSwap(&lockword_, 0, 1) == 0);
+    if (res) {
+      ANNOTATE_RWLOCK_ACQUIRED(this, 1);
+    }
+    return res;
   }
   inline void Unlock() {
@@ -80,7 +85,7 @@ class SpinLock {
     extern void SubmitSpinLockProfileData(const void *, int64);
     int64 wait_timestamp = static_cast<uint32>(lockword_);
-
+    ANNOTATE_RWLOCK_RELEASED(this, 1);
     Release_Store(&lockword_, 0);
     // Collect contention profile info if this lock was contended.
     // The lockword_ value indicates when the waiter started waiting
@@ -110,8 +115,11 @@ class SpinLock {
   // Waits this long should be very rare.
   enum { PROFILE_TIMESTAMP_SHIFT = 7 };
+  static const base::LinkerInitialized LINKER_INITIALIZED;  // backwards compat
 private:
-  // Lock-state: 0 means unlocked, 1 means locked
+  // Lock-state: 0 means unlocked; 1 means locked with no waiters; values
+  // greater than 1 indicate locked with waiters, where the value is the time
+  // the first waiter started waiting and is used for contention profiling.
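+  // (A plausible reading of the encoding, given Unlock() above: a waiter
+  // records the time it began waiting, scaled down by PROFILE_TIMESTAMP_SHIFT,
+  // and Unlock() recovers that via static_cast<uint32>(lockword_) to compute
+  // the contention time it hands to SubmitSpinLockProfileData().)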
  volatile AtomicWord lockword_;
  void SlowLock();
diff --git a/src/base/sysinfo.cc b/src/base/sysinfo.cc
index df0452a..ad2cc35 100644
--- a/src/base/sysinfo.cc
+++ b/src/base/sysinfo.cc
@@ -48,30 +48,34 @@
 #include <sys/sysctl.h>
 #elif defined __sun__         // Solaris
 #include <procfs.h>           // for, e.g., prmap_t
-#elif defined _MSC_VER        // Windows
+#elif defined WIN32           // Windows
 #include <process.h>          // for getpid() (actually, _getpid())
 #include <shlwapi.h>          // for SHGetValueA()
+#include <tlhelp32.h>         // for Module32First()
 #endif
 #include "base/sysinfo.h"
 #include "base/commandlineflags.h"
 #include "base/logging.h"
 #include "base/cycleclock.h"
-#if defined(WIN32) && defined(MODULEENTRY32)
+#ifdef WIN32
+#ifdef MODULEENTRY32
 // In a change from the usual W-A pattern, there is no A variant of
 // MODULEENTRY32.  Tlhelp32.h #defines the W variant, but not the A.
-// We want the original A variants, and this #undef is the only
-// way I see to get them.
+// In unicode mode, tlhelp32.h #defines MODULEENTRY32 to be
+// MODULEENTRY32W.  These #undefs are the only way I see to get back
+// access to the original, ascii struct (and related functions).
 #undef MODULEENTRY32
 #undef Module32First
 #undef Module32Next
 #undef PMODULEENTRY32
 #undef LPMODULEENTRY32
+#endif  /* MODULEENTRY32 */
 // MinGW doesn't seem to define this, perhaps some windowsen don't either.
 #ifndef TH32CS_SNAPMODULE32
 #define TH32CS_SNAPMODULE32 0
-#endif
-#endif
+#endif  /* TH32CS_SNAPMODULE32 */
+#endif  /* WIN32 */
 // Re-run fn until it doesn't cause EINTR.
 #define NO_INTR(fn)  do {} while ((fn) < 0 && errno == EINTR)
@@ -335,8 +339,20 @@ static void InitializeSystemInfo() {
   // TODO(csilvers): also figure out cpuinfo_num_cpus
 #elif defined(__MACH__) && defined(__APPLE__)
-  // TODO(csilvers): can we do better than this?
-  cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000);
+  // We report "mach time units" per second.  The current number of elapsed
+  // mach time units can be found by calling uint64 mach_absolute_time();
+  // while not as precise as actual CPU cycles, it is accurate in the face
+  // of CPU frequency scaling and multi-cpu/core machines.
+  // Our mac users have these types of machines, and accuracy
+  // (i.e. correctness) trumps precision.
+  // See cycleclock.h: CycleClock::Now(), which returns the number of mach
+  // time units on Mac OS X.
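+  // (Worked example: Intel Macs typically report numer == denom == 1,
+  // i.e. one mach time unit per nanosecond, so the code below yields
+  // cpuinfo_cycles_per_second == 1e9; a hypothetical numer/denom of 4/1
+  // -- 4ns per unit -- would instead yield 0.25 * 1e9 = 2.5e8.)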
+ mach_timebase_info_data_t timebase_info; + mach_timebase_info(&timebase_info); + double mach_time_units_per_nanosecond = + static_cast<double>(timebase_info.denom) / + static_cast<double>(timebase_info.numer); + cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9; int num_cpus = 0; size_t size = sizeof(num_cpus); diff --git a/src/heap-checker.cc b/src/heap-checker.cc index 432e733..0fea028 100644 --- a/src/heap-checker.cc +++ b/src/heap-checker.cc @@ -1116,7 +1116,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { IgnoreNonThreadLiveObjectsLocked(); } if (live_objects_total) { - RAW_VLOG(0, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes", + RAW_VLOG(1, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes", live_objects_total, live_bytes_total); } // Free these: we made them here and heap_profile never saw them @@ -1229,6 +1229,7 @@ void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name, // log call stacks to help debug how come something is not a leak HeapProfileTable::AllocInfo alloc; bool r = heap_profile->FindAllocDetails(ptr, &alloc); + r = r; // suppress compiler warning in non-debug mode RAW_DCHECK(r, ""); // sanity RAW_LOG(INFO, "New live %p object's alloc stack:", ptr); for (int i = 0; i < alloc.stack_depth; ++i) { @@ -1377,7 +1378,7 @@ void HeapLeakChecker::DumpProfileLocked(ProfileType profile_type, const void* self_stack_top, size_t* alloc_bytes, size_t* alloc_objects) { - RAW_VLOG(0, "%s check \"%s\"%s", + RAW_VLOG(1, "%s check \"%s\"%s", (profile_type == START_PROFILE ? "Starting" : "At an end point for"), name_, @@ -1640,6 +1641,9 @@ bool HeapLeakChecker::DoNoLeaksOnce(CheckType check_type, size_t end_inuse_allocs; DumpProfileLocked(END_PROFILE, &a_local_var, &end_inuse_bytes, &end_inuse_allocs); + // DumpProfileLocked via IgnoreAllLiveObjectsLocked sets these: + const int64 live_objects = live_objects_total; + const int64 live_bytes = live_bytes_total; const bool use_initial_profile = !(FLAGS_heap_check_before_constructors && this == main_heap_checker); if (!use_initial_profile) { // compare against empty initial profile @@ -1746,8 +1750,10 @@ bool HeapLeakChecker::DoNoLeaksOnce(CheckType check_type, } } } else { - RAW_VLOG(0, "No leaks found for check \"%s\" " - "(but no 100%% guarantee that there aren't any)", name_); + RAW_LOG(INFO, "No leaks found for check \"%s\" " + "(but no 100%% guarantee that there aren't any): " + "found %"PRId64" reachable heap objects of %"PRId64" bytes", + name_, live_objects, live_bytes); } return !see_leaks; } else { @@ -1946,6 +1952,8 @@ void HeapLeakChecker::InternalInitStart() { // (i.e. nm will list __builtin_new and __builtin_vec_new as undefined). // If this happens, it is a BUILD bug to be fixed. + RAW_LOG(WARNING, "Heap leak checker is active -- Performance may suffer"); + if (FLAGS_heap_check != "local") { // Schedule registered heap cleanup atexit(RunHeapCleanups); @@ -1995,7 +2003,7 @@ bool HeapLeakChecker::NoGlobalLeaks() { CheckFullness fullness = check_type == NO_LEAKS ? USE_PPROF : USE_COUNTS; // use pprof if it can help ignore false leaks ReportMode report_mode = FLAGS_heap_check_report ? 
                                  PPROF_REPORT : NO_REPORT;
-    RAW_VLOG(0, "Checking for whole-program memory leaks");
+    RAW_VLOG(1, "Checking for whole-program memory leaks");
     result = main_hc->DoNoLeaks(check_type, fullness, report_mode);
   }
   return result;
@@ -2090,7 +2098,7 @@ void HeapLeakChecker::BeforeConstructors() {
   heap_profile = new (Allocator::Allocate(sizeof(HeapProfileTable)))
                    HeapProfileTable(&Allocator::Allocate, &Allocator::Free);
   heap_checker_lock.Unlock();
-  RAW_VLOG(0, "Starting tracking the heap");
+  RAW_VLOG(1, "Starting tracking the heap");
   heap_checker_on = true;
   // Run silencing if we are called from the first global c-tor,
   // not from the first mmap/sbrk/alloc call:
@@ -2273,8 +2281,7 @@ void HeapLeakChecker::DisableChecksFromToLocked(const void* start_address,
         val.start_address != value.start_address) {
       RAW_LOG(FATAL, "Two DisableChecksToHereFrom calls conflict: "
                      "(%p, %p, %d) vs. (%p, %p, %d)",
-              (void*)value.start_address, end_address,
-              value.max_depth,
+              (void*)val.start_address, end_address, val.max_depth,
               start_address, end_address, max_depth);
     }
   }
diff --git a/src/heap-profile-table.cc b/src/heap-profile-table.cc
index ec591b2..b090fc7 100644
--- a/src/heap-profile-table.cc
+++ b/src/heap-profile-table.cc
@@ -421,7 +421,7 @@ void HeapProfileTable::CleanupOldProfiles(const char* prefix) {
     const char* fname = g.gl_pathv[i];
     if ((strlen(fname) >= prefix_length) &&
         (memcmp(fname, prefix, prefix_length) == 0)) {
-      RAW_VLOG(0, "Removing old heap profile %s", fname);
+      RAW_VLOG(1, "Removing old heap profile %s", fname);
       unlink(fname);
     }
   }
diff --git a/src/memfs_malloc.cc b/src/memfs_malloc.cc
index 93de8bf..21de3ee 100644
--- a/src/memfs_malloc.cc
+++ b/src/memfs_malloc.cc
@@ -168,7 +168,12 @@ static void InitSystemAllocator() {
   int hugetlb_fd = open(hugetlbfs_path.c_str(),
                         O_RDWR | O_CREAT | O_EXCL, 0600);
-  CHECK_ERR(hugetlb_fd);
+  if (hugetlb_fd == -1) {
+    RAW_LOG(WARNING, "unable to create memfs_malloc_path file %s: %s",
+            hugetlbfs_path.c_str(), strerror(errno));
+    return;
+  }
+
   // Clean up memory on process exit
   CHECK_ERR(unlink(hugetlbfs_path.c_str()));
diff --git a/src/pprof b/src/pprof
--- a/src/pprof
+++ b/src/pprof
@@ -2444,6 +2444,16 @@ sub FindLibrary {
   return $file;
 }
+# Return path to library with debugging symbols.
+# For libc libraries, the copy in /usr/lib/debug contains debugging symbols
+sub DebuggingLibrary {
+  my $file = shift;
+  if ($file =~ m|^/| && -f "/usr/lib/debug$file") {
+    return "/usr/lib/debug$file";
+  }
+  return undef;
+}
+
 # Parse text section header of a library using objdump
 sub ParseTextSectionHeader {
   my $lib = shift;
@@ -2527,10 +2537,17 @@ sub ParseLibraries {
     $lib = FindLibrary($lib);
-    my $text = ParseTextSectionHeader($lib);
-    if (defined($text)) {
-      my $vma_offset = AddressSub($text->{vma}, $text->{file_offset});
-      $offset = AddressAdd($offset, $vma_offset);
+    # Check for pre-relocated libraries, which use pre-relocated symbol tables
+    # and thus require adjusting the offset that we'll use to translate
+    # VM addresses into symbol table addresses.
+    # Only do this if we're not going to fetch the symbol table from a
+    # debugging copy of the library.
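+    # (Illustration with made-up numbers: a prelinked library whose .text
+    # section reports vma 0xb7001000 at file offset 0x1000 would get
+    # vma_offset 0xb7000000 added to $offset below, so that mapped
+    # addresses translate into the pre-relocated symbol-table addresses.)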
+    if (!DebuggingLibrary($lib)) {
+      my $text = ParseTextSectionHeader($lib);
+      if (defined($text)) {
+        my $vma_offset = AddressSub($text->{vma}, $text->{file_offset});
+        $offset = AddressAdd($offset, $vma_offset);
+      }
     }
     push(@{$result}, [$lib, $start, $finish, $offset]);
@@ -3004,9 +3021,11 @@ sub GetProcedureBoundaries {
   my $regexp = shift;
   # For libc libraries, the copy in /usr/lib/debug contains debugging symbols
-  if ($image =~ m|^/| && -f "/usr/lib/debug$image") {
-    $image = "/usr/lib/debug$image";
+  my $debugging = DebuggingLibrary($image);
+  if ($debugging) {
+    $image = $debugging;
   }
+
   my $nm = $obj_tool_map{"nm"};
   my $cppfilt = $obj_tool_map{"c++filt"};
diff --git a/src/profiler.cc b/src/profiler.cc
index 79cb4f8..1ea5601 100644
--- a/src/profiler.cc
+++ b/src/profiler.cc
@@ -32,11 +32,6 @@
 //   Chris Demetriou (refactoring)
 //
 // Profile current program by sampling stack-trace every so often
-//
-// TODO: Detect whether or not setitimer() applies to all threads in
-// the process.  If so, instead of starting and stopping by changing
-// the signal handler, start and stop by calling setitimer() and
-// do nothing in the per-thread registration code.
 #include "config.h"
 #include "getpc.h"      // should be first to get the _GNU_SOURCE dfn
@@ -85,9 +80,23 @@ class CpuProfiler {
   void GetCurrentState(ProfilerState* state);
-  // Start interval timer for the current thread.  We do this for
-  // every known thread.  If profiling is off, the generated signals
-  // are ignored, otherwise they are captured by prof_handler().
+  // Register the current thread with the profiler.  This should be
+  // called only once per thread.
+  //
+  // The profiler attempts to determine whether or not timers are
+  // shared by all threads in the process.  (With LinuxThreads, and
+  // with NPTL on some Linux kernel versions, each thread has separate
+  // timers.)
+  //
+  // On systems which have a separate interval timer for each thread,
+  // this function starts the timer for the current thread.  Profiling
+  // is disabled by ignoring the resulting signals, and enabled by
+  // setting their handler to be prof_handler.
+  //
+  // Prior to determining whether timers are shared, this function
+  // will unconditionally start the timer.  However, if this function
+  // determines that timers are shared, then it will stop the timer if
+  // profiling is not currently enabled.
   void RegisterThread();
   static CpuProfiler instance_;
@@ -125,6 +134,32 @@ class CpuProfiler {
   bool          (*filter_)(void*);
   void*         filter_arg_;
+  // Whether or not the threading system provides interval timers
+  // that are shared by all threads in a process.
+  enum {
+    TIMERS_UNTOUCHED,  // No timer initialization attempted yet.
+    TIMERS_ONE_SET,    // First thread has registered and set timer.
+    TIMERS_SHARED,     // Timers are shared by all threads.
+    TIMERS_SEPARATE    // Timers are separate in each thread.
+  } timer_sharing_;
+
+  // Start the interval timer used for profiling.  If the thread
+  // library shares timers between threads, this is used to enable and
+  // disable the timer when starting and stopping profiling.  If
+  // timers are not shared, this is used to enable the timer in each
+  // thread.
+  void StartTimer();
+
+  // Stop the interval timer used for profiling.  Used only if the
+  // thread library shares timers between threads.
+  void StopTimer();
+
+  // Returns true if the profiling interval timer is enabled in the
+  // current thread.  This actually checks the kernel's interval timer
+  // setting.  (It is used to detect whether timers are shared or
+  // separate.)
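+  // (Note: getitimer() reports the live countdown in it_value -- nonzero
+  // while a timer is armed -- so a second thread that observes a nonzero
+  // it_value is seeing the first thread's setitimer(), i.e. the timers
+  // are shared.)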
+  bool IsTimerRunning();
+
   // Sets the timer interrupt signal handler to one that stores the pc.
   static void EnableHandler();
@@ -141,7 +176,8 @@ class CpuProfiler {
 CpuProfiler CpuProfiler::instance_;
 // Initialize profiling: activated if getenv("CPUPROFILE") exists.
-CpuProfiler::CpuProfiler() {
+CpuProfiler::CpuProfiler()
+    : timer_sharing_(TIMERS_UNTOUCHED) {
   // Get frequency of interrupts (if specified)
   char junk;
   const char* fr = getenv("CPUPROFILE_FREQUENCY");
@@ -204,6 +240,10 @@ bool CpuProfiler::Start(const char* fname,
     // with signal delivered to this thread.
   }
+  if (timer_sharing_ == TIMERS_SHARED) {
+    StartTimer();
+  }
+
   // Setup handler for SIGPROF interrupts
   EnableHandler();
@@ -224,11 +264,15 @@ void CpuProfiler::Stop() {
   // Ignore timer signals.  Note that the handler may have just
   // started and might not have taken signal_lock_ yet.  Holding
-  // signal_lock_ here along with the semantics of collector_.Add()
+  // signal_lock_ below along with the semantics of collector_.Add()
   // (which does nothing if collection is not enabled) prevents that
   // late sample from causing a problem.
   DisableHandler();
+  if (timer_sharing_ == TIMERS_SHARED) {
+    StopTimer();
+  }
+
   {
     SpinLockHolder sl(&signal_lock_);
     collector_.Stop();
@@ -273,6 +317,53 @@ void CpuProfiler::GetCurrentState(ProfilerState* state) {
 }
 void CpuProfiler::RegisterThread() {
+  SpinLockHolder cl(&control_lock_);
+
+  // We try to detect whether timers are being shared by setting a
+  // timer in the first call to this function, then checking whether
+  // it's set in the second call.
+  //
+  // Note that this detection method requires that the first two calls
+  // to RegisterThread be made from different threads.  (Subsequent
+  // calls will see timer_sharing_ set to either TIMERS_SEPARATE or
+  // TIMERS_SHARED, and won't try to detect the timer sharing type.)
+  //
+  // Also note that if timer settings were inherited across new thread
+  // creation but *not* shared, this approach wouldn't work.  That's
+  // not an issue for any Linux threading implementation, and should
+  // not be a problem for a POSIX-compliant threads implementation.
+  switch (timer_sharing_) {
+    case TIMERS_UNTOUCHED:
+      StartTimer();
+      timer_sharing_ = TIMERS_ONE_SET;
+      break;
+    case TIMERS_ONE_SET:
+      // If the timer is running, that means that the main thread's
+      // timer setup is seen in this (second) thread -- and therefore
+      // that timers are shared.
+      if (IsTimerRunning()) {
+        timer_sharing_ = TIMERS_SHARED;
+        // If profiling has already been enabled, we have to keep the
+        // timer running.  If not, we disable the timer here and
+        // re-enable it in Start().
+        if (!collector_.enabled()) {
+          StopTimer();
+        }
+      } else {
+        timer_sharing_ = TIMERS_SEPARATE;
+        StartTimer();
+      }
+      break;
+    case TIMERS_SHARED:
+      // Nothing needed.
+      break;
+    case TIMERS_SEPARATE:
+      StartTimer();
+      break;
+  }
+}
+
+void CpuProfiler::StartTimer() {
   // TODO: Randomize the initial interrupt value?
   // TODO: Randomize the inter-interrupt period on every interrupt?
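  // (The unchanged setup below is elided by this hunk; it presumably arms
  // it_interval and it_value with 1000000/frequency_ usec -- 10ms at the
  // default CPUPROFILE_FREQUENCY of 100 -- so SIGPROF fires roughly 100
  // times per second of consumed CPU time.)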
  struct itimerval timer;
@@ -282,6 +373,19 @@
   setitimer(ITIMER_PROF, &timer, 0);
 }
+void CpuProfiler::StopTimer() {
+  struct itimerval timer;
+  memset(&timer, 0, sizeof timer);
+  setitimer(ITIMER_PROF, &timer, 0);
+}
+
+bool CpuProfiler::IsTimerRunning() {
+  itimerval current_timer;
+  RAW_CHECK(getitimer(ITIMER_PROF, &current_timer) == 0, "getitimer failed");
+  return (current_timer.it_value.tv_sec != 0 ||
+          current_timer.it_value.tv_usec != 0);
+}
+
 void CpuProfiler::EnableHandler() {
   struct sigaction sa;
   sa.sa_sigaction = prof_handler;
diff --git a/src/tests/atomicops_unittest.cc b/src/tests/atomicops_unittest.cc
index 26fd896..25a518e 100644
--- a/src/tests/atomicops_unittest.cc
+++ b/src/tests/atomicops_unittest.cc
@@ -34,14 +34,16 @@
 #include "base/logging.h"
 #include "base/atomicops.h"
+#define GG_ULONGLONG(x)  static_cast<uint64>(x)
+
 template <class AtomicType>
 static void TestAtomicIncrement() {
   // For now, we just test single threaded execution
-  // use a guard value to make sure the AtomicIncrement doesn't go
+  // use a guard value to make sure the NoBarrier_AtomicIncrement doesn't go
   // outside the expected address bounds.  This is in particular to
   // test that some future change to the asm code doesn't cause the
-  // 32-bit AtomicIncrement doesn't do the wrong thing on 64-bit
+  // 32-bit NoBarrier_AtomicIncrement to do the wrong thing on 64-bit
   // machines.
   struct {
     AtomicType prev_word;
@@ -57,55 +59,208 @@ static void TestAtomicIncrement() {
   s.count = 0;
   s.next_word = next_word_value;
-  CHECK_EQ(AtomicIncrement(&s.count, 1), 1);
+  CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, 1), 1);
   CHECK_EQ(s.count, 1);
   CHECK_EQ(s.prev_word, prev_word_value);
   CHECK_EQ(s.next_word, next_word_value);
-  CHECK_EQ(AtomicIncrement(&s.count, 2), 3);
+  CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, 2), 3);
   CHECK_EQ(s.count, 3);
   CHECK_EQ(s.prev_word, prev_word_value);
   CHECK_EQ(s.next_word, next_word_value);
-  CHECK_EQ(AtomicIncrement(&s.count, 3), 6);
+  CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, 3), 6);
   CHECK_EQ(s.count, 6);
   CHECK_EQ(s.prev_word, prev_word_value);
   CHECK_EQ(s.next_word, next_word_value);
-  CHECK_EQ(AtomicIncrement(&s.count, -3), 3);
+  CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -3), 3);
   CHECK_EQ(s.count, 3);
   CHECK_EQ(s.prev_word, prev_word_value);
   CHECK_EQ(s.next_word, next_word_value);
-  CHECK_EQ(AtomicIncrement(&s.count, -2), 1);
+  CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -2), 1);
   CHECK_EQ(s.count, 1);
   CHECK_EQ(s.prev_word, prev_word_value);
   CHECK_EQ(s.next_word, next_word_value);
-  CHECK_EQ(AtomicIncrement(&s.count, -1), 0);
+  CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -1), 0);
   CHECK_EQ(s.count, 0);
   CHECK_EQ(s.prev_word, prev_word_value);
   CHECK_EQ(s.next_word, next_word_value);
-  CHECK_EQ(AtomicIncrement(&s.count, -1), -1);
+  CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -1), -1);
   CHECK_EQ(s.count, -1);
   CHECK_EQ(s.prev_word, prev_word_value);
   CHECK_EQ(s.next_word, next_word_value);
-  CHECK_EQ(AtomicIncrement(&s.count, -4), -5);
+  CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -4), -5);
   CHECK_EQ(s.count, -5);
   CHECK_EQ(s.prev_word, prev_word_value);
   CHECK_EQ(s.next_word, next_word_value);
-  CHECK_EQ(AtomicIncrement(&s.count, 5), 0);
+  CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, 5), 0);
   CHECK_EQ(s.count, 0);
   CHECK_EQ(s.prev_word, prev_word_value);
   CHECK_EQ(s.next_word, next_word_value);
 }
+
+#define NUM_BITS(T) (sizeof(T) * 8)
+
+
+template <class AtomicType>
+static void TestCompareAndSwap() {
+  AtomicType value = 0;
+  AtomicType prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 1);
+  CHECK_EQ(1, value);
+  CHECK_EQ(0, prev);
+
+  // Use test value that has non-zero bits in both halves, more for testing
+  // 64-bit implementation on 32-bit platforms.
+  const AtomicType k_test_val = (GG_ULONGLONG(1) <<
+                                 (NUM_BITS(AtomicType) - 2)) + 11;
+  value = k_test_val;
+  prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 5);
+  CHECK_EQ(k_test_val, value);
+  CHECK_EQ(k_test_val, prev);
+
+  value = k_test_val;
+  prev = base::subtle::NoBarrier_CompareAndSwap(&value, k_test_val, 5);
+  CHECK_EQ(5, value);
+  CHECK_EQ(k_test_val, prev);
+}
+
+
+template <class AtomicType>
+static void TestAtomicExchange() {
+  AtomicType value = 0;
+  AtomicType new_value = base::subtle::NoBarrier_AtomicExchange(&value, 1);
+  CHECK_EQ(1, value);
+  CHECK_EQ(0, new_value);
+
+  // Use test value that has non-zero bits in both halves, more for testing
+  // 64-bit implementation on 32-bit platforms.
+  const AtomicType k_test_val = (GG_ULONGLONG(1) <<
+                                 (NUM_BITS(AtomicType) - 2)) + 11;
+  value = k_test_val;
+  new_value = base::subtle::NoBarrier_AtomicExchange(&value, k_test_val);
+  CHECK_EQ(k_test_val, value);
+  CHECK_EQ(k_test_val, new_value);
+
+  value = k_test_val;
+  new_value = base::subtle::NoBarrier_AtomicExchange(&value, 5);
+  CHECK_EQ(5, value);
+  CHECK_EQ(k_test_val, new_value);
+}
+
+
+template <class AtomicType>
+static void TestAtomicIncrementBounds() {
+  // Test at rollover boundary between int_max and int_min
+  AtomicType test_val = (GG_ULONGLONG(1) <<
+                         (NUM_BITS(AtomicType) - 1));
+  AtomicType value = -1 ^ test_val;
+  AtomicType new_value = base::subtle::NoBarrier_AtomicIncrement(&value, 1);
+  CHECK_EQ(test_val, value);
+  CHECK_EQ(value, new_value);
+
+  base::subtle::NoBarrier_AtomicIncrement(&value, -1);
+  CHECK_EQ(-1 ^ test_val, value);
+
+  // Test at 32-bit boundary for 64-bit atomic type.
+  test_val = GG_ULONGLONG(1) << (NUM_BITS(AtomicType) / 2);
+  value = test_val - 1;
+  new_value = base::subtle::NoBarrier_AtomicIncrement(&value, 1);
+  CHECK_EQ(test_val, value);
+  CHECK_EQ(value, new_value);
+
+  base::subtle::NoBarrier_AtomicIncrement(&value, -1);
+  CHECK_EQ(test_val - 1, value);
+}
+
+// This is a simple sanity check that values are correct.  Not testing
+// atomicity
+template <class AtomicType>
+static void TestStore() {
+  const AtomicType kVal1 = static_cast<AtomicType>(0xa5a5a5a5a5a5a5a5LL);
+  const AtomicType kVal2 = static_cast<AtomicType>(-1);
+
+  AtomicType value;
+
+  base::subtle::NoBarrier_Store(&value, kVal1);
+  CHECK_EQ(kVal1, value);
+  base::subtle::NoBarrier_Store(&value, kVal2);
+  CHECK_EQ(kVal2, value);
+
+  base::subtle::Acquire_Store(&value, kVal1);
+  CHECK_EQ(kVal1, value);
+  base::subtle::Acquire_Store(&value, kVal2);
+  CHECK_EQ(kVal2, value);
+
+  base::subtle::Release_Store(&value, kVal1);
+  CHECK_EQ(kVal1, value);
+  base::subtle::Release_Store(&value, kVal2);
+  CHECK_EQ(kVal2, value);
+}
+
+// This is a simple sanity check that values are correct.  Not testing
+// atomicity
+template <class AtomicType>
+static void TestLoad() {
+  const AtomicType kVal1 = static_cast<AtomicType>(0xa5a5a5a5a5a5a5a5LL);
+  const AtomicType kVal2 = static_cast<AtomicType>(-1);
+
+  AtomicType value;
+
+  value = kVal1;
+  CHECK_EQ(kVal1, base::subtle::NoBarrier_Load(&value));
+  value = kVal2;
+  CHECK_EQ(kVal2, base::subtle::NoBarrier_Load(&value));
+
+  value = kVal1;
+  CHECK_EQ(kVal1, base::subtle::Acquire_Load(&value));
+  value = kVal2;
+  CHECK_EQ(kVal2, base::subtle::Acquire_Load(&value));
+
+  value = kVal1;
+  CHECK_EQ(kVal1, base::subtle::Release_Load(&value));
+  value = kVal2;
+  CHECK_EQ(kVal2, base::subtle::Release_Load(&value));
+}
+
+template <class AtomicType>
+static void TestAtomicOps() {
+  TestCompareAndSwap<AtomicType>();
+  TestAtomicExchange<AtomicType>();
+  TestAtomicIncrementBounds<AtomicType>();
+  TestStore<AtomicType>();
+  TestLoad<AtomicType>();
+}
+
 int main(int argc, char** argv) {
   TestAtomicIncrement<AtomicWord>();
   TestAtomicIncrement<Atomic32>();
+
+  TestAtomicOps<AtomicWord>();
+  TestAtomicOps<Atomic32>();
+
+  // I've commented the Atomic64 tests out for now, because Atomic64
+  // doesn't work on x86 systems that are not compiled to support mmx
+  // registers.  Since I want this project to be as portable as
+  // possible -- that is, not to assume we've compiled for mmx or even
+  // that the processor supports it -- and we don't actually use
+  // Atomic64 anywhere, I've commented it out of the test for now.
+  // (Luckily, if we ever do use Atomic64 by accident, we'll get told
+  // via a compiler error rather than some obscure runtime failure, so
+  // this course of action is safe.)
+  // If we ever *do* want to enable this, try adding -msse (or -mmmx?)
+  // to the CXXFLAGS in Makefile.am.
+#if 0 && defined(BASE_HAS_ATOMIC64)
+  TestAtomicIncrement<base::subtle::Atomic64>();
+  TestAtomicOps<base::subtle::Atomic64>();
+#endif
+
   printf("PASS\n");
   return 0;
 }
diff --git a/src/tests/heap-checker-death_unittest.sh b/src/tests/heap-checker-death_unittest.sh
index 5e2e8ae..4f4242c 100755
--- a/src/tests/heap-checker-death_unittest.sh
+++ b/src/tests/heap-checker-death_unittest.sh
@@ -139,7 +139,7 @@ EARLY_MSG="Starting tracking the heap$"
 Test 60 0 "$EARLY_MSG" "" \
   HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \
-  || exit 5
+  PERFTOOLS_VERBOSE=1 || exit 5
 Test 60 0 "MemoryRegionMap Init$" "" \
   HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \
   PERFTOOLS_VERBOSE=2 || exit 6
diff --git a/src/tests/maybe_threads_unittest.sh b/src/tests/maybe_threads_unittest.sh
index 1c1a1d0..6e0e5f9 100755
--- a/src/tests/maybe_threads_unittest.sh
+++ b/src/tests/maybe_threads_unittest.sh
@@ -61,8 +61,15 @@ UNITTEST_DIR=`$UNITTEST_DIR/low_level_alloc_unittest --help 2>&1 \
   | awk '{print $2; exit;}' \
   | xargs dirname`
-# We need to set the library-path too: libtcmalloc depends on libstacktrace
-# (Note we try several different names: OS X uses its own libpath varname).
-LD_LIBRARY_PATH="$UNITTEST_DIR" DYLD_LIBRARY_PATH="$UNITTEST_DIR" \
-LD_PRELOAD="$UNITTEST_DIR/libtcmalloc_minimal.so" \
-    $UNITTEST_DIR/low_level_alloc_unittest
+# Figure out where libtcmalloc lives.  It should be in UNITTEST_DIR,
+# but with libtool it might be in a subdir.
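+# (libtool builds typically leave a wrapper script in the build directory
+# and hide the real shared object under .libs/, hence the two probes below.)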
+if [ -e "$UNITTEST_DIR/libtcmalloc_minimal.so" ]; then + LIB_PATH="$UNITTEST_DIR/libtcmalloc_minimal.so" +elif [ -e "$UNITTEST_DIR/.libs/libtcmalloc_minimal.so" ]; then + LIB_PATH="$UNITTEST_DIR/.libs/libtcmalloc_minimal.so" +else + echo "Cannot run $0: cannot find libtcmalloc_minimal.so" + exit 2 +fi + +LD_PRELOAD="$LIB_PATH" $UNITTEST_DIR/low_level_alloc_unittest diff --git a/vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj b/vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj index 212c860..0a8ba08 100755 --- a/vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj +++ b/vsprojects/libtcmalloc_minimal/libtcmalloc_minimal.vcproj @@ -117,6 +117,23 @@ Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
<File
+ RelativePath="..\..\src\base\dynamic_annotations.cc">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="..\..\src\windows; ..\..\src"
+ RuntimeLibrary="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="..\..\src\windows; ..\..\src"
+ RuntimeLibrary="2"/>
+ </FileConfiguration>
+ </File>
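+				<!-- RuntimeLibrary above uses the VS2005 encoding: 3 selects the
+				     debug DLL runtime (/MDd), 2 the release DLL runtime (/MD). -->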
+ <File
RelativePath="..\..\src\heap-profile-table.cc">
<FileConfiguration
Name="Debug|Win32">
diff --git a/vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj b/vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj index 85760c1..0c0b8d0 100755 --- a/vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj +++ b/vsprojects/low_level_alloc_unittest/low_level_alloc_unittest.vcproj @@ -113,6 +113,23 @@ Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
<File
+ RelativePath="..\..\src\base\dynamic_annotations.cc">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="..\..\src\windows; ..\..\src"
+ RuntimeLibrary="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="..\..\src\windows; ..\..\src"
+ RuntimeLibrary="2"/>
+ </FileConfiguration>
+ </File>
+ <File
RelativePath="..\..\src\base\logging.cc">
<FileConfiguration
Name="Debug|Win32">
diff --git a/vsprojects/tcmalloc_minimal_unittest-static/tcmalloc_minimal_unittest-static.vcproj b/vsprojects/tcmalloc_minimal_unittest-static/tcmalloc_minimal_unittest-static.vcproj index fbee663..eda0f08 100755 --- a/vsprojects/tcmalloc_minimal_unittest-static/tcmalloc_minimal_unittest-static.vcproj +++ b/vsprojects/tcmalloc_minimal_unittest-static/tcmalloc_minimal_unittest-static.vcproj @@ -113,6 +113,23 @@ Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}">
<File
+ RelativePath="..\..\src\base\dynamic_annotations.cc">
+ <FileConfiguration
+ Name="Debug|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="..\..\src\windows; ..\..\src"
+ RuntimeLibrary="3"/>
+ </FileConfiguration>
+ <FileConfiguration
+ Name="Release|Win32">
+ <Tool
+ Name="VCCLCompilerTool"
+ AdditionalIncludeDirectories="..\..\src\windows; ..\..\src"
+ RuntimeLibrary="2"/>
+ </FileConfiguration>
+ </File>
+ <File
RelativePath="..\..\src\heap-profile-table.cc">
<FileConfiguration
Name="Debug|Win32">