diff options
-rw-r--r-- | Makefile | 11 | ||||
-rw-r--r-- | TODO | 1 | ||||
-rwxr-xr-x | build_detect_platform | 49 | ||||
-rw-r--r-- | db/c.cc | 4 | ||||
-rw-r--r-- | db/c_test.c | 6 | ||||
-rw-r--r-- | db/db_bench.cc | 1 | ||||
-rw-r--r-- | db/db_impl.cc | 6 | ||||
-rw-r--r-- | db/db_impl.h | 28 | ||||
-rw-r--r-- | db/db_test.cc | 6 | ||||
-rw-r--r-- | db/version_set.cc | 2 | ||||
-rw-r--r-- | db/version_set.h | 4 | ||||
-rw-r--r-- | doc/bench/db_bench_sqlite3.cc | 2 | ||||
-rw-r--r-- | doc/index.html | 2 | ||||
-rw-r--r-- | helpers/memenv/memenv.cc | 10 | ||||
-rw-r--r-- | include/leveldb/c.h | 10 | ||||
-rw-r--r-- | include/leveldb/db.h | 2 | ||||
-rw-r--r-- | include/leveldb/env.h | 10 | ||||
-rw-r--r-- | port/atomic_pointer.h | 72 | ||||
-rw-r--r-- | port/port_posix.h | 11 | ||||
-rw-r--r-- | port/thread_annotations.h | 59 | ||||
-rw-r--r-- | table/block.cc | 4 | ||||
-rw-r--r-- | util/bloom_test.cc | 5 | ||||
-rw-r--r-- | util/coding.cc | 40 | ||||
-rw-r--r-- | util/env_posix.cc | 101 | ||||
-rw-r--r-- | util/mutexlock.h | 8 |
25 files changed, 382 insertions, 72 deletions
@@ -15,7 +15,8 @@ OPT ?= -O2 -DNDEBUG # (A) Production use (optimized mode) #----------------------------------------------- # detect what platform we're building on -$(shell ./build_detect_platform build_config.mk) +$(shell CC=$(CC) CXX=$(CXX) TARGET_OS=$(TARGET_OS) \ + ./build_detect_platform build_config.mk ./) # this file is generated by the previous line to set build flags and sources include build_config.mk @@ -70,7 +71,7 @@ SHARED = $(SHARED1) else # Update db.h if you change these. SHARED_MAJOR = 1 -SHARED_MINOR = 5 +SHARED_MINOR = 6 SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT) SHARED2 = $(SHARED1).$(SHARED_MAJOR) SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR) @@ -82,7 +83,7 @@ $(SHARED2): $(SHARED3) endif $(SHARED3): - $(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SOURCES) -o $(SHARED3) + $(CXX) $(SOURCES) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(INSTALL_PATH)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) -o $(SHARED3) endif # PLATFORM_SHARED_EXT @@ -179,14 +180,14 @@ IOSVERSION=$(shell defaults read $(PLATFORMSROOT)/iPhoneOS.platform/version CFBu .cc.o: mkdir -p ios-x86/$(dir $@) - $(SIMULATORROOT)/usr/bin/$(CXX) $(CXXFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@ + $(CXX) $(CXXFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@ mkdir -p ios-arm/$(dir $@) $(DEVICEROOT)/usr/bin/$(CXX) $(CXXFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk -arch armv6 -arch armv7 -c $< -o ios-arm/$@ lipo ios-x86/$@ ios-arm/$@ -create -output $@ .c.o: mkdir -p ios-x86/$(dir $@) - $(SIMULATORROOT)/usr/bin/$(CC) $(CFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@ + $(CC) $(CFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@ mkdir -p ios-arm/$(dir $@) $(DEVICEROOT)/usr/bin/$(CC) $(CFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk -arch armv6 -arch armv7 -c $< -o ios-arm/$@ lipo ios-x86/$@ ios-arm/$@ -create -output $@ @@ -7,6 +7,7 @@ db within [start_key..end_key]? For Chrome, deletion of obsolete object stores, etc. can be done in the background anyway, so probably not that important. +- There have been requests for MultiGet. After a range is completely deleted, what gets rid of the corresponding files if we do no future changes to that range. Make diff --git a/build_detect_platform b/build_detect_platform index 959a7d6..83bbe42 100755 --- a/build_detect_platform +++ b/build_detect_platform @@ -23,8 +23,9 @@ # OUTPUT=$1 -if test -z "$OUTPUT"; then - echo "usage: $0 <output-filename>" >&2 +PREFIX=$2 +if test -z "$OUTPUT" || test -z "$PREFIX"; then + echo "usage: $0 <output-filename> <directory_prefix>" >&2 exit 1 fi @@ -55,58 +56,72 @@ PLATFORM_SHARED_LDFLAGS="-shared -Wl,-soname -Wl," PLATFORM_SHARED_CFLAGS="-fPIC" PLATFORM_SHARED_VERSIONED=true -# On GCC, we pick libc's memcmp over GCC's memcmp via -fno-builtin-memcmp +MEMCMP_FLAG= +if [ "$CXX" = "g++" ]; then + # Use libc's memcmp instead of GCC's memcmp. This results in ~40% + # performance improvement on readrandom under gcc 4.4.3 on Linux/x86. + MEMCMP_FLAG="-fno-builtin-memcmp" +fi + case "$TARGET_OS" in Darwin) PLATFORM=OS_MACOSX - COMMON_FLAGS="-fno-builtin-memcmp -DOS_MACOSX" + COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX" PLATFORM_SHARED_EXT=dylib PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name " PORT_FILE=port/port_posix.cc ;; Linux) PLATFORM=OS_LINUX - COMMON_FLAGS="-fno-builtin-memcmp -pthread -DOS_LINUX" + COMMON_FLAGS="$MEMCMP_FLAG -pthread -DOS_LINUX" PLATFORM_LDFLAGS="-pthread" PORT_FILE=port/port_posix.cc ;; SunOS) PLATFORM=OS_SOLARIS - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_SOLARIS" + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_SOLARIS" PLATFORM_LDFLAGS="-lpthread -lrt" PORT_FILE=port/port_posix.cc ;; FreeBSD) PLATFORM=OS_FREEBSD - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_FREEBSD" + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_FREEBSD" PLATFORM_LDFLAGS="-lpthread" PORT_FILE=port/port_posix.cc ;; NetBSD) PLATFORM=OS_NETBSD - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_NETBSD" + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_NETBSD" PLATFORM_LDFLAGS="-lpthread -lgcc_s" PORT_FILE=port/port_posix.cc ;; OpenBSD) PLATFORM=OS_OPENBSD - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_OPENBSD" + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_OPENBSD" PLATFORM_LDFLAGS="-pthread" PORT_FILE=port/port_posix.cc ;; DragonFly) PLATFORM=OS_DRAGONFLYBSD - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_DRAGONFLYBSD" + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_DRAGONFLYBSD" PLATFORM_LDFLAGS="-lpthread" PORT_FILE=port/port_posix.cc ;; OS_ANDROID_CROSSCOMPILE) PLATFORM=OS_ANDROID - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX" + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX" PLATFORM_LDFLAGS="" # All pthread features are in the Android C library PORT_FILE=port/port_posix.cc CROSS_COMPILE=true ;; + HP-UX) + PLATFORM=OS_HPUX + COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_HPUX" + PLATFORM_LDFLAGS="-pthread" + PORT_FILE=port/port_posix.cc + # man ld: +h internal_name + PLATFORM_SHARED_LDFLAGS="-shared -Wl,+h -Wl," + ;; *) echo "Unknown platform!" >&2 exit 1 @@ -116,11 +131,13 @@ esac # except for the test and benchmark files. By default, find will output a list # of all files matching either rule, so we need to append -print to make the # prune take effect. -DIRS="util db table" +DIRS="$PREFIX/db $PREFIX/util $PREFIX/table" + set -f # temporarily disable globbing so that our patterns aren't expanded PRUNE_TEST="-name *test*.cc -prune" PRUNE_BENCH="-name *_bench.cc -prune" -PORTABLE_FILES=`find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cc' -print | sort | tr "\n" " "` +PORTABLE_FILES=`find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cc' -print | sort | sed "s,^$PREFIX/,," | tr "\n" " "` + set +f # re-enable globbing # The sources consist of the portable files, plus the platform-specific port @@ -133,7 +150,7 @@ if [ "$CROSS_COMPILE" = "true" ]; then true else # If -std=c++0x works, use <cstdatomic>. Otherwise use port_posix.h. - $CXX $CFLAGS -std=c++0x -x c++ - -o /dev/null 2>/dev/null <<EOF + $CXX $CXXFLAGS -std=c++0x -x c++ - -o /dev/null 2>/dev/null <<EOF #include <cstdatomic> int main() {} EOF @@ -146,7 +163,7 @@ EOF # Test whether Snappy library is installed # http://code.google.com/p/snappy/ - $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF + $CXX $CXXFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF #include <snappy.h> int main() {} EOF @@ -156,7 +173,7 @@ EOF fi # Test whether tcmalloc is available - $CXX $CFLAGS -x c++ - -o /dev/null -ltcmalloc 2>/dev/null <<EOF + $CXX $CXXFLAGS -x c++ - -o /dev/null -ltcmalloc 2>/dev/null <<EOF int main() {} EOF if [ "$?" = 0 ]; then @@ -578,4 +578,8 @@ void leveldb_env_destroy(leveldb_env_t* env) { delete env; } +void leveldb_free(void* ptr) { + free(ptr); +} + } // end extern "C" diff --git a/db/c_test.c b/db/c_test.c index 9792447..9d6dfec 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -204,6 +204,12 @@ int main(int argc, char** argv) { CheckCondition(err != NULL); Free(&err); + StartPhase("leveldb_free"); + db = leveldb_open(options, dbname, &err); + CheckCondition(err != NULL); + leveldb_free(err); + err = NULL; + StartPhase("open"); leveldb_options_set_create_if_missing(options, 1); db = leveldb_open(options, dbname, &err); diff --git a/db/db_bench.cc b/db/db_bench.cc index 21d3e25..7abdf87 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -693,6 +693,7 @@ class Benchmark { options.create_if_missing = !FLAGS_use_existing_db; options.block_cache = cache_; options.write_buffer_size = FLAGS_write_buffer_size; + options.max_open_files = FLAGS_open_files; options.filter_policy = filter_policy_; Status s = DB::Open(options, FLAGS_db, &db_); if (!s.ok()) { diff --git a/db/db_impl.cc b/db/db_impl.cc index 90c1c81..c9de169 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -609,7 +609,11 @@ void DBImpl::BackgroundCall() { assert(bg_compaction_scheduled_); if (!shutting_down_.Acquire_Load()) { Status s = BackgroundCompaction(); - if (!s.ok()) { + if (s.ok()) { + // Success + } else if (shutting_down_.Acquire_Load()) { + // Error most likely due to shutdown; do not wait + } else { // Wait a little bit before retrying background compaction in // case this is an environmental problem and we do not want to // chew up resources for failed compactions for the duration of diff --git a/db/db_impl.h b/db/db_impl.h index 8d2bb34..bd29dd8 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -13,6 +13,7 @@ #include "leveldb/db.h" #include "leveldb/env.h" #include "port/port.h" +#include "port/thread_annotations.h" namespace leveldb { @@ -71,7 +72,7 @@ class DBImpl : public DB { // Recover the descriptor from persistent storage. May do a significant // amount of work to recover recently logged updates. Any changes to // be made to the descriptor are added to *edit. - Status Recover(VersionEdit* edit); + Status Recover(VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mutex_); void MaybeIgnoreError(Status* s) const; @@ -80,27 +81,34 @@ class DBImpl : public DB { // Compact the in-memory write buffer to disk. Switches to a new // log-file/memtable and writes a new descriptor iff successful. - Status CompactMemTable(); + Status CompactMemTable() + EXCLUSIVE_LOCKS_REQUIRED(mutex_); Status RecoverLogFile(uint64_t log_number, VersionEdit* edit, - SequenceNumber* max_sequence); + SequenceNumber* max_sequence) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); - Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base); + Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); - Status MakeRoomForWrite(bool force /* compact even if there is room? */); + Status MakeRoomForWrite(bool force /* compact even if there is room? */) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); WriteBatch* BuildBatchGroup(Writer** last_writer); - void MaybeScheduleCompaction(); + void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_); static void BGWork(void* db); void BackgroundCall(); - Status BackgroundCompaction(); - void CleanupCompaction(CompactionState* compact); - Status DoCompactionWork(CompactionState* compact); + Status BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_); + void CleanupCompaction(CompactionState* compact) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); + Status DoCompactionWork(CompactionState* compact) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); Status OpenCompactionOutputFile(CompactionState* compact); Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input); - Status InstallCompactionResults(CompactionState* compact); + Status InstallCompactionResults(CompactionState* compact) + EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Constant after construction Env* const env_; diff --git a/db/db_test.cc b/db/db_test.cc index 3744d0e..74abd13 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -1442,6 +1442,12 @@ TEST(DBTest, DBOpen_Options) { db = NULL; } +TEST(DBTest, Locking) { + DB* db2 = NULL; + Status s = DB::Open(CurrentOptions(), dbname_, &db2); + ASSERT_TRUE(!s.ok()) << "Locking did not prevent re-opening db"; +} + // Check that number of files does not grow when we are out of space TEST(DBTest, NoSpace) { Options options = CurrentOptions(); diff --git a/db/version_set.cc b/db/version_set.cc index cf976b4..bdd4a57 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -865,7 +865,7 @@ Status VersionSet::Recover() { if (edit.has_comparator_ && edit.comparator_ != icmp_.user_comparator()->Name()) { s = Status::InvalidArgument( - edit.comparator_ + "does not match existing comparator ", + edit.comparator_ + " does not match existing comparator ", icmp_.user_comparator()->Name()); } } diff --git a/db/version_set.h b/db/version_set.h index 61c4c99..792899b 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -21,6 +21,7 @@ #include "db/dbformat.h" #include "db/version_edit.h" #include "port/port.h" +#include "port/thread_annotations.h" namespace leveldb { @@ -159,7 +160,8 @@ class VersionSet { // current version. Will release *mu while actually writing to the file. // REQUIRES: *mu is held on entry. // REQUIRES: no other thread concurrently calls LogAndApply() - Status LogAndApply(VersionEdit* edit, port::Mutex* mu); + Status LogAndApply(VersionEdit* edit, port::Mutex* mu) + EXCLUSIVE_LOCKS_REQUIRED(mu); // Recover the last saved descriptor from persistent storage. Status Recover(); diff --git a/doc/bench/db_bench_sqlite3.cc b/doc/bench/db_bench_sqlite3.cc index 256793a..e63aaa8 100644 --- a/doc/bench/db_bench_sqlite3.cc +++ b/doc/bench/db_bench_sqlite3.cc @@ -618,7 +618,7 @@ class Benchmark { ErrorCheck(status); // Execute read statement - while ((status = sqlite3_step(read_stmt)) == SQLITE_ROW); + while ((status = sqlite3_step(read_stmt)) == SQLITE_ROW) {} StepErrorCheck(status); // Reset SQLite statement for another use diff --git a/doc/index.html b/doc/index.html index 521d2ba..cd29341 100644 --- a/doc/index.html +++ b/doc/index.html @@ -408,7 +408,7 @@ The optional <code>FilterPolicy</code> mechanism can be used to reduce the number of disk reads substantially. <pre> leveldb::Options options; - options.filter_policy = NewBloomFilter(10); + options.filter_policy = NewBloomFilterPolicy(10); leveldb::DB* db; leveldb::DB::Open(options, "/tmp/testdb", &db); ... use the database ... diff --git a/helpers/memenv/memenv.cc b/helpers/memenv/memenv.cc index 2082083..5879de1 100644 --- a/helpers/memenv/memenv.cc +++ b/helpers/memenv/memenv.cc @@ -221,6 +221,11 @@ class WritableFileImpl : public WritableFile { FileState* file_; }; +class NoOpLogger : public Logger { + public: + virtual void Logv(const char* format, va_list ap) { } +}; + class InMemoryEnv : public EnvWrapper { public: explicit InMemoryEnv(Env* base_env) : EnvWrapper(base_env) { } @@ -358,6 +363,11 @@ class InMemoryEnv : public EnvWrapper { return Status::OK(); } + virtual Status NewLogger(const std::string& fname, Logger** result) { + *result = new NoOpLogger; + return Status::OK(); + } + private: // Map from filenames to FileState objects, representing a simple file system. typedef std::map<std::string, FileState*> FileSystem; diff --git a/include/leveldb/c.h b/include/leveldb/c.h index 70e3cc6..9cee971 100644 --- a/include/leveldb/c.h +++ b/include/leveldb/c.h @@ -28,6 +28,7 @@ be true on entry: *errptr == NULL *errptr points to a malloc()ed null-terminated error message + (On Windows, *errptr must have been malloc()-ed by this library.) On success, a leveldb routine leaves *errptr unchanged. On failure, leveldb frees the old value of *errptr and set *errptr to a malloc()ed error message. @@ -268,6 +269,15 @@ extern void leveldb_cache_destroy(leveldb_cache_t* cache); extern leveldb_env_t* leveldb_create_default_env(); extern void leveldb_env_destroy(leveldb_env_t*); +/* Utility */ + +/* Calls free(ptr). + REQUIRES: ptr was malloc()-ed and returned by one of the routines + in this file. Note that in certain cases (typically on Windows), you + may need to call this routine instead of free(ptr) to dispose of + malloc()-ed memory returned by this library. */ +extern void leveldb_free(void* ptr); + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/include/leveldb/db.h b/include/leveldb/db.h index ed56b87..a50ac69 100644 --- a/include/leveldb/db.h +++ b/include/leveldb/db.h @@ -14,7 +14,7 @@ namespace leveldb { // Update Makefile if you change these static const int kMajorVersion = 1; -static const int kMinorVersion = 5; +static const int kMinorVersion = 6; struct Options; struct ReadOptions; diff --git a/include/leveldb/env.h b/include/leveldb/env.h index 2720667..fa32289 100644 --- a/include/leveldb/env.h +++ b/include/leveldb/env.h @@ -175,6 +175,11 @@ class SequentialFile { // // REQUIRES: External synchronization virtual Status Skip(uint64_t n) = 0; + + private: + // No copying allowed + SequentialFile(const SequentialFile&); + void operator=(const SequentialFile&); }; // A file abstraction for randomly reading the contents of a file. @@ -194,6 +199,11 @@ class RandomAccessFile { // Safe for concurrent use by multiple threads. virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const = 0; + + private: + // No copying allowed + RandomAccessFile(const RandomAccessFile&); + void operator=(const RandomAccessFile&); }; // A file abstraction for sequential writing. The implementation diff --git a/port/atomic_pointer.h b/port/atomic_pointer.h index c58bffb..e17bf43 100644 --- a/port/atomic_pointer.h +++ b/port/atomic_pointer.h @@ -36,6 +36,8 @@ #define ARCH_CPU_X86_FAMILY 1 #elif defined(__ARMEL__) #define ARCH_CPU_ARM_FAMILY 1 +#elif defined(__ppc__) || defined(__powerpc__) || defined(__powerpc64__) +#define ARCH_CPU_PPC_FAMILY 1 #endif namespace leveldb { @@ -91,6 +93,15 @@ inline void MemoryBarrier() { } #define LEVELDB_HAVE_MEMORY_BARRIER +// PPC +#elif defined(ARCH_CPU_PPC_FAMILY) && defined(__GNUC__) +inline void MemoryBarrier() { + // TODO for some powerpc expert: is there a cheaper suitable variant? + // Perhaps by having separate barriers for acquire and release ops. + asm volatile("sync" : : : "memory"); +} +#define LEVELDB_HAVE_MEMORY_BARRIER + #endif // AtomicPointer built using platform-specific MemoryBarrier() @@ -136,6 +147,66 @@ class AtomicPointer { } }; +// Atomic pointer based on sparc memory barriers +#elif defined(__sparcv9) && defined(__GNUC__) +class AtomicPointer { + private: + void* rep_; + public: + AtomicPointer() { } + explicit AtomicPointer(void* v) : rep_(v) { } + inline void* Acquire_Load() const { + void* val; + __asm__ __volatile__ ( + "ldx [%[rep_]], %[val] \n\t" + "membar #LoadLoad|#LoadStore \n\t" + : [val] "=r" (val) + : [rep_] "r" (&rep_) + : "memory"); + return val; + } + inline void Release_Store(void* v) { + __asm__ __volatile__ ( + "membar #LoadStore|#StoreStore \n\t" + "stx %[v], [%[rep_]] \n\t" + : + : [rep_] "r" (&rep_), [v] "r" (v) + : "memory"); + } + inline void* NoBarrier_Load() const { return rep_; } + inline void NoBarrier_Store(void* v) { rep_ = v; } +}; + +// Atomic pointer based on ia64 acq/rel +#elif defined(__ia64) && defined(__GNUC__) +class AtomicPointer { + private: + void* rep_; + public: + AtomicPointer() { } + explicit AtomicPointer(void* v) : rep_(v) { } + inline void* Acquire_Load() const { + void* val ; + __asm__ __volatile__ ( + "ld8.acq %[val] = [%[rep_]] \n\t" + : [val] "=r" (val) + : [rep_] "r" (&rep_) + : "memory" + ); + return val; + } + inline void Release_Store(void* v) { + __asm__ __volatile__ ( + "st8.rel [%[rep_]] = %[v] \n\t" + : + : [rep_] "r" (&rep_), [v] "r" (v) + : "memory" + ); + } + inline void* NoBarrier_Load() const { return rep_; } + inline void NoBarrier_Store(void* v) { rep_ = v; } +}; + // We have neither MemoryBarrier(), nor <cstdatomic> #else #error Please implement AtomicPointer for this platform. @@ -145,6 +216,7 @@ class AtomicPointer { #undef LEVELDB_HAVE_MEMORY_BARRIER #undef ARCH_CPU_X86_FAMILY #undef ARCH_CPU_ARM_FAMILY +#undef ARCH_CPU_PPC_FAMILY } // namespace port } // namespace leveldb diff --git a/port/port_posix.h b/port/port_posix.h index 654a4b9..6ca352e 100644 --- a/port/port_posix.h +++ b/port/port_posix.h @@ -21,13 +21,20 @@ #else #define PLATFORM_IS_LITTLE_ENDIAN false #endif -#elif defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) ||\ +#elif defined(OS_FREEBSD) + #include <sys/types.h> + #include <sys/endian.h> + #define PLATFORM_IS_LITTLE_ENDIAN (_BYTE_ORDER == _LITTLE_ENDIAN) +#elif defined(OS_OPENBSD) || defined(OS_NETBSD) ||\ defined(OS_DRAGONFLYBSD) || defined(OS_ANDROID) #include <sys/types.h> #include <sys/endian.h> +#elif defined(OS_HPUX) + #define PLATFORM_IS_LITTLE_ENDIAN false #else #include <endian.h> #endif + #include <pthread.h> #ifdef SNAPPY #include <snappy.h> @@ -42,7 +49,7 @@ #if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) ||\ defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) ||\ - defined(OS_ANDROID) + defined(OS_ANDROID) || defined(OS_HPUX) // Use fread/fwrite/fflush on platforms without _unlocked variants #define fread_unlocked fread #define fwrite_unlocked fwrite diff --git a/port/thread_annotations.h b/port/thread_annotations.h new file mode 100644 index 0000000..6f9b6a7 --- /dev/null +++ b/port/thread_annotations.h @@ -0,0 +1,59 @@ +// Copyright (c) 2012 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#ifndef STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H + +// Some environments provide custom macros to aid in static thread-safety +// analysis. Provide empty definitions of such macros unless they are already +// defined. + +#ifndef EXCLUSIVE_LOCKS_REQUIRED +#define EXCLUSIVE_LOCKS_REQUIRED(...) +#endif + +#ifndef SHARED_LOCKS_REQUIRED +#define SHARED_LOCKS_REQUIRED(...) +#endif + +#ifndef LOCKS_EXCLUDED +#define LOCKS_EXCLUDED(...) +#endif + +#ifndef LOCK_RETURNED +#define LOCK_RETURNED(x) +#endif + +#ifndef LOCKABLE +#define LOCKABLE +#endif + +#ifndef SCOPED_LOCKABLE +#define SCOPED_LOCKABLE +#endif + +#ifndef EXCLUSIVE_LOCK_FUNCTION +#define EXCLUSIVE_LOCK_FUNCTION(...) +#endif + +#ifndef SHARED_LOCK_FUNCTION +#define SHARED_LOCK_FUNCTION(...) +#endif + +#ifndef EXCLUSIVE_TRYLOCK_FUNCTION +#define EXCLUSIVE_TRYLOCK_FUNCTION(...) +#endif + +#ifndef SHARED_TRYLOCK_FUNCTION +#define SHARED_TRYLOCK_FUNCTION(...) +#endif + +#ifndef UNLOCK_FUNCTION +#define UNLOCK_FUNCTION(...) +#endif + +#ifndef NO_THREAD_SAFETY_ANALYSIS +#define NO_THREAD_SAFETY_ANALYSIS +#endif + +#endif // STORAGE_LEVELDB_PORT_THREAD_ANNOTATIONS_H diff --git a/table/block.cc b/table/block.cc index 199d453..ab83c11 100644 --- a/table/block.cc +++ b/table/block.cc @@ -162,8 +162,8 @@ class Block::Iter : public Iterator { } virtual void Seek(const Slice& target) { - // Binary search in restart array to find the first restart point - // with a key >= target + // Binary search in restart array to find the last restart point + // with a key < target uint32_t left = 0; uint32_t right = num_restarts_ - 1; while (left < right) { diff --git a/util/bloom_test.cc b/util/bloom_test.cc index 4a6ea1b..0bf8e8d 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -4,6 +4,7 @@ #include "leveldb/filter_policy.h" +#include "util/coding.h" #include "util/logging.h" #include "util/testharness.h" #include "util/testutil.h" @@ -13,8 +14,8 @@ namespace leveldb { static const int kVerbose = 1; static Slice Key(int i, char* buffer) { - memcpy(buffer, &i, sizeof(i)); - return Slice(buffer, sizeof(i)); + EncodeFixed32(buffer, i); + return Slice(buffer, sizeof(uint32_t)); } class BloomTest { diff --git a/util/coding.cc b/util/coding.cc index dbd7a65..21e3186 100644 --- a/util/coding.cc +++ b/util/coding.cc @@ -7,29 +7,29 @@ namespace leveldb { void EncodeFixed32(char* buf, uint32_t value) { -#if __BYTE_ORDER == __LITTLE_ENDIAN - memcpy(buf, &value, sizeof(value)); -#else - buf[0] = value & 0xff; - buf[1] = (value >> 8) & 0xff; - buf[2] = (value >> 16) & 0xff; - buf[3] = (value >> 24) & 0xff; -#endif + if (port::kLittleEndian) { + memcpy(buf, &value, sizeof(value)); + } else { + buf[0] = value & 0xff; + buf[1] = (value >> 8) & 0xff; + buf[2] = (value >> 16) & 0xff; + buf[3] = (value >> 24) & 0xff; + } } void EncodeFixed64(char* buf, uint64_t value) { -#if __BYTE_ORDER == __LITTLE_ENDIAN - memcpy(buf, &value, sizeof(value)); -#else - buf[0] = value & 0xff; - buf[1] = (value >> 8) & 0xff; - buf[2] = (value >> 16) & 0xff; - buf[3] = (value >> 24) & 0xff; - buf[4] = (value >> 32) & 0xff; - buf[5] = (value >> 40) & 0xff; - buf[6] = (value >> 48) & 0xff; - buf[7] = (value >> 56) & 0xff; -#endif + if (port::kLittleEndian) { + memcpy(buf, &value, sizeof(value)); + } else { + buf[0] = value & 0xff; + buf[1] = (value >> 8) & 0xff; + buf[2] = (value >> 16) & 0xff; + buf[3] = (value >> 24) & 0xff; + buf[4] = (value >> 32) & 0xff; + buf[5] = (value >> 40) & 0xff; + buf[6] = (value >> 48) & 0xff; + buf[7] = (value >> 56) & 0xff; + } } void PutFixed32(std::string* dst, uint32_t value) { diff --git a/util/env_posix.cc b/util/env_posix.cc index cb1f6fc..78e09c9 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -3,6 +3,7 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #include <deque> +#include <set> #include <dirent.h> #include <errno.h> #include <fcntl.h> @@ -23,6 +24,7 @@ #include "leveldb/slice.h" #include "port/port.h" #include "util/logging.h" +#include "util/mutexlock.h" #include "util/posix_logger.h" namespace leveldb { @@ -90,18 +92,75 @@ class PosixRandomAccessFile: public RandomAccessFile { } }; +// Helper class to limit mmap file usage so that we do not end up +// running out virtual memory or running into kernel performance +// problems for very large databases. +class MmapLimiter { + public: + // Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes. + MmapLimiter() { + SetAllowed(sizeof(void*) >= 8 ? 1000 : 0); + } + + // If another mmap slot is available, acquire it and return true. + // Else return false. + bool Acquire() { + if (GetAllowed() <= 0) { + return false; + } + MutexLock l(&mu_); + intptr_t x = GetAllowed(); + if (x <= 0) { + return false; + } else { + SetAllowed(x - 1); + return true; + } + } + + // Release a slot acquired by a previous call to Acquire() that returned true. + void Release() { + MutexLock l(&mu_); + SetAllowed(GetAllowed() + 1); + } + + private: + port::Mutex mu_; + port::AtomicPointer allowed_; + + intptr_t GetAllowed() const { + return reinterpret_cast<intptr_t>(allowed_.Acquire_Load()); + } + + // REQUIRES: mu_ must be held + void SetAllowed(intptr_t v) { + allowed_.Release_Store(reinterpret_cast<void*>(v)); + } + + MmapLimiter(const MmapLimiter&); + void operator=(const MmapLimiter&); +}; + // mmap() based random-access class PosixMmapReadableFile: public RandomAccessFile { private: std::string filename_; void* mmapped_region_; size_t length_; + MmapLimiter* limiter_; public: // base[0,length-1] contains the mmapped contents of the file. - PosixMmapReadableFile(const std::string& fname, void* base, size_t length) - : filename_(fname), mmapped_region_(base), length_(length) { } - virtual ~PosixMmapReadableFile() { munmap(mmapped_region_, length_); } + PosixMmapReadableFile(const std::string& fname, void* base, size_t length, + MmapLimiter* limiter) + : filename_(fname), mmapped_region_(base), length_(length), + limiter_(limiter) { + } + + virtual ~PosixMmapReadableFile() { + munmap(mmapped_region_, length_); + limiter_->Release(); + } virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { @@ -300,6 +359,25 @@ static int LockOrUnlock(int fd, bool lock) { class PosixFileLock : public FileLock { public: int fd_; + std::string name_; +}; + +// Set of locked files. We keep a separate set instead of just +// relying on fcntrl(F_SETLK) since fcntl(F_SETLK) does not provide +// any protection against multiple uses from the same process. +class PosixLockTable { + private: + port::Mutex mu_; + std::set<std::string> locked_files_; + public: + bool Insert(const std::string& fname) { + MutexLock l(&mu_); + return locked_files_.insert(fname).second; + } + void Remove(const std::string& fname) { + MutexLock l(&mu_); + locked_files_.erase(fname); + } }; class PosixEnv : public Env { @@ -329,19 +407,21 @@ class PosixEnv : public Env { int fd = open(fname.c_str(), O_RDONLY); if (fd < 0) { s = IOError(fname, errno); - } else if (sizeof(void*) >= 8) { - // Use mmap when virtual address-space is plentiful. + } else if (mmap_limit_.Acquire()) { uint64_t size; s = GetFileSize(fname, &size); if (s.ok()) { void* base = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); if (base != MAP_FAILED) { - *result = new PosixMmapReadableFile(fname, base, size); + *result = new PosixMmapReadableFile(fname, base, size, &mmap_limit_); } else { s = IOError(fname, errno); } } close(fd); + if (!s.ok()) { + mmap_limit_.Release(); + } } else { *result = new PosixRandomAccessFile(fname, fd); } @@ -430,12 +510,17 @@ class PosixEnv : public Env { int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644); if (fd < 0) { result = IOError(fname, errno); + } else if (!locks_.Insert(fname)) { + close(fd); + result = Status::IOError("lock " + fname, "already held by process"); } else if (LockOrUnlock(fd, true) == -1) { result = IOError("lock " + fname, errno); close(fd); + locks_.Remove(fname); } else { PosixFileLock* my_lock = new PosixFileLock; my_lock->fd_ = fd; + my_lock->name_ = fname; *lock = my_lock; } return result; @@ -447,6 +532,7 @@ class PosixEnv : public Env { if (LockOrUnlock(my_lock->fd_, false) == -1) { result = IOError("unlock", errno); } + locks_.Remove(my_lock->name_); close(my_lock->fd_); delete my_lock; return result; @@ -523,6 +609,9 @@ class PosixEnv : public Env { struct BGItem { void* arg; void (*function)(void*); }; typedef std::deque<BGItem> BGQueue; BGQueue queue_; + + PosixLockTable locks_; + MmapLimiter mmap_limit_; }; PosixEnv::PosixEnv() : page_size_(getpagesize()), diff --git a/util/mutexlock.h b/util/mutexlock.h index c3f3306..1ff5a9e 100644 --- a/util/mutexlock.h +++ b/util/mutexlock.h @@ -6,6 +6,7 @@ #define STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ #include "port/port.h" +#include "port/thread_annotations.h" namespace leveldb { @@ -19,12 +20,13 @@ namespace leveldb { // ... some complex code, possibly with multiple return paths ... // } -class MutexLock { +class SCOPED_LOCKABLE MutexLock { public: - explicit MutexLock(port::Mutex *mu) : mu_(mu) { + explicit MutexLock(port::Mutex *mu) EXCLUSIVE_LOCK_FUNCTION(mu) + : mu_(mu) { this->mu_->Lock(); } - ~MutexLock() { this->mu_->Unlock(); } + ~MutexLock() UNLOCK_FUNCTION() { this->mu_->Unlock(); } private: port::Mutex *const mu_; |