summaryrefslogtreecommitdiff
path: root/api/leveldb
diff options
context:
space:
mode:
authorMichael Cahill <michael.cahill@wiredtiger.com>2014-06-27 14:18:11 +1000
committerMichael Cahill <michael.cahill@wiredtiger.com>2014-06-27 14:18:11 +1000
commita721db22d53dc7d0dbf74d894d7c2b4d50de1fda (patch)
treea0b3c906634a2a8e852481c35e1be04dd61dd756 /api/leveldb
parent75916badfe91957c392b205e115ab5dc04b3f05d (diff)
downloadmongo-a721db22d53dc7d0dbf74d894d7c2b4d50de1fda.tar.gz
Add support for Basho-specific features.
Note that this doesn't yet solve the problem of installed include files: to use this in its current state, you would need something like: CPPFLAGS="-DHAVE_ELEVELDB -I/path/to/include/wiredtiger" ...
Diffstat (limited to 'api/leveldb')
-rw-r--r--api/leveldb/Makefile.am9
-rw-r--r--api/leveldb/include/leveldb/db.h17
-rw-r--r--api/leveldb/include/leveldb/env.h11
-rw-r--r--api/leveldb/include/leveldb/filter_policy.h3
-rw-r--r--api/leveldb/include/leveldb/options.h10
-rw-r--r--api/leveldb/include/leveldb/perf_count.h296
-rw-r--r--api/leveldb/leveldb_wt.cc90
-rw-r--r--api/leveldb/leveldb_wt.h6
-rw-r--r--api/leveldb/util/perf_count.cc657
9 files changed, 1092 insertions, 7 deletions
diff --git a/api/leveldb/Makefile.am b/api/leveldb/Makefile.am
index 0ff5d083b8d..552bbddd70c 100644
--- a/api/leveldb/Makefile.am
+++ b/api/leveldb/Makefile.am
@@ -23,6 +23,11 @@ leveldbinclude_HEADERS = \
include/leveldb/table.h \
include/leveldb/write_batch.h
+if HAVE_ELEVELDB
+leveldbinclude_HEADERS += \
+ include/leveldb/perf_count.h
+endif
+
libwiredtiger_leveldb_la_LDFLAGS = -release @VERSION@
libwiredtiger_leveldb_la_SOURCES = \
leveldb_wt.cc \
@@ -30,6 +35,10 @@ libwiredtiger_leveldb_la_SOURCES = \
util/coding.cc util/comparator.cc util/env.cc util/env_posix.cc \
util/logging.cc util/options.cc util/status.cc
+if HAVE_ELEVELDB
+libwiredtiger_leveldb_la_SOURCES += util/perf_count.cc
+endif
+
leveldb_test_SOURCES = leveldb_test.cc
#leveldb_test_LDADD = $(top_builddir)/libwiredtiger.la
diff --git a/api/leveldb/include/leveldb/db.h b/api/leveldb/include/leveldb/db.h
index 40851b2aa83..d93c46b7859 100644
--- a/api/leveldb/include/leveldb/db.h
+++ b/api/leveldb/include/leveldb/db.h
@@ -38,6 +38,19 @@ struct Range {
Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
};
+#if HAVE_ELEVELDB
+// Abstract holder for a DB value.
+// This allows callers to manage their own value buffers and have
+// DB values copied directly into those buffers.
+class Value {
+ public:
+ // Receives the `size` bytes starting at `data`. Returns a reference to a
+ // Value so that calls can be chained.
+ virtual Value& assign(const char* data, size_t size) = 0;
+
+ protected:
+ // Protected: code outside the hierarchy cannot delete through a Value*.
+ virtual ~Value();
+};
+#endif
+
// A DB is a persistent ordered map from keys to values.
// A DB is safe for concurrent access from multiple threads without
// any external synchronization.
@@ -82,6 +95,10 @@ class DB {
// May return some other Status on an error.
virtual Status Get(const ReadOptions& options,
const Slice& key, std::string* value) = 0;
+#if HAVE_ELEVELDB
+ virtual Status Get(const ReadOptions& options,
+ const Slice& key, Value* value) = 0;
+#endif
// Return a heap-allocated iterator over the contents of the database.
// The result of NewIterator() is initially invalid (caller must
diff --git a/api/leveldb/include/leveldb/env.h b/api/leveldb/include/leveldb/env.h
index b2072d02c1c..6b1af9c2825 100644
--- a/api/leveldb/include/leveldb/env.h
+++ b/api/leveldb/include/leveldb/env.h
@@ -17,6 +17,9 @@
#include <vector>
#include <stdarg.h>
#include <stdint.h>
+#if HAVE_ELEVELDB
+#include "leveldb/perf_count.h"
+#endif
#include "leveldb/status.h"
namespace leveldb {
@@ -145,6 +148,14 @@ class Env {
// Sleep/delay the thread for the perscribed number of micro-seconds.
virtual void SleepForMicroseconds(int micros) = 0;
+#if HAVE_ELEVELDB
+ // Riak specific: number of background jobs still pending, where the
+ // environment supports reporting it. This default reports zero.
+ virtual int GetBackgroundBacklog() const { return 0; }
+
+ // Riak specific: object that tracks the various software counters.
+ // This default hands back the global gPerfCounters pointer.
+ virtual PerformanceCounters * GetPerformanceCounters() { return gPerfCounters; }
+#endif
+
private:
// No copying allowed
Env(const Env&);
diff --git a/api/leveldb/include/leveldb/filter_policy.h b/api/leveldb/include/leveldb/filter_policy.h
index 1fba08001fc..9f824fd1e48 100644
--- a/api/leveldb/include/leveldb/filter_policy.h
+++ b/api/leveldb/include/leveldb/filter_policy.h
@@ -64,6 +64,9 @@ class FilterPolicy {
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
// trailing spaces in keys.
extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key);
+#if HAVE_ELEVELDB
+extern const FilterPolicy* NewBloomFilterPolicy2(int bits_per_key);
+#endif
}
diff --git a/api/leveldb/include/leveldb/options.h b/api/leveldb/include/leveldb/options.h
index fdda718d309..c8e4ba668ff 100644
--- a/api/leveldb/include/leveldb/options.h
+++ b/api/leveldb/include/leveldb/options.h
@@ -56,6 +56,16 @@ struct Options {
// Default: false
bool paranoid_checks;
+#if HAVE_ELEVELDB
+ // Riak specific: this variable replaces paranoid_checks at
+ // one place in the code. This variable alone controls whether or not
+ // compaction read operations check CRC values. Riak needs
+ // the compaction CRC check, but not other paranoid_checks ... so
+ // this independent control.
+ // Default: true
+ bool verify_compactions;
+#endif
+
// Use the specified object to interact with the environment,
// e.g. to read/write files, schedule background work, etc.
// Default: Env::Default()
diff --git a/api/leveldb/include/leveldb/perf_count.h b/api/leveldb/include/leveldb/perf_count.h
new file mode 100644
index 00000000000..4ed215e20b1
--- /dev/null
+++ b/api/leveldb/include/leveldb/perf_count.h
@@ -0,0 +1,296 @@
+// -------------------------------------------------------------------
+//
+// perf_count.h: performance counters LevelDB
+//
+// Copyright (c) 2012-2013 Basho Technologies, Inc. All Rights Reserved.
+//
+// This file is provided to you under the Apache License,
+// Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain
+// a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+// -------------------------------------------------------------------
+
+#ifndef STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_
+#define STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_
+
+#include <stdint.h>
+#include <string>
+#include "leveldb/status.h"
+
+namespace leveldb {
+
+// Index names for the SstCounters::m_Counter array.
+// eSstCountEnumSize sizes that array; eSstCountVersion lives in the same
+// enum but is a version tag (its value equals eSstCountBlocks), not an index.
+enum SstCountEnum
+{
+ //
+ // array index values/names
+ //
+ eSstCountKeys=0, //!< how many keys in this sst
+ eSstCountBlocks=1, //!< how many blocks in this sst
+ eSstCountCompressAborted=2,//!< how many blocks attempted compression and aborted use
+ eSstCountKeySize=3, //!< byte count of all keys
+ eSstCountValueSize=4, //!< byte count of all values
+ eSstCountBlockSize=5, //!< byte count of all blocks (pre-compression)
+ eSstCountBlockWriteSize=6, //!< post-compression size, or BlockSize if no compression
+ eSstCountIndexKeys=7, //!< how many keys in the index block
+ eSstCountKeyLargest=8, //!< largest key in sst
+ eSstCountKeySmallest=9, //!< smallest key in sst
+ eSstCountValueLargest=10, //!< largest value in sst
+ eSstCountValueSmallest=11, //!< smallest value in sst
+ eSstCountDeleteKey=12, //!< tombstone count
+ eSstCountBlockSizeUsed=13, //!< Options::block_size used with this file
+ eSstCountUserDataSize=14, //!< post-compression size of non-metadata (user keys/values/block overhead)
+
+ // must follow last index name to represent size of array
+ eSstCountEnumSize, //!< size of the array described by the enum values
+
+ eSstCountVersion=1 //!< version stamp written by SstCounters, not an array index
+
+}; // enum SstCountEnum
+
+
+// Fixed-size set of 64-bit statistics indexed by SstCountEnum, with
+// helpers to encode to / decode from a varint block.
+class SstCounters
+{
+protected:
+ bool m_IsReadOnly; //!< set when data decoded from a file
+ uint32_t m_Version; //!< object revision identification
+ uint32_t m_CounterSize; //!< number of objects in m_Counter
+
+ uint64_t m_Counter[eSstCountEnumSize]; //!< counter values, indexed by SstCountEnum
+
+public:
+ // constructors / destructor
+ SstCounters();
+
+ // Put data into disk form (appended to Dst)
+ void EncodeTo(std::string & Dst) const;
+
+ // Populate member data from prior EncodeTo block
+ Status DecodeFrom(const Slice& src);
+
+ // increment the counter; the implementation returns 0 when the object
+ // is read-only or Index is out of range
+ uint64_t Inc(unsigned Index);
+
+ // add value to the counter; same 0-return rule as Inc
+ uint64_t Add(unsigned Index, uint64_t Amount);
+
+ // return value of a counter (0 when Index is out of range)
+ uint64_t Value(unsigned Index) const;
+
+ // set a value (ignored when Index is out of range)
+ void Set(unsigned Index, uint64_t);
+
+ // return number of counters
+ uint32_t Size() const {return(m_CounterSize);};
+
+ // printf all values
+ void Dump() const;
+
+}; // class SstCounters
+
+
+extern struct PerformanceCounters * gPerfCounters;
+
+
+// Index names for PerformanceCounters::m_Counter. The same numbering is
+// used to index PerformanceCounters::m_PerfCounterNames, so the two must
+// be kept in step. ePerfVersion and ePerfKey are constants stored in the
+// same enum, not indices.
+enum PerformanceCountersEnum
+{
+ //
+ // array index values/names
+ // (enum explicitly numbered to allow future edits / moves / inserts)
+ //
+ ePerfROFileOpen=0, //!< PosixMmapReadableFile open
+ ePerfROFileClose=1, //!< closed
+ ePerfROFileUnmap=2, //!< unmap without close
+
+ ePerfRWFileOpen=3, //!< PosixMmapFile open
+ ePerfRWFileClose=4, //!< closed
+ ePerfRWFileUnmap=5, //!< unmap without close
+
+ ePerfApiOpen=6, //!< Count of DB::Open completions
+ ePerfApiGet=7, //!< Count of DBImpl::Get completions
+ ePerfApiWrite=8, //!< Count of write completions (presumably DBImpl::Write -- confirm)
+
+ ePerfWriteSleep=9, //!< DBImpl::MakeRoomForWrite called sleep
+ ePerfWriteWaitImm=10, //!< DBImpl::MakeRoomForWrite called Wait on Imm compact
+ ePerfWriteWaitLevel0=11,//!< DBImpl::MakeRoomForWrite called Wait on Level0 compact
+ ePerfWriteNewMem=12, //!< DBImpl::MakeRoomForWrite created new memory log
+ ePerfWriteError=13, //!< DBImpl::MakeRoomForWrite saw bg_error_
+ ePerfWriteNoWait=14, //!< DBImpl::MakeRoomForWrite took no action
+
+ ePerfGetMem=15, //!< DBImpl::Get read from memory log
+ ePerfGetImm=16, //!< DBImpl::Get read from previous memory log
+ ePerfGetVersion=17, //!< DBImpl::Get read from Version object
+
+ // code ASSUMES the levels are in numerical order,
+ // i.e. based off of ePerfSearchLevel0
+ ePerfSearchLevel0=18, //!< Version::Get read searched one or more files here
+ ePerfSearchLevel1=19, //!< Version::Get read searched one or more files here
+ ePerfSearchLevel2=20, //!< Version::Get read searched one or more files here
+ ePerfSearchLevel3=21, //!< Version::Get read searched one or more files here
+ ePerfSearchLevel4=22, //!< Version::Get read searched one or more files here
+ ePerfSearchLevel5=23, //!< Version::Get read searched one or more files here
+ ePerfSearchLevel6=24, //!< Version::Get read searched one or more files here
+
+ ePerfTableCached=25, //!< TableCache::FindTable found table in cache
+ ePerfTableOpened=26, //!< TableCache::FindTable had to open table file
+ ePerfTableGet=27, //!< TableCache::Get used to retrieve a key
+
+ ePerfBGCloseUnmap=28, //!< PosixEnv::BGThread started Unmap/Close job
+ ePerfBGCompactImm=29, //!< PosixEnv::BGThread started compaction of Imm
+ ePerfBGNormal=30, //!< PosixEnv::BGThread started normal compaction job
+ ePerfBGCompactLevel0=31,//!< PosixEnv::BGThread started compaction of Level0
+
+ ePerfBlockFiltered=32, //!< Table::BlockReader search stopped due to filter
+ ePerfBlockFilterFalse=33,//!< Table::BlockReader gave a false positive for match
+ ePerfBlockCached=34, //!< Table::BlockReader found block in cache
+ ePerfBlockRead=35, //!< Table::BlockReader read block from disk
+ ePerfBlockFilterRead=36,//!< Table::ReadMeta filter loaded from file
+ ePerfBlockValidGet=37, //!< Table::InternalGet has valid iterator
+
+ ePerfDebug0=38, //!< Developer debug counters, moveable
+ ePerfDebug1=39, //!< Developer debug counters, moveable
+ ePerfDebug2=40, //!< Developer debug counters, moveable
+ ePerfDebug3=41, //!< Developer debug counters, moveable
+ ePerfDebug4=42, //!< Developer debug counters, moveable
+
+ ePerfReadBlockError=43, //!< crc or compression error in ReadBlock (format.cc)
+
+ ePerfIterNew=44, //!< Count of DBImpl::NewDBIterator calls
+ ePerfIterNext=45, //!< Count of DBIter::Next calls
+ ePerfIterPrev=46, //!< Count of DBIter::Prev calls
+ ePerfIterSeek=47, //!< Count of DBIter::Seek calls
+ ePerfIterSeekFirst=48, //!< Count of DBIter::SeekFirst calls
+ ePerfIterSeekLast=49, //!< Count of DBIter::SeekLast calls
+ ePerfIterDelete=50, //!< Count of DBIter::~DBIter
+
+ ePerfElevelDirect=51, //!< eleveldb's FindWaitingThread went direct to thread
+ ePerfElevelQueued=52, //!< eleveldb's FindWaitingThread queued work item
+ ePerfElevelDequeued=53, //!< eleveldb's worker took item from backlog queue
+
+ ePerfElevelRefCreate=54,//!< eleveldb RefObject constructed
+ ePerfElevelRefDelete=55,//!< eleveldb RefObject destructed
+
+ ePerfThrottleGauge=56, //!< current throttle value
+ ePerfThrottleCounter=57,//!< running throttle by seconds
+
+ ePerfThrottleMicros0=58,//!< level 0 micros spent compacting
+ ePerfThrottleKeys0=59, //!< level 0 keys processed
+ ePerfThrottleBacklog0=60,//!< backlog at time of posting (level0)
+ ePerfThrottleCompacts0=61,//!< number of level 0 compactions
+
+ ePerfThrottleMicros1=62,//!< level 1+ micros spent compacting
+ ePerfThrottleKeys1=63, //!< level 1+ keys processed
+ ePerfThrottleBacklog1=64,//!< backlog at time of posting (level1+)
+ ePerfThrottleCompacts1=65,//!< number of level 1+ compactions
+
+ ePerfBGWriteError=66, //!< error in write/close, see syslog
+
+ ePerfThrottleWait=67, //!< milliseconds of throttle wait
+ ePerfThreadError=68, //!< system error on thread related call, no LOG access
+
+ ePerfBGImmDirect=69, //!< count Imm compactions happened directly
+ ePerfBGImmQueued=70, //!< count Imm compactions placed on queue
+ ePerfBGImmDequeued=71, //!< count Imm compactions removed from queue
+ ePerfBGImmWeighted=72, //!< total microseconds item spent on queue
+
+ ePerfBGUnmapDirect=73, //!< count Unmap operations happened directly
+ ePerfBGUnmapQueued=74, //!< count Unmap operations placed on queue
+ ePerfBGUnmapDequeued=75,//!< count Unmap operations removed from queue
+ ePerfBGUnmapWeighted=76,//!< total microseconds item spent on queue
+
+ ePerfBGLevel0Direct=77, //!< count Level0 compactions happened directly
+ ePerfBGLevel0Queued=78, //!< count Level0 compactions placed on queue
+ ePerfBGLevel0Dequeued=79,//!< count Level0 compactions removed from queue
+ ePerfBGLevel0Weighted=80,//!< total microseconds item spent on queue
+
+ ePerfBGCompactDirect=81, //!< count generic compactions happened directly
+ ePerfBGCompactQueued=82, //!< count generic compactions placed on queue
+ ePerfBGCompactDequeued=83,//!< count generic compactions removed from queue
+ ePerfBGCompactWeighted=84,//!< total microseconds item spent on queue
+
+ ePerfFileCacheInsert=85, //!< total bytes inserted into file cache
+ ePerfFileCacheRemove=86, //!< total bytes removed from file cache
+
+ ePerfBlockCacheInsert=87, //!< total bytes inserted into block cache
+ ePerfBlockCacheRemove=88, //!< total bytes removed from block cache
+
+ ePerfApiDelete=89, //!< Count of DB::Delete
+
+ // must follow last index name to represent size of array
+ // (ASSUMES previous enum is highest value)
+ ePerfCountEnumSize, //!< size of the array described by the enum values
+
+ ePerfVersion=1, //!< structure versioning
+ ePerfKey=41207 //!< random number as shared memory identifier
+};
+
+//
+// Do NOT use virtual functions. This structure will be aligned at different
+// locations in multiple processes. Things can get messy with virtuals.
+
+// Array of 64-bit counters indexed by PerformanceCountersEnum. An instance
+// is either a local object (built by the constructor) or a view of a shared
+// memory segment obtained from Init().
+struct PerformanceCounters
+{
+public:
+ static int m_LastError; //!< errno saved by Init() when it fails
+
+protected:
+ uint32_t m_Version; //!< object revision identification
+ uint32_t m_CounterSize; //!< number of objects in m_Counter
+
+ volatile uint64_t m_Counter[ePerfCountEnumSize];
+
+ static const char * m_PerfCounterNames[]; //!< display names, same order as the enum
+ static int m_PerfSharedId; //!< id returned by shmget() in Init()
+ static volatile uint64_t m_BogusCounter; //!< for out of range GetPtr calls
+
+public:
+ // only called for local object, not for shared memory
+ PerformanceCounters();
+
+ //! does executable's idea of version match shared object?
+ bool VersionTest()
+ {return(ePerfCountEnumSize<=m_CounterSize && ePerfVersion==m_Version);};
+
+ //! mostly for perf_count_test.cc
+ void SetVersion(uint32_t Version, uint32_t CounterSize)
+ {m_Version=Version; m_CounterSize=CounterSize;};
+
+ // attach to (and, unless IsReadOnly, create) the shared segment;
+ // returns NULL on failure with the errno kept in m_LastError
+ static PerformanceCounters * Init(bool IsReadOnly);
+ // detach a segment returned by Init(); returns 0 or an errno value
+ static int Close(PerformanceCounters * Counts);
+
+ // atomic increment / decrement; both return 0 when Index is out of range
+ uint64_t Inc(unsigned Index);
+ uint64_t Dec(unsigned Index);
+
+ // add value to the counter
+ uint64_t Add(unsigned Index, uint64_t Amount);
+
+ // return value of a counter
+ uint64_t Value(unsigned Index) const;
+
+ // set a value
+ void Set(unsigned Index, uint64_t);
+
+ // address of a counter; out of range Index yields &m_BogusCounter
+ volatile const uint64_t * GetPtr(unsigned Index) const;
+
+ // display name of a counter; out of range Index yields "???"
+ static const char * GetNamePtr(unsigned Index);
+
+ // index of the counter with the given display name, or -1
+ int LookupCounter(const char * Name);
+
+ // printf version, size and every counter
+ void Dump();
+
+}; // struct PerformanceCounters
+
+extern PerformanceCounters * gPerfCounters;
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_
diff --git a/api/leveldb/leveldb_wt.cc b/api/leveldb/leveldb_wt.cc
index fbf6d00d20e..f156a29c139 100644
--- a/api/leveldb/leveldb_wt.cc
+++ b/api/leveldb/leveldb_wt.cc
@@ -42,15 +42,35 @@ using leveldb::Range;
using leveldb::Slice;
using leveldb::Snapshot;
using leveldb::Status;
-#ifdef HAVE_HYPERLEVELDB
+#if HAVE_ELEVELDB
+using leveldb::Value;
namespace leveldb {
-class ReplayIterator;
+Value::~Value() {}
+
+// Value adapter that forwards assign() to a caller-owned std::string.
+// Only a reference is stored, so the string must outlive this object.
+class StringValue : public Value {
+ private:
+  std::string& value_;
+
+ public:
+  explicit StringValue(std::string& target) : value_(target) {}
+  ~StringValue() {}
+
+  // Replace the referenced string's contents with data[0, size).
+  StringValue& assign(const char* data, size_t size) {
+    value_.assign(data, size);
+    return *this;
+  }
+};
}
#endif
#define WT_URI "table:data"
-#define WT_CONN_CONFIG "log=(enabled),checkpoint_sync=false,session_max=256,"
-#define WT_TABLE_CONFIG "type=lsm,leaf_page_max=4KB,leaf_item_max=1KB,"
+#define WT_CONN_CONFIG "log=(enabled),checkpoint_sync=false,session_max=8192,"\
+ "mmap=false,transaction_sync=(enabled=true,method=none),"
+#define WT_TABLE_CONFIG "type=lsm,leaf_page_max=4KB,leaf_item_max=1KB," \
+ "internal_page_max=128K,lsm=(chunk_size=100MB," \
+ "bloom_config=(leaf_page_max=8MB)," \
+ "bloom_bit_count=28,bloom_hash_count=19," \
+ "bloom_oldest=true),"
/* Destructors required for interfaces. */
leveldb::DB::~DB() {}
@@ -153,13 +173,17 @@ public:
};
};
-
namespace leveldb {
FilterPolicy::~FilterPolicy() {}
const FilterPolicy *NewBloomFilterPolicy(int bits_per_key) {
return new FilterPolicyImpl(bits_per_key);
}
+#if HAVE_ELEVELDB
+// eleveldb-only entry point. In this implementation it simply forwards to
+// NewBloomFilterPolicy(), so both names produce the same kind of policy.
+const FilterPolicy *NewBloomFilterPolicy2(int bits_per_key) {
+ return NewBloomFilterPolicy(bits_per_key);
+}
+#endif
Cache::~Cache() {}
@@ -359,6 +383,11 @@ public:
virtual Status Get(const ReadOptions& options,
const Slice& key, std::string* value);
+#if HAVE_ELEVELDB
+ virtual Status Get(const ReadOptions& options,
+ const Slice& key, Value* value);
+#endif
+
#ifdef HAVE_HYPERLEVELDB
virtual Status LiveBackup(const Slice& name) {
return Status::NotSupported("sorry!");
@@ -460,13 +489,17 @@ leveldb::DB::Open(const Options &options, const std::string &name, leveldb::DB *
s_conn << "exclusive,";
if (options.compression == kSnappyCompression)
s_conn << "extensions=[libwiredtiger_snappy.so],";
- size_t cache_size = 25 * options.write_buffer_size;
+ size_t cache_size = 2 * options.write_buffer_size;
+ cache_size += options.max_open_files * (4 << 20);
if (options.block_cache)
cache_size += ((CacheImpl *)options.block_cache)->capacity_;
+ else
+ cache_size += 100 << 20;
s_conn << "cache_size=" << cache_size << ",";
std::string conn_config = s_conn.str();
WT_CONNECTION *conn;
+ fprintf(stderr,"Open: Home %s config %s\r\n",name.c_str(),conn_config.c_str());
int ret = ::wiredtiger_open(name.c_str(), NULL, conn_config.c_str(), &conn);
if (ret == ENOENT)
return Status::NotFound(Slice("Database does not exist."));
@@ -675,6 +708,51 @@ err:
return WiredTigerErrorToStatus(ret, errmsg);
}
+#if HAVE_ELEVELDB
+// Look up "key"; when an entry exists, pass its bytes to value->assign()
+// and return OK.
+//
+// When there is no entry for "key", *value is left unchanged and the
+// returned status is one for which Status::IsNotFound() returns true.
+//
+// May return some other Status on an error.
+Status
+DbImpl::Get(const ReadOptions& options,
+            const Slice& key, Value* value)
+{
+  const SnapshotImpl *snap = NULL;
+  WT_CURSOR *c;
+
+  // Read options can carry a snapshot, which brings its own cursor
+  if (options.snapshot != NULL) {
+    snap = static_cast<const SnapshotImpl *>(options.snapshot);
+    if (!snap->getStatus().ok())
+      return snap->getStatus();
+    c = snap->getCursor();
+  } else {
+    c = getCursor();
+  }
+
+  WT_ITEM buf;
+  buf.data = key.data();
+  buf.size = key.size();
+  c->set_key(c, &buf);
+
+  const char *msg = NULL;
+  int rc = c->search(c);
+  if (rc == WT_NOTFOUND) {
+    msg = "DB::Get key not found";
+  } else if (rc == 0) {
+    rc = c->get_value(c, &buf);
+    if (rc == 0)
+      value->assign((const char *)buf.data, buf.size);
+  }
+
+  // Release the cursor if we are not in a snapshot
+  if (snap == NULL)
+    releaseCursor(c);
+  return WiredTigerErrorToStatus(rc, msg);
+}
+#endif
+
// Return a heap-allocated iterator over the contents of the database.
// The result of NewIterator() is initially invalid (caller must
// call one of the Seek methods on the iterator before using it).
diff --git a/api/leveldb/leveldb_wt.h b/api/leveldb/leveldb_wt.h
index 3025f3db0d5..520068547a6 100644
--- a/api/leveldb/leveldb_wt.h
+++ b/api/leveldb/leveldb_wt.h
@@ -27,7 +27,7 @@
#include "wiredtiger_config.h"
-#ifdef HAVE_HYPERLEVELDB
+#ifdef HAVE_HYPERLEVELDB
#include <hyperleveldb/cache.h>
#include <hyperleveldb/comparator.h>
#include <hyperleveldb/db.h>
@@ -43,10 +43,14 @@
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "leveldb/filter_policy.h"
+#include "leveldb/options.h"
#include "leveldb/slice.h"
#include "leveldb/status.h"
#include "leveldb/table_builder.h"
#include "leveldb/write_batch.h"
#endif
+#if HAVE_ELEVELDB
+#include "leveldb/perf_count.h"
+#endif
#include "wiredtiger.h"
diff --git a/api/leveldb/util/perf_count.cc b/api/leveldb/util/perf_count.cc
new file mode 100644
index 00000000000..d086f29379a
--- /dev/null
+++ b/api/leveldb/util/perf_count.cc
@@ -0,0 +1,657 @@
+// -------------------------------------------------------------------
+//
+// perf_count.cc: performance counters LevelDB
+//
+// Copyright (c) 2012-2013 Basho Technologies, Inc. All Rights Reserved.
+//
+// This file is provided to you under the Apache License,
+// Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain
+// a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+// -------------------------------------------------------------------
+
+#include <limits.h>
+#include <stdio.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <syslog.h>
+#include <memory.h>
+#include <errno.h>
+
+#ifndef STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_
+#include "leveldb/perf_count.h"
+#endif
+
+#include "util/coding.h"
+
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+
+#ifdef OS_SOLARIS
+# include <atomic.h>
+#endif
+
+
+namespace leveldb
+{
+
+// always have something active in gPerfCounters, eliminates
+// need to test for "is shared object attached yet".
+// Init() repoints gPerfCounters at the shared segment on success and
+// Close() points it back at LocalStartupCounters.
+static PerformanceCounters LocalStartupCounters;
+PerformanceCounters * gPerfCounters(&LocalStartupCounters);
+
+
+  // Build a writable counter set stamped with the current version and
+  // size. Every counter starts at zero except the two "smallest"
+  // trackers, which start at ULLONG_MAX.
+  SstCounters::SstCounters()
+      : m_IsReadOnly(false),
+        m_Version(eSstCountVersion),
+        m_CounterSize(eSstCountEnumSize)
+  {
+    for (unsigned slot=0; slot<eSstCountEnumSize; ++slot)
+      m_Counter[slot]=0;
+
+    m_Counter[eSstCountValueSmallest]=ULLONG_MAX;
+    m_Counter[eSstCountKeySmallest]=ULLONG_MAX;
+  }   // SstCounters::SstCounters
+
+
+  // Append the disk form to Dst: varint32 version, varint32 counter
+  // count, then eSstCountEnumSize varint64 counter values.
+  void
+  SstCounters::EncodeTo(
+      std::string & Dst) const
+  {
+    PutVarint32(&Dst, m_Version);
+    PutVarint32(&Dst, m_CounterSize);
+
+    const uint64_t * cur=m_Counter;
+    const uint64_t * const stop=m_Counter+eSstCountEnumSize;
+
+    while (cur!=stop)
+    {
+      PutVarint64(&Dst, *cur);
+      ++cur;
+    }   // while
+  }   // SstCounters::EncodeTo
+
+
+  // Populate this object from a block written by EncodeTo().
+  //
+  // The object becomes read-only as soon as decoding starts, even when
+  // decoding later fails. A stored version above eSstCountVersion is
+  // rejected. A stored counter count above eSstCountEnumSize is clamped
+  // and the surplus values are not read; a smaller count reads only the
+  // values present and leaves the remaining counters untouched.
+  //
+  // Returns OK on success, or a Corruption status when the block is
+  // truncated, malformed, or carries an unsupported version.
+  Status
+  SstCounters::DecodeFrom(
+      const Slice& src)
+  {
+    Slice cursor(src);
+    bool good;
+
+    m_IsReadOnly=true;
+    good=GetVarint32(&cursor, &m_Version);
+    good=good && (m_Version<=eSstCountVersion);
+
+    // allow a lesser number of stats to be read, never more than we hold
+    good=good && GetVarint32(&cursor, &m_CounterSize);
+    if (good && eSstCountEnumSize < m_CounterSize)
+      m_CounterSize=eSstCountEnumSize;
+
+    // read only as many values as the block says it carries
+    for (uint32_t loop=0; good && loop<m_CounterSize; ++loop)
+    {
+      good=GetVarint64(&cursor, &m_Counter[loop]);
+    }   // for
+
+    if (!good)
+      return(Status::Corruption("SstCounters::DecodeFrom: bad counter block"));
+
+    return(Status::OK());
+
+  }   // SstCounters::DecodeFrom
+
+
+  // Add one to counter Index. Returns the new value, or 0 when the
+  // object is read-only or Index is not below m_CounterSize.
+  uint64_t
+  SstCounters::Inc(
+      unsigned Index)
+  {
+    if (m_IsReadOnly || m_CounterSize<=Index)
+      return(0);
+
+    return(++m_Counter[Index]);
+  }   // SstCounters::Inc
+
+
+  // Add Amount to counter Index. Returns the new value, or 0 when the
+  // object is read-only or Index is not below m_CounterSize.
+  uint64_t
+  SstCounters::Add(
+      unsigned Index,
+      uint64_t Amount)
+  {
+    if (m_IsReadOnly || m_CounterSize<=Index)
+      return(0);
+
+    uint64_t & slot=m_Counter[Index];
+
+    slot+=Amount;
+    return(slot);
+  }   // SstCounters::Add
+
+
+  // Current value of counter Index, or 0 when Index is not below
+  // m_CounterSize.
+  uint64_t
+  SstCounters::Value(
+      unsigned Index) const
+  {
+    return((Index<m_CounterSize) ? m_Counter[Index] : (uint64_t)0);
+  }   // SstCounters::Value
+
+
+  // Overwrite counter Index with Value; an Index that is not below
+  // m_CounterSize is ignored. The read-only flag is not consulted here.
+  void
+  SstCounters::Set(
+      unsigned Index,
+      uint64_t Value)
+  {
+    if (m_CounterSize<=Index)
+      return;
+
+    m_Counter[Index]=Value;
+  }   // SstCounters::Set
+
+
+  // Print the header fields and the first m_CounterSize counters to
+  // stdout.
+  void
+  SstCounters::Dump() const
+  {
+    printf("SstCounters:\n");
+    printf(" m_IsReadOnly: %u\n", m_IsReadOnly);
+    printf(" m_Version: %u\n", m_Version);
+    printf(" m_CounterSize: %u\n", m_CounterSize);
+
+    unsigned pos=0;
+
+    while (pos<m_CounterSize)
+    {
+      printf(" Counter[%2u]: %" PRIu64 "\n", pos, m_Counter[pos]);
+      ++pos;
+    }   // while
+  }   // SstCounters::Dump
+
+
+  // Constructor for local (non shared memory) objects only: stamps the
+  // current version and size and zeroes every counter.
+  PerformanceCounters::PerformanceCounters()
+      : m_Version(ePerfVersion),
+        m_CounterSize(ePerfCountEnumSize)
+  {
+    // memset() wants a plain pointer, so "volatile" is cast away
+    memset((void*)m_Counter, 0, sizeof(m_Counter));
+  }   // PerformanceCounters::PerformanceCounters
+
+
+ // Attach to the System V shared memory segment keyed by ePerfKey and
+ // publish it through gPerfCounters.
+ //
+ // IsReadOnly false: a missing segment, or one smaller than
+ // sizeof(PerformanceCounters), is (re)created; a created segment or one
+ // with a different m_Version is zeroed and stamped with the current
+ // version and size.
+ // IsReadOnly true: the existing segment is attached SHM_RDONLY at its own
+ // size; a version mismatch fails with EINVAL.
+ //
+ // Returns the attached object, or NULL on failure with errno saved in
+ // m_LastError.
+ PerformanceCounters *
+ PerformanceCounters::Init(
+ bool IsReadOnly)
+ {
+ PerformanceCounters * ret_ptr;
+ bool should_create, good;
+ int ret_val, id;
+ struct shmid_ds shm_info;
+ size_t open_size;
+
+ ret_ptr=NULL;
+ memset(&shm_info, 0, sizeof(shm_info));
+ good=true;
+ open_size=sizeof(PerformanceCounters);
+
+ // first id attempt, minimal request (size 0 only finds an existing segment)
+ id=shmget(ePerfKey, 0, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (-1!=id)
+ ret_val=shmctl(id, IPC_STAT, &shm_info);
+ else
+ ret_val=-1;
+
+ // does the shared memory already exist (and of proper size if writing)
+ should_create=(0!=ret_val || (shm_info.shm_segsz < sizeof(PerformanceCounters))) && !IsReadOnly;
+
+ // should old shared memory be deleted?
+ if (should_create && 0==ret_val)
+ {
+ ret_val=shmctl(id, IPC_RMID, &shm_info);
+ good=(0==ret_val);
+ if (0!=ret_val)
+ syslog(LOG_ERR, "shmctl IPC_RMID failed [%d, %m]", errno);
+ } // if
+
+ // else open the size that exists
+ else if (0==ret_val)
+ {
+ open_size=shm_info.shm_segsz;
+ } // else if
+
+ // attempt to attach/create to shared memory instance
+ if (good)
+ {
+ int flags;
+
+ // only a writer is allowed to create the segment
+ if (IsReadOnly)
+ flags = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+ else
+ flags = IPC_CREAT | S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+
+ m_PerfSharedId=shmget(ePerfKey, open_size, flags);
+ good=(-1!=m_PerfSharedId);
+ } // if
+
+ // map shared memory instance
+ if (good)
+ {
+ ret_ptr=(PerformanceCounters *)shmat(m_PerfSharedId, NULL, (IsReadOnly ? SHM_RDONLY : 0));
+ if ((void*)-1 != ret_ptr)
+ {
+ // initialize?
+ if (should_create || ePerfVersion!=ret_ptr->m_Version)
+ {
+ if (!IsReadOnly)
+ {
+ memset(ret_ptr, 0, sizeof(PerformanceCounters));
+ ret_ptr->m_Version=ePerfVersion;
+ ret_ptr->m_CounterSize=ePerfCountEnumSize;
+ } // if
+
+ // bad version match to existing segment
+ // NOTE(review): the segment stays attached on this path (no shmdt) -- confirm intended
+ else
+ {
+ good=false;
+ errno=EINVAL;
+ } // else
+ } // if
+ } // if
+ else
+ {
+ good=false;
+ syslog(LOG_ERR, "shmat failed [%d, %m]", errno);
+ } // else
+
+ if (good)
+ {
+ // make this available process wide
+ gPerfCounters=ret_ptr;
+ } // if
+ else
+ {
+ ret_ptr=NULL;
+ m_LastError=errno;
+ } // else
+ } // if
+ else
+ {
+ m_LastError=errno;
+ ret_ptr=NULL;
+ } // else
+
+ return(ret_ptr);
+
+ }; // PerformanceCounters::Init
+
+
+  // Detach a shared segment returned by Init(). Returns 0 on success,
+  // the errno from shmdt() on failure, or EINVAL when Counts is NULL or
+  // is the built-in local object.
+  int
+  PerformanceCounters::Close(
+      PerformanceCounters * Counts)
+  {
+    if (NULL==Counts || &LocalStartupCounters==Counts)
+      return(EINVAL);
+
+    // keep gPerf valid
+    if (Counts==gPerfCounters)
+      gPerfCounters=&LocalStartupCounters;
+
+    if (0!=shmdt(Counts))
+      return(errno);
+
+    return(0);
+  }   // PerformanceCounters::Close
+
+
+  // Atomically add one to counter Index.
+  //
+  // Returns the counter's value read back after the update, or 0 when
+  // Index is not below m_CounterSize.
+  uint64_t
+  PerformanceCounters::Inc(
+      unsigned Index)
+  {
+    uint64_t ret_val;
+
+    ret_val=0;
+    if (Index<m_CounterSize)
+    {
+      volatile uint64_t * val_ptr;
+
+      val_ptr=&m_Counter[Index];
+
+# if ULONG_MAX != 4294967295UL
+#ifdef OS_SOLARIS
+      atomic_inc_64(val_ptr);
+#else
+      __sync_add_and_fetch(val_ptr, 1);
+#endif
+#else
+      // hack fest for 64 bit semi-atomic on 32bit machine: bump the low
+      // word and carry into the high word when it wraps.
+      // NOTE(review): treats the first 32 bit word as the low half, i.e.
+      // assumes a little-endian layout -- confirm for other targets.
+      uint32_t ret_32;
+      volatile uint32_t * ptr_32;
+
+      // point at the counter itself, not at the local pointer variable
+      ptr_32=(volatile uint32_t *)val_ptr;
+      ret_32=__sync_add_and_fetch(ptr_32, 1);
+      if (0==ret_32)
+      {
+        ++ptr_32;
+        __sync_add_and_fetch(ptr_32, 1);
+      }   // if
+#endif
+      ret_val=*val_ptr;
+    }   // if
+
+    return(ret_val);
+  }   // PerformanceCounters::Inc
+
+
+  // Atomically subtract one from counter Index.
+  //
+  // Returns the counter's value read back after the update, or 0 when
+  // Index is not below m_CounterSize.
+  uint64_t
+  PerformanceCounters::Dec(
+      unsigned Index)
+  {
+    uint64_t ret_val;
+
+    ret_val=0;
+    if (Index<m_CounterSize)
+    {
+      volatile uint64_t * val_ptr;
+
+      val_ptr=&m_Counter[Index];
+
+# if ULONG_MAX != 4294967295UL
+#ifdef OS_SOLARIS
+      atomic_dec_64(val_ptr);
+#else
+      __sync_sub_and_fetch(val_ptr, 1);
+#endif
+#else
+      // hack fest for 64 bit semi-atomic on 32bit machine: drop the low
+      // word and borrow from the high word when it wraps.
+      // NOTE(review): treats the first 32 bit word as the low half, i.e.
+      // assumes a little-endian layout -- confirm for other targets.
+      uint32_t ret_32;
+      volatile uint32_t * ptr_32;
+
+      // point at the counter itself, not at the local pointer variable
+      ptr_32=(volatile uint32_t *)val_ptr;
+      ret_32=__sync_sub_and_fetch(ptr_32, 1);
+      if (0xFFFFFFFF==ret_32)
+      {
+        ++ptr_32;
+        __sync_sub_and_fetch(ptr_32, 1);
+      }   // if
+#endif
+      ret_val=*val_ptr;
+    }   // if
+
+    return(ret_val);
+  }   // PerformanceCounters::Dec
+
+
+  // Atomically add Amount to counter Index.
+  //
+  // Returns the counter's value after the addition, or 0 when Index is
+  // not below m_CounterSize.
+  uint64_t
+  PerformanceCounters::Add(
+      unsigned Index,
+      uint64_t Amount)
+  {
+    uint64_t ret_val;
+
+    ret_val=0;
+    if (Index<m_CounterSize)
+    {
+      volatile uint64_t * val_ptr;
+
+      val_ptr=&m_Counter[Index];
+
+# if ULONG_MAX != 4294967295UL
+#ifdef OS_SOLARIS
+      ret_val=atomic_add_64_nv(val_ptr, Amount);
+#else
+      ret_val=__sync_add_and_fetch(val_ptr, Amount);
+#endif
+#else
+      // hack fest for 64 bit semi-atomic on 32bit machine: add the two
+      // halves of Amount word by word, carrying from low to high.
+      // NOTE(review): treats the first 32 bit word as the low half, i.e.
+      // assumes a little-endian layout -- confirm for other targets.
+      uint32_t low_32, high_32, ret_32;
+      volatile uint32_t * ptr_32;
+
+      low_32=(uint32_t)Amount;
+      high_32=(uint32_t)(Amount>>32);
+
+      // point at the counter itself, not at the local pointer variable
+      ptr_32=(volatile uint32_t *)val_ptr;
+      ret_32=__sync_add_and_fetch(ptr_32, low_32);
+
+      // the low word wrapped exactly when the sum is below the addend
+      if (ret_32<low_32)
+        ++high_32;
+
+      if (0!=high_32)
+      {
+        ++ptr_32;
+        __sync_add_and_fetch(ptr_32, high_32);
+      }   // if
+
+      ret_val=*val_ptr;
+#endif
+    }   // if
+
+    return(ret_val);
+  }   // PerformanceCounters::Add
+
+
+  // Current value of counter Index, or 0 when Index is not below
+  // m_CounterSize.
+  uint64_t
+  PerformanceCounters::Value(
+      unsigned Index) const
+  {
+    if (m_CounterSize<=Index)
+      return(0);
+
+    return(m_Counter[Index]);
+  }   // PerformanceCounters::Value
+
+
+  // Overwrite counter Index with Amount; an Index that is not below
+  // m_CounterSize is ignored.
+  void
+  PerformanceCounters::Set(
+      unsigned Index,
+      uint64_t Amount)
+  {
+    if (m_CounterSize<=Index)
+      return;
+
+    m_Counter[Index]=Amount;
+  }   // PerformanceCounters::Set
+
+
+  // Address of counter Index. An Index that is not below m_CounterSize
+  // yields the address of the shared m_BogusCounter, so the result is
+  // never NULL.
+  volatile const uint64_t *
+  PerformanceCounters::GetPtr(
+      unsigned Index) const
+  {
+    if (m_CounterSize<=Index)
+      return(&m_BogusCounter);
+
+    return(&m_Counter[Index]);
+  }   // PerformanceCounters::GetPtr
+
+
+  // Display name for counter Index, or "???" when Index is not below
+  // ePerfCountEnumSize.
+  const char *
+  PerformanceCounters::GetNamePtr(
+      unsigned Index)
+  {
+    return((Index<ePerfCountEnumSize) ? m_PerfCounterNames[Index] : "???");
+  }   // PerformanceCounters::GetNamePtr
+
+
+ // storage for the static members declared in PerformanceCounters
+ int PerformanceCounters::m_PerfSharedId=-1;
+ int PerformanceCounters::m_LastError=0;
+ volatile uint64_t PerformanceCounters::m_BogusCounter=0;
+ // display names, indexed by PerformanceCountersEnum value; GetNamePtr(),
+ // LookupCounter() and Dump() index this array with enum values, so the
+ // order here must match the enum numbering
+ const char * PerformanceCounters::m_PerfCounterNames[]=
+ {
+ "ROFileOpen",
+ "ROFileClose",
+ "ROFileUnmap",
+ "RWFileOpen",
+ "RWFileClose",
+ "RWFileUnmap",
+ "ApiOpen",
+ "ApiGet",
+ "ApiWrite",
+ "WriteSleep",
+ "WriteWaitImm",
+ "WriteWaitLevel0",
+ "WriteNewMem",
+ "WriteError",
+ "WriteNoWait",
+ "GetMem",
+ "GetImm",
+ "GetVersion",
+ "SearchLevel[0]",
+ "SearchLevel[1]",
+ "SearchLevel[2]",
+ "SearchLevel[3]",
+ "SearchLevel[4]",
+ "SearchLevel[5]",
+ "SearchLevel[6]",
+ "TableCached",
+ "TableOpened",
+ "TableGet",
+ "BGCloseUnmap",
+ "BGCompactImm",
+ "BGNormal",
+ "BGCompactLevel0",
+ "BlockFiltered",
+ "BlockFilterFalse",
+ "BlockCached",
+ "BlockRead",
+ "BlockFilterRead",
+ "BlockValidGet",
+ "Debug[0]",
+ "Debug[1]",
+ "Debug[2]",
+ "Debug[3]",
+ "Debug[4]",
+ "ReadBlockError",
+ "DBIterNew",
+ "DBIterNext",
+ "DBIterPrev",
+ "DBIterSeek",
+ "DBIterSeekFirst",
+ "DBIterSeekLast",
+ "DBIterDelete",
+ "eleveldbDirect",
+ "eleveldbQueued",
+ "eleveldbDequeued",
+ "elevelRefCreate",
+ "elevelRefDelete",
+ "ThrottleGauge",
+ "ThrottleCounter",
+ "ThrottleMicros0",
+ "ThrottleKeys0",
+ "ThrottleBacklog0",
+ "ThrottleCompacts0",
+ "ThrottleMicros1",
+ "ThrottleKeys1",
+ "ThrottleBacklog1",
+ "ThrottleCompacts1",
+ "BGWriteError",
+ "ThrottleWait",
+ "ThreadError",
+ "BGImmDirect",
+ "BGImmQueued",
+ "BGImmDequeued",
+ "BGImmWeighted",
+ "BGUnmapDirect",
+ "BGUnmapQueued",
+ "BGUnmapDequeued",
+ "BGUnmapWeighted",
+ "BGLevel0Direct",
+ "BGLevel0Queued",
+ "BGLevel0Dequeued",
+ "BGLevel0Weighted",
+ "BGCompactDirect",
+ "BGCompactQueued",
+ "BGCompactDequeued",
+ "BGCompactWeighted",
+ "FileCacheInsert",
+ "FileCacheRemove",
+ "BlockCacheInsert",
+ "BlockCacheRemove",
+ "ApiDelete"
+ };
+
+
+  // Find the counter whose display name equals Name (exact strcmp
+  // match). Returns its index, or -1 when Name is NULL, empty, or
+  // unknown.
+  int
+  PerformanceCounters::LookupCounter(
+      const char * Name)
+  {
+    if (NULL==Name || '\0'==*Name)
+      return(-1);
+
+    for (int pos=0; pos<ePerfCountEnumSize; ++pos)
+    {
+      if (0==strcmp(m_PerfCounterNames[pos], Name))
+        return(pos);
+    }   // for
+
+    return(-1);
+  }   // PerformanceCounters::LookupCounter
+
+  // Print the version, the counter count and every named counter to
+  // stdout.
+  //
+  // Only counters below both m_CounterSize and ePerfCountEnumSize are
+  // printed: Init() can attach an existing segment at its own (possibly
+  // smaller) size, and the name table holds ePerfCountEnumSize entries.
+  void
+  PerformanceCounters::Dump()
+  {
+    unsigned limit, loop;
+
+    printf(" m_Version: %u\n", m_Version);
+    printf(" m_CounterSize: %u\n", m_CounterSize);
+
+    limit=(unsigned)ePerfCountEnumSize;
+    if (m_CounterSize<limit)
+      limit=m_CounterSize;
+
+    for (loop=0; loop<limit; ++loop)
+    {
+      printf(" %s: %" PRIu64 "\n", m_PerfCounterNames[loop], m_Counter[loop]);
+    }   // loop
+  }   // PerformanceCounters::Dump
+
+} // namespace leveldb