diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2014-06-27 14:18:11 +1000 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2014-06-27 14:18:11 +1000 |
commit | a721db22d53dc7d0dbf74d894d7c2b4d50de1fda (patch) | |
tree | a0b3c906634a2a8e852481c35e1be04dd61dd756 /api/leveldb | |
parent | 75916badfe91957c392b205e115ab5dc04b3f05d (diff) | |
download | mongo-a721db22d53dc7d0dbf74d894d7c2b4d50de1fda.tar.gz |
Add support for Basho-specific features.
Note that this doesn't yet solve the problem of installed include files: to use this in its current state, you would need something like:
CPPFLAGS="-DHAVE_ELEVELDB -I/path/to/include/wiredtiger" ...
Diffstat (limited to 'api/leveldb')
-rw-r--r-- | api/leveldb/Makefile.am | 9 | ||||
-rw-r--r-- | api/leveldb/include/leveldb/db.h | 17 | ||||
-rw-r--r-- | api/leveldb/include/leveldb/env.h | 11 | ||||
-rw-r--r-- | api/leveldb/include/leveldb/filter_policy.h | 3 | ||||
-rw-r--r-- | api/leveldb/include/leveldb/options.h | 10 | ||||
-rw-r--r-- | api/leveldb/include/leveldb/perf_count.h | 296 | ||||
-rw-r--r-- | api/leveldb/leveldb_wt.cc | 90 | ||||
-rw-r--r-- | api/leveldb/leveldb_wt.h | 6 | ||||
-rw-r--r-- | api/leveldb/util/perf_count.cc | 657 |
9 files changed, 1092 insertions, 7 deletions
diff --git a/api/leveldb/Makefile.am b/api/leveldb/Makefile.am index 0ff5d083b8d..552bbddd70c 100644 --- a/api/leveldb/Makefile.am +++ b/api/leveldb/Makefile.am @@ -23,6 +23,11 @@ leveldbinclude_HEADERS = \ include/leveldb/table.h \ include/leveldb/write_batch.h +if HAVE_ELEVELDB +leveldbinclude_HEADERS += \ + include/leveldb/perf_count.h +endif + libwiredtiger_leveldb_la_LDFLAGS = -release @VERSION@ libwiredtiger_leveldb_la_SOURCES = \ leveldb_wt.cc \ @@ -30,6 +35,10 @@ libwiredtiger_leveldb_la_SOURCES = \ util/coding.cc util/comparator.cc util/env.cc util/env_posix.cc \ util/logging.cc util/options.cc util/status.cc +if HAVE_ELEVELDB +libwiredtiger_leveldb_la_SOURCES += util/perf_count.cc +endif + leveldb_test_SOURCES = leveldb_test.cc #leveldb_test_LDADD = $(top_builddir)/libwiredtiger.la diff --git a/api/leveldb/include/leveldb/db.h b/api/leveldb/include/leveldb/db.h index 40851b2aa83..d93c46b7859 100644 --- a/api/leveldb/include/leveldb/db.h +++ b/api/leveldb/include/leveldb/db.h @@ -38,6 +38,19 @@ struct Range { Range(const Slice& s, const Slice& l) : start(s), limit(l) { } }; +#if HAVE_ELEVELDB +// Abstract holder for a DB value. +// This allows callers to manage their own value buffers and have +// DB values copied directly into those buffers. +class Value { + public: + virtual Value& assign(const char* data, size_t size) = 0; + + protected: + virtual ~Value(); +}; +#endif + // A DB is a persistent ordered map from keys to values. // A DB is safe for concurrent access from multiple threads without // any external synchronization. @@ -82,6 +95,10 @@ class DB { // May return some other Status on an error. virtual Status Get(const ReadOptions& options, const Slice& key, std::string* value) = 0; +#if HAVE_ELEVELDB + virtual Status Get(const ReadOptions& options, + const Slice& key, Value* value) = 0; +#endif // Return a heap-allocated iterator over the contents of the database. 
// The result of NewIterator() is initially invalid (caller must diff --git a/api/leveldb/include/leveldb/env.h b/api/leveldb/include/leveldb/env.h index b2072d02c1c..6b1af9c2825 100644 --- a/api/leveldb/include/leveldb/env.h +++ b/api/leveldb/include/leveldb/env.h @@ -17,6 +17,9 @@ #include <vector> #include <stdarg.h> #include <stdint.h> +#if HAVE_ELEVELDB +#include "leveldb/perf_count.h" +#endif #include "leveldb/status.h" namespace leveldb { @@ -145,6 +148,14 @@ class Env { // Sleep/delay the thread for the perscribed number of micro-seconds. virtual void SleepForMicroseconds(int micros) = 0; +#if HAVE_ELEVELDB + // Riak specific: Where supported, give count of background jobs pending. + virtual int GetBackgroundBacklog() const {return(0);}; + + // Riak specific: Get object that is tracking various software counters + virtual PerformanceCounters * GetPerformanceCounters() {return(gPerfCounters);} +#endif + private: // No copying allowed Env(const Env&); diff --git a/api/leveldb/include/leveldb/filter_policy.h b/api/leveldb/include/leveldb/filter_policy.h index 1fba08001fc..9f824fd1e48 100644 --- a/api/leveldb/include/leveldb/filter_policy.h +++ b/api/leveldb/include/leveldb/filter_policy.h @@ -64,6 +64,9 @@ class FilterPolicy { // FilterPolicy (like NewBloomFilterPolicy) that does not ignore // trailing spaces in keys. extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key); +#if HAVE_ELEVELDB +extern const FilterPolicy* NewBloomFilterPolicy2(int bits_per_key); +#endif } diff --git a/api/leveldb/include/leveldb/options.h b/api/leveldb/include/leveldb/options.h index fdda718d309..c8e4ba668ff 100644 --- a/api/leveldb/include/leveldb/options.h +++ b/api/leveldb/include/leveldb/options.h @@ -56,6 +56,16 @@ struct Options { // Default: false bool paranoid_checks; +#if HAVE_ELEVELDB + // Riak specific: this variable replaces paranoid_checks at + // one place in the code. 
This variable alone controls whether or not + // compaction read operations check CRC values. Riak needs + // the compaction CRC check, but not other paranoid_checks ... so + // this independent control. + // Default: true + bool verify_compactions; +#endif + // Use the specified object to interact with the environment, // e.g. to read/write files, schedule background work, etc. // Default: Env::Default() diff --git a/api/leveldb/include/leveldb/perf_count.h b/api/leveldb/include/leveldb/perf_count.h new file mode 100644 index 00000000000..4ed215e20b1 --- /dev/null +++ b/api/leveldb/include/leveldb/perf_count.h @@ -0,0 +1,296 @@ +// ------------------------------------------------------------------- +// +// perf_count.h: performance counters LevelDB +// +// Copyright (c) 2012-2013 Basho Technologies, Inc. All Rights Reserved. +// +// This file is provided to you under the Apache License, +// Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// +// ------------------------------------------------------------------- + +#ifndef STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_ +#define STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_ + +#include <stdint.h> +#include <string> +#include "leveldb/status.h" + +namespace leveldb { + +enum SstCountEnum +{ + // + // array index values/names + // + eSstCountKeys=0, //!< how many keys in this sst + eSstCountBlocks=1, //!< how many blocks in this sst + eSstCountCompressAborted=2,//!< how many blocks attempted compression and aborted use + eSstCountKeySize=3, //!< byte count of all keys + eSstCountValueSize=4, //!< byte count of all values + eSstCountBlockSize=5, //!< byte count of all blocks (pre-compression) + eSstCountBlockWriteSize=6, //!< post-compression size, or BlockSize if no compression + eSstCountIndexKeys=7, //!< how many keys in the index block + eSstCountKeyLargest=8, //!< largest key in sst + eSstCountKeySmallest=9, //!< smallest key in sst + eSstCountValueLargest=10, //!< largest value in sst + eSstCountValueSmallest=11, //!< smallest value in sst + eSstCountDeleteKey=12, //!< tombstone count + eSstCountBlockSizeUsed=13, //!< Options::block_size used with this file + eSstCountUserDataSize=14, //!< post-compression size of non-metadata (user keys/values/block overhead) + + // must follow last index name to represent size of array + eSstCountEnumSize, //!< size of the array described by the enum values + + eSstCountVersion=1 + +}; // enum SstCountEnum + + +class SstCounters +{ +protected: + bool m_IsReadOnly; //!< set when data decoded from a file + uint32_t m_Version; //!< object revision identification + uint32_t m_CounterSize; //!< number of objects in m_Counter + + uint64_t m_Counter[eSstCountEnumSize]; + +public: + // constructors / destructor + SstCounters(); + + // Put data into disk form + void EncodeTo(std::string & Dst) const; + + // Populate member data from prior EncodeTo block + Status DecodeFrom(const Slice& src); + + // increment the counter + uint64_t 
Inc(unsigned Index); + + // add value to the counter + uint64_t Add(unsigned Index, uint64_t Amount); + + // return value of a counter + uint64_t Value(unsigned Index) const; + + // set a value + void Set(unsigned Index, uint64_t); + + // return number of counters + uint32_t Size() const {return(m_CounterSize);}; + + // printf all values + void Dump() const; + +}; // class SstCounters + + +extern struct PerformanceCounters * gPerfCounters; + + +enum PerformanceCountersEnum +{ + // + // array index values/names + // (enum explicitly numbered to allow future edits / moves / inserts) + // + ePerfROFileOpen=0, //!< PosixMmapReadableFile open + ePerfROFileClose=1, //!< closed + ePerfROFileUnmap=2, //!< unmap without close + + ePerfRWFileOpen=3, //!< PosixMmapFile open + ePerfRWFileClose=4, //!< closed + ePerfRWFileUnmap=5, //!< unmap without close + + ePerfApiOpen=6, //!< Count of DB::Open completions + ePerfApiGet=7, //!< Count of DBImpl::Get completions + ePerfApiWrite=8, //!< Count of DBImpl::Write completions + + ePerfWriteSleep=9, //!< DBImpl::MakeRoomForWrite called sleep + ePerfWriteWaitImm=10, //!< DBImpl::MakeRoomForWrite called Wait on Imm compact + ePerfWriteWaitLevel0=11,//!< DBImpl::MakeRoomForWrite called Wait on Level0 compact + ePerfWriteNewMem=12, //!< DBImpl::MakeRoomForWrite created new memory log + ePerfWriteError=13, //!< DBImpl::MakeRoomForWrite saw bg_error_ + ePerfWriteNoWait=14, //!< DBImpl::MakeRoomForWrite took no action + + ePerfGetMem=15, //!< DBImpl::Get read from memory log + ePerfGetImm=16, //!< DBImpl::Get read from previous memory log + ePerfGetVersion=17, //!< DBImpl::Get read from Version object + + // code ASSUMES the levels are in numerical order, + // i.e. 
based off of ePerfSearchLevel0 + ePerfSearchLevel0=18, //!< Version::Get read searched one or more files here + ePerfSearchLevel1=19, //!< Version::Get read searched one or more files here + ePerfSearchLevel2=20, //!< Version::Get read searched one or more files here + ePerfSearchLevel3=21, //!< Version::Get read searched one or more files here + ePerfSearchLevel4=22, //!< Version::Get read searched one or more files here + ePerfSearchLevel5=23, //!< Version::Get read searched one or more files here + ePerfSearchLevel6=24, //!< Version::Get read searched one or more files here + + ePerfTableCached=25, //!< TableCache::FindTable found table in cache + ePerfTableOpened=26, //!< TableCache::FindTable had to open table file + ePerfTableGet=27, //!< TableCache::Get used to retrieve a key + + ePerfBGCloseUnmap=28, //!< PosixEnv::BGThread started Unmap/Close job + ePerfBGCompactImm=29, //!< PosixEnv::BGThread started compaction of Imm + ePerfBGNormal=30, //!< PosixEnv::BGThread started normal compaction job + ePerfBGCompactLevel0=31,//!< PosixEnv::BGThread started compaction of Level0 + + ePerfBlockFiltered=32, //!< Table::BlockReader search stopped due to filter + ePerfBlockFilterFalse=33,//!< Table::BlockReader gave a false positive for match + ePerfBlockCached=34, //!< Table::BlockReader found block in cache + ePerfBlockRead=35, //!< Table::BlockReader read block from disk + ePerfBlockFilterRead=36,//!< Table::ReadMeta filter loaded from file + ePerfBlockValidGet=37, //!< Table::InternalGet has valid iterator + + ePerfDebug0=38, //!< Developer debug counters, moveable + ePerfDebug1=39, //!< Developer debug counters, moveable + ePerfDebug2=40, //!< Developer debug counters, moveable + ePerfDebug3=41, //!< Developer debug counters, moveable + ePerfDebug4=42, //!< Developer debug counters, moveable + + ePerfReadBlockError=43, //!< crc or compression error in ReadBlock (format.cc) + + ePerfIterNew=44, //!< Count of DBImpl::NewDBIterator calls + ePerfIterNext=45, //!< 
Count of DBIter::Next calls + ePerfIterPrev=46, //!< Count of DBIter::Prev calls + ePerfIterSeek=47, //!< Count of DBIter::Seek calls + ePerfIterSeekFirst=48, //!< Count of DBIter::SeekFirst calls + ePerfIterSeekLast=49, //!< Count of DBIter::SeekLast calls + ePerfIterDelete=50, //!< Count of DBIter::~DBIter + + ePerfElevelDirect=51, //!< eleveldb's FindWaitingThread went direct to thread + ePerfElevelQueued=52, //!< eleveldb's FindWaitingThread queued work item + ePerfElevelDequeued=53, //!< eleveldb's worker took item from backlog queue + + ePerfElevelRefCreate=54,//!< eleveldb RefObject constructed + ePerfElevelRefDelete=55,//!< eleveldb RefObject destructed + + ePerfThrottleGauge=56, //!< current throttle value + ePerfThrottleCounter=57,//!< running throttle by seconds + + ePerfThrottleMicros0=58,//!< level 0 micros spent compacting + ePerfThrottleKeys0=59, //!< level 0 keys processed + ePerfThrottleBacklog0=60,//!< backlog at time of posting (level0) + ePerfThrottleCompacts0=61,//!< number of level 0 compactions + + ePerfThrottleMicros1=62,//!< level 1+ micros spent compacting + ePerfThrottleKeys1=63, //!< level 1+ keys processed + ePerfThrottleBacklog1=64,//!< backlog at time of posting (level1+) + ePerfThrottleCompacts1=65,//!< number of level 1+ compactions + + ePerfBGWriteError=66, //!< error in write/close, see syslog + + ePerfThrottleWait=67, //!< milliseconds of throttle wait + ePerfThreadError=68, //!< system error on thread related call, no LOG access + + ePerfBGImmDirect=69, //!< count Imm compactions happened directly + ePerfBGImmQueued=70, //!< count Imm compactions placed on queue + ePerfBGImmDequeued=71, //!< count Imm compactions removed from queue + ePerfBGImmWeighted=72, //!< total microseconds item spent on queue + + ePerfBGUnmapDirect=73, //!< count Unmap operations happened directly + ePerfBGUnmapQueued=74, //!< count Unmap operations placed on queue + ePerfBGUnmapDequeued=75,//!< count Unmap operations removed from queue + 
ePerfBGUnmapWeighted=76,//!< total microseconds item spent on queue + + ePerfBGLevel0Direct=77, //!< count Level0 compactions happened directly + ePerfBGLevel0Queued=78, //!< count Level0 compactions placed on queue + ePerfBGLevel0Dequeued=79,//!< count Level0 compactions removed from queue + ePerfBGLevel0Weighted=80,//!< total microseconds item spent on queue + + ePerfBGCompactDirect=81, //!< count generic compactions happened directly + ePerfBGCompactQueued=82, //!< count generic compactions placed on queue + ePerfBGCompactDequeued=83,//!< count generic compactions removed from queue + ePerfBGCompactWeighted=84,//!< total microseconds item spent on queue + + ePerfFileCacheInsert=85, //!< total bytes inserted into file cache + ePerfFileCacheRemove=86, //!< total bytes removed from file cache + + ePerfBlockCacheInsert=87, //!< total bytes inserted into block cache + ePerfBlockCacheRemove=88, //!< total bytes removed from block cache + + ePerfApiDelete=89, //!< Count of DB::Delete + + // must follow last index name to represent size of array + // (ASSUMES previous enum is highest value) + ePerfCountEnumSize, //!< size of the array described by the enum values + + ePerfVersion=1, //!< structure versioning + ePerfKey=41207 //!< random number as shared memory identifier +}; + +// +// Do NOT use virtual functions. This structure will be aligned at different +// locations in multiple processes. Things can get messy with virtuals. 
+ +struct PerformanceCounters +{ +public: + static int m_LastError; + +protected: + uint32_t m_Version; //!< object revision identification + uint32_t m_CounterSize; //!< number of objects in m_Counter + + volatile uint64_t m_Counter[ePerfCountEnumSize]; + + static const char * m_PerfCounterNames[]; + static int m_PerfSharedId; + static volatile uint64_t m_BogusCounter; //!< for out of range GetPtr calls + +public: + // only called for local object, not for shared memory + PerformanceCounters(); + + //!< does executable's idea of version match shared object? + bool VersionTest() + {return(ePerfCountEnumSize<=m_CounterSize && ePerfVersion==m_Version);}; + + //!< mostly for perf_count_test.cc + void SetVersion(uint32_t Version, uint32_t CounterSize) + {m_Version=Version; m_CounterSize=CounterSize;}; + + static PerformanceCounters * Init(bool IsReadOnly); + static int Close(PerformanceCounters * Counts); + + uint64_t Inc(unsigned Index); + uint64_t Dec(unsigned Index); + + // add value to the counter + uint64_t Add(unsigned Index, uint64_t Amount); + + // return value of a counter + uint64_t Value(unsigned Index) const; + + // set a value + void Set(unsigned Index, uint64_t); + + volatile const uint64_t * GetPtr(unsigned Index) const; + + static const char * GetNamePtr(unsigned Index); + + int LookupCounter(const char * Name); + + void Dump(); + +}; // struct PerformanceCounters + +extern PerformanceCounters * gPerfCounters; + +} // namespace leveldb + +#endif // STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_ diff --git a/api/leveldb/leveldb_wt.cc b/api/leveldb/leveldb_wt.cc index fbf6d00d20e..f156a29c139 100644 --- a/api/leveldb/leveldb_wt.cc +++ b/api/leveldb/leveldb_wt.cc @@ -42,15 +42,35 @@ using leveldb::Range; using leveldb::Slice; using leveldb::Snapshot; using leveldb::Status; -#ifdef HAVE_HYPERLEVELDB +#if HAVE_ELEVELDB +using leveldb::Value; namespace leveldb { -class ReplayIterator; +Value::~Value() {} + +class StringValue : public Value { + public: + explicit 
StringValue(std::string& val) : value_(val) {} + ~StringValue() {} + + StringValue& assign(const char* data, size_t size) { + value_.assign(data, size); + return *this; + } + + private: + std::string& value_; +}; } #endif #define WT_URI "table:data" -#define WT_CONN_CONFIG "log=(enabled),checkpoint_sync=false,session_max=256," -#define WT_TABLE_CONFIG "type=lsm,leaf_page_max=4KB,leaf_item_max=1KB," +#define WT_CONN_CONFIG "log=(enabled),checkpoint_sync=false,session_max=8192,"\ + "mmap=false,transaction_sync=(enabled=true,method=none)," +#define WT_TABLE_CONFIG "type=lsm,leaf_page_max=4KB,leaf_item_max=1KB," \ + "internal_page_max=128K,lsm=(chunk_size=100MB," \ + "bloom_config=(leaf_page_max=8MB)," \ + "bloom_bit_count=28,bloom_hash_count=19," \ + "bloom_oldest=true)," /* Destructors required for interfaces. */ leveldb::DB::~DB() {} @@ -153,13 +173,17 @@ public: }; }; - namespace leveldb { FilterPolicy::~FilterPolicy() {} const FilterPolicy *NewBloomFilterPolicy(int bits_per_key) { return new FilterPolicyImpl(bits_per_key); } +#if HAVE_ELEVELDB +const FilterPolicy *NewBloomFilterPolicy2(int bits_per_key) { + return NewBloomFilterPolicy(bits_per_key); +} +#endif Cache::~Cache() {} @@ -359,6 +383,11 @@ public: virtual Status Get(const ReadOptions& options, const Slice& key, std::string* value); +#if HAVE_ELEVELDB + virtual Status Get(const ReadOptions& options, + const Slice& key, Value* value); +#endif + #ifdef HAVE_HYPERLEVELDB virtual Status LiveBackup(const Slice& name) { return Status::NotSupported("sorry!"); @@ -460,13 +489,17 @@ leveldb::DB::Open(const Options &options, const std::string &name, leveldb::DB * s_conn << "exclusive,"; if (options.compression == kSnappyCompression) s_conn << "extensions=[libwiredtiger_snappy.so],"; - size_t cache_size = 25 * options.write_buffer_size; + size_t cache_size = 2 * options.write_buffer_size; + cache_size += options.max_open_files * (4 << 20); if (options.block_cache) cache_size += ((CacheImpl 
*)options.block_cache)->capacity_; + else + cache_size += 100 << 20; s_conn << "cache_size=" << cache_size << ","; std::string conn_config = s_conn.str(); WT_CONNECTION *conn; + fprintf(stderr,"Open: Home %s config %s\r\n",name.c_str(),conn_config.c_str()); int ret = ::wiredtiger_open(name.c_str(), NULL, conn_config.c_str(), &conn); if (ret == ENOENT) return Status::NotFound(Slice("Database does not exist.")); @@ -675,6 +708,51 @@ err: return WiredTigerErrorToStatus(ret, errmsg); } +#if HAVE_ELEVELDB +// If the database contains an entry for "key" store the +// corresponding value in *value and return OK. +// +// If there is no entry for "key" leave *value unchanged and return +// a status for which Status::IsNotFound() returns true. +// +// May return some other Status on an error. +Status +DbImpl::Get(const ReadOptions& options, + const Slice& key, Value* value) +{ + WT_CURSOR *cursor; + WT_ITEM item; + const SnapshotImpl *si = NULL; + const char *errmsg = NULL; + + // Read options can contain a snapshot for us to use + if (options.snapshot == NULL) { + cursor = getCursor(); + } else { + si = static_cast<const SnapshotImpl *>(options.snapshot); + if (!si->getStatus().ok()) + return si->getStatus(); + cursor = si->getCursor(); + } + + item.data = key.data(); + item.size = key.size(); + cursor->set_key(cursor, &item); + int ret = cursor->search(cursor); + if (ret == 0) { + ret = cursor->get_value(cursor, &item); + if (ret == 0) + value->assign((const char *)item.data, item.size); + } else if (ret == WT_NOTFOUND) + errmsg = "DB::Get key not found"; +err: + // Release the cursor if we are not in a snapshot + if (si == NULL) + releaseCursor(cursor); + return WiredTigerErrorToStatus(ret, errmsg); +} +#endif + // Return a heap-allocated iterator over the contents of the database. // The result of NewIterator() is initially invalid (caller must // call one of the Seek methods on the iterator before using it). 
diff --git a/api/leveldb/leveldb_wt.h b/api/leveldb/leveldb_wt.h index 3025f3db0d5..520068547a6 100644 --- a/api/leveldb/leveldb_wt.h +++ b/api/leveldb/leveldb_wt.h @@ -27,7 +27,7 @@ #include "wiredtiger_config.h" -#ifdef HAVE_HYPERLEVELDB +#ifdef HAVE_HYPERLEVELDB #include <hyperleveldb/cache.h> #include <hyperleveldb/comparator.h> #include <hyperleveldb/db.h> @@ -43,10 +43,14 @@ #include "leveldb/db.h" #include "leveldb/env.h" #include "leveldb/filter_policy.h" +#include "leveldb/options.h" #include "leveldb/slice.h" #include "leveldb/status.h" #include "leveldb/table_builder.h" #include "leveldb/write_batch.h" #endif +#if HAVE_ELEVELDB +#include "leveldb/perf_count.h" +#endif #include "wiredtiger.h" diff --git a/api/leveldb/util/perf_count.cc b/api/leveldb/util/perf_count.cc new file mode 100644 index 00000000000..d086f29379a --- /dev/null +++ b/api/leveldb/util/perf_count.cc @@ -0,0 +1,657 @@ +// ------------------------------------------------------------------- +// +// perf_count.cc: performance counters LevelDB +// +// Copyright (c) 2012-2013 Basho Technologies, Inc. All Rights Reserved. +// +// This file is provided to you under the Apache License, +// Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// +// ------------------------------------------------------------------- + +#include <limits.h> +#include <stdio.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <syslog.h> +#include <memory.h> +#include <errno.h> + +#ifndef STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_ +#include "leveldb/perf_count.h" +#endif + +#include "util/coding.h" + +#define __STDC_FORMAT_MACROS +#include <inttypes.h> + +#ifdef OS_SOLARIS +# include <atomic.h> +#endif + + +namespace leveldb +{ + +// always have something active in gPerfCounters, eliminates +// need to test for "is shared object attached yet" +static PerformanceCounters LocalStartupCounters; +PerformanceCounters * gPerfCounters(&LocalStartupCounters); + + + SstCounters::SstCounters() + : m_IsReadOnly(false), + m_Version(eSstCountVersion), + m_CounterSize(eSstCountEnumSize) + { + memset(m_Counter, 0, sizeof(m_Counter)); + + m_Counter[eSstCountKeySmallest]=ULLONG_MAX; + m_Counter[eSstCountValueSmallest]=ULLONG_MAX; + + return; + + }; // SstCounters::SstCounters + + + void + SstCounters::EncodeTo( + std::string & Dst) const + { + unsigned loop; + + PutVarint32(&Dst, m_Version); + PutVarint32(&Dst, m_CounterSize); + + for(loop=0; loop<eSstCountEnumSize; ++loop) + PutVarint64(&Dst, m_Counter[loop]); + } // SstCounters::EncodeTo + + + Status + SstCounters::DecodeFrom( + const Slice& src) + { + Status ret_status; + Slice cursor; + bool good; + int loop; + + cursor=src; + m_IsReadOnly=true; + good=GetVarint32(&cursor, &m_Version); + good=good && (m_Version<=eSstCountVersion); + + // all lesser number of stats to be read + good=good && GetVarint32(&cursor, &m_CounterSize); + if (good && eSstCountEnumSize < m_CounterSize) + m_CounterSize=eSstCountEnumSize; + + for (loop=0; good && loop<eSstCountEnumSize; ++loop) + { + good=GetVarint64(&cursor, &m_Counter[loop]); + } // for + + // if (!good) change ret_status to bad + + return(ret_status); + + } // SstCounters::DecodeFrom + + + uint64_t 
+ SstCounters::Inc( + unsigned Index) + { + uint64_t ret_val; + + ret_val=0; + if (!m_IsReadOnly && Index<m_CounterSize) + { + ++m_Counter[Index]; + ret_val=m_Counter[Index]; + } // if + + return(ret_val); + } // SstCounters::Inc + + + uint64_t + SstCounters::Add( + unsigned Index, + uint64_t Amount) + { + uint64_t ret_val; + + ret_val=0; + if (!m_IsReadOnly && Index<m_CounterSize) + { + m_Counter[Index]+=Amount; + ret_val=m_Counter[Index]; + } // if + + return(ret_val); + } // SstCounters::Add + + + uint64_t + SstCounters::Value( + unsigned Index) const + { + uint64_t ret_val; + + ret_val=0; + if (Index<m_CounterSize) + { + ret_val=m_Counter[Index]; + } // if + + return(ret_val); + } // SstCounters::Value + + + void + SstCounters::Set( + unsigned Index, + uint64_t Value) + { + if (Index<m_CounterSize) + { + m_Counter[Index]=Value; + } // if + + return; + } // SstCounters::Set + + + void + SstCounters::Dump() const + { + unsigned loop; + + printf("SstCounters:\n"); + printf(" m_IsReadOnly: %u\n", m_IsReadOnly); + printf(" m_Version: %u\n", m_Version); + printf(" m_CounterSize: %u\n", m_CounterSize); + for (loop=0; loop<m_CounterSize; ++loop) + printf(" Counter[%2u]: %" PRIu64 "\n", loop, m_Counter[loop]); + + return; + + } // SstCounters::Dump + + + // only used for local static objects, not shared memory objects + PerformanceCounters::PerformanceCounters() + { + m_Version=ePerfVersion; + m_CounterSize=ePerfCountEnumSize; + // cast away "volatile" + memset((void*)m_Counter, 0, sizeof(m_Counter)); + + return; + + } // PerformanceCounters::PerformanceCounters + + + PerformanceCounters * + PerformanceCounters::Init( + bool IsReadOnly) + { + PerformanceCounters * ret_ptr; + bool should_create, good; + int ret_val, id; + struct shmid_ds shm_info; + size_t open_size; + + ret_ptr=NULL; + memset(&shm_info, 0, sizeof(shm_info)); + good=true; + open_size=sizeof(PerformanceCounters); + + // first id attempt, minimal request + id=shmget(ePerfKey, 0, S_IRUSR | S_IWUSR | S_IRGRP 
| S_IROTH); + if (-1!=id) + ret_val=shmctl(id, IPC_STAT, &shm_info); + else + ret_val=-1; + + // does the shared memory already exists (and of proper size if writing) + should_create=(0!=ret_val || (shm_info.shm_segsz < sizeof(PerformanceCounters))) && !IsReadOnly; + + // should old shared memory be deleted? + if (should_create && 0==ret_val) + { + ret_val=shmctl(id, IPC_RMID, &shm_info); + good=(0==ret_val); + if (0!=ret_val) + syslog(LOG_ERR, "shmctl IPC_RMID failed [%d, %m]", errno); + } // if + + // else open the size that exists + else if (0==ret_val) + { + open_size=shm_info.shm_segsz; + } // else if + + // attempt to attach/create to shared memory instance + if (good) + { + int flags; + + if (IsReadOnly) + flags = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; + else + flags = IPC_CREAT | S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; + + m_PerfSharedId=shmget(ePerfKey, open_size, flags); + good=(-1!=m_PerfSharedId); + } // if + + // map shared memory instance + if (good) + { + ret_ptr=(PerformanceCounters *)shmat(m_PerfSharedId, NULL, (IsReadOnly ? SHM_RDONLY : 0)); + if ((void*)-1 != ret_ptr) + { + // initialize? 
+ if (should_create || ePerfVersion!=ret_ptr->m_Version) + { + if (!IsReadOnly) + { + memset(ret_ptr, 0, sizeof(PerformanceCounters)); + ret_ptr->m_Version=ePerfVersion; + ret_ptr->m_CounterSize=ePerfCountEnumSize; + } // if + + // bad version match to existing segment + else + { + good=false; + errno=EINVAL; + } // else + } // if + } // if + else + { + good=false; + syslog(LOG_ERR, "shmat failed [%d, %m]", errno); + } // else + + if (good) + { + // make this available process wide + gPerfCounters=ret_ptr; + } // if + else + { + ret_ptr=NULL; + m_LastError=errno; + } // else + } // if + else + { + m_LastError=errno; + ret_ptr=NULL; + } // else + + return(ret_ptr); + + }; // PerformanceCounters::Init + + + int + PerformanceCounters::Close( + PerformanceCounters * Counts) + { + int ret_val; + + if (NULL!=Counts && &LocalStartupCounters != Counts) + { + // keep gPerf valid + if (gPerfCounters==Counts) + gPerfCounters=&LocalStartupCounters; + + ret_val=shmdt(Counts); + if (0!=ret_val) + ret_val=errno; + } // if + else + { + ret_val=EINVAL; + } // else + + return(ret_val); + } // PerformanceCounters::Close + + + uint64_t + PerformanceCounters::Inc( + unsigned Index) + { + uint64_t ret_val; + + ret_val=0; + if (Index<m_CounterSize) + { + volatile uint64_t * val_ptr; + + val_ptr=&m_Counter[Index]; + +# if ULONG_MAX != 4294967295UL +#ifdef OS_SOLARIS + atomic_inc_64(val_ptr); +#else + __sync_add_and_fetch(val_ptr, 1); +#endif +#else + // hack fest for 64 bit semi-atomic on 32bit machine + uint32_t ret_32, * ptr_32; + + ptr_32=(uint32_t *)&val_ptr; + ret_32=__sync_add_and_fetch(ptr_32, 1); + if (0==ret_32) + { + ++ptr_32; + __sync_add_and_fetch(ptr_32, 1); + } // if +#endif + ret_val=*val_ptr; + } // if + + return(ret_val); + } // PerformanceCounters::Inc + + + uint64_t + PerformanceCounters::Dec( + unsigned Index) + { + uint64_t ret_val; + + ret_val=0; + if (Index<m_CounterSize) + { + volatile uint64_t * val_ptr; + + val_ptr=&m_Counter[Index]; + +# if ULONG_MAX != 
4294967295UL +#ifdef OS_SOLARIS + atomic_dec_64(val_ptr); +#else + __sync_sub_and_fetch(val_ptr, 1); +#endif +#else + // hack fest for 64 bit semi-atomic on 32bit machine + uint32_t ret_32, * ptr_32; + + ptr_32=(uint32_t *)&val_ptr; + ret_32=__sync_sub_and_fetch(ptr_32, 1); + if (0xFFFFFFFF==ret_32) + { + ++ptr_32; + __sync_sub_and_fetch(ptr_32, 1); + } // if +#endif + ret_val=*val_ptr; + } // if + + return(ret_val); + } // PerformanceCounters::Dec + + + uint64_t + PerformanceCounters::Add( + unsigned Index, + uint64_t Amount) + { + uint64_t ret_val; + + ret_val=0; + if (Index<m_CounterSize) + { + volatile uint64_t * val_ptr; + + val_ptr=&m_Counter[Index]; + +# if ULONG_MAX != 4294967295UL +#ifdef OS_SOLARIS + ret_val=atomic_add_64_nv(val_ptr, Amount); +#else + ret_val=__sync_add_and_fetch(val_ptr, Amount); +#endif +#else + // hack fest for 64 bit semi-atomic on 32bit machine + uint32_t old_32, ret_32, * ptr_32; + + ptr_32=(uint32_t *)&val_ptr; + old_32=*ptr_32; + ret_32=__sync_add_and_fetch(ptr_32, Amount); + if (ret_32<old_32) + { + ++ptr_32; + __sync_add_and_fetch(ptr_32, 1); + } // if + + ret_val=*val_ptr; +#endif + } // if + + return(ret_val); + } // PerformanceCounters::Add + + + uint64_t + PerformanceCounters::Value( + unsigned Index) const + { + uint64_t ret_val; + + ret_val=0; + if (Index<m_CounterSize) + { + ret_val=m_Counter[Index]; + } // if + + return(ret_val); + } // SstCounters::Value + + + void + PerformanceCounters::Set( + unsigned Index, + uint64_t Amount) + { + if (Index<m_CounterSize) + { + volatile uint64_t * val_ptr; + + val_ptr=&m_Counter[Index]; + + *val_ptr=Amount; + } // if + + return; + } // PerformanceCounters::Set + + + volatile const uint64_t * + PerformanceCounters::GetPtr( + unsigned Index) const + { + const volatile uint64_t * ret_ptr; + + if (Index<m_CounterSize) + ret_ptr=&m_Counter[Index]; + else + ret_ptr=&m_BogusCounter; + + return(ret_ptr); + + } // PerformanceCounters::GetPtr + + + const char * + 
PerformanceCounters::GetNamePtr( + unsigned Index) + { + const char * ret_ptr; + + if (Index<ePerfCountEnumSize) + ret_ptr=m_PerfCounterNames[Index]; + else + ret_ptr="???"; + + return(ret_ptr); + + } // PerformanceCounters::GetPtr + + + int PerformanceCounters::m_PerfSharedId=-1; + int PerformanceCounters::m_LastError=0; + volatile uint64_t PerformanceCounters::m_BogusCounter=0; + const char * PerformanceCounters::m_PerfCounterNames[]= + { + "ROFileOpen", + "ROFileClose", + "ROFileUnmap", + "RWFileOpen", + "RWFileClose", + "RWFileUnmap", + "ApiOpen", + "ApiGet", + "ApiWrite", + "WriteSleep", + "WriteWaitImm", + "WriteWaitLevel0", + "WriteNewMem", + "WriteError", + "WriteNoWait", + "GetMem", + "GetImm", + "GetVersion", + "SearchLevel[0]", + "SearchLevel[1]", + "SearchLevel[2]", + "SearchLevel[3]", + "SearchLevel[4]", + "SearchLevel[5]", + "SearchLevel[6]", + "TableCached", + "TableOpened", + "TableGet", + "BGCloseUnmap", + "BGCompactImm", + "BGNormal", + "BGCompactLevel0", + "BlockFiltered", + "BlockFilterFalse", + "BlockCached", + "BlockRead", + "BlockFilterRead", + "BlockValidGet", + "Debug[0]", + "Debug[1]", + "Debug[2]", + "Debug[3]", + "Debug[4]", + "ReadBlockError", + "DBIterNew", + "DBIterNext", + "DBIterPrev", + "DBIterSeek", + "DBIterSeekFirst", + "DBIterSeekLast", + "DBIterDelete", + "eleveldbDirect", + "eleveldbQueued", + "eleveldbDequeued", + "elevelRefCreate", + "elevelRefDelete", + "ThrottleGauge", + "ThrottleCounter", + "ThrottleMicros0", + "ThrottleKeys0", + "ThrottleBacklog0", + "ThrottleCompacts0", + "ThrottleMicros1", + "ThrottleKeys1", + "ThrottleBacklog1", + "ThrottleCompacts1", + "BGWriteError", + "ThrottleWait", + "ThreadError", + "BGImmDirect", + "BGImmQueued", + "BGImmDequeued", + "BGImmWeighted", + "BGUnmapDirect", + "BGUnmapQueued", + "BGUnmapDequeued", + "BGUnmapWeighted", + "BGLevel0Direct", + "BGLevel0Queued", + "BGLevel0Dequeued", + "BGLevel0Weighted", + "BGCompactDirect", + "BGCompactQueued", + "BGCompactDequeued", + 
"BGCompactWeighted", + "FileCacheInsert", + "FileCacheRemove", + "BlockCacheInsert", + "BlockCacheRemove", + "ApiDelete" + }; + + + int + PerformanceCounters::LookupCounter( + const char * Name) + { + int index,loop; + + index=-1; + + if (NULL!=Name && '\0'!=*Name) + { + for (loop=0; loop<ePerfCountEnumSize && -1==index; ++loop) + { + if (0==strcmp(m_PerfCounterNames[loop], Name)) + index=loop; + } // loop + } // if + + return(index); + }; + + void + PerformanceCounters::Dump() + { + int loop; + + printf(" m_Version: %u\n", m_Version); + printf(" m_CounterSize: %u\n", m_CounterSize); + + for (loop=0; loop<ePerfCountEnumSize; ++loop) + { + printf(" %s: %" PRIu64 "\n", m_PerfCounterNames[loop], m_Counter[loop]); + } // loop + }; // Dump + +} // namespace leveldb |