summaryrefslogtreecommitdiff
path: root/api
diff options
context:
space:
mode:
Diffstat (limited to 'api')
-rw-r--r--api/leveldb/Makefile.am18
-rw-r--r--api/leveldb/db/dbformat.h223
-rw-r--r--api/leveldb/db/skiplist.h379
-rw-r--r--api/leveldb/db/write_batch.cc109
-rw-r--r--api/leveldb/db/write_batch_internal.h39
-rw-r--r--api/leveldb/leveldb_test.cc36
-rw-r--r--api/leveldb/leveldb_wt.cc673
-rw-r--r--api/leveldb/leveldb_wt.h23
-rw-r--r--api/leveldb/port/port.h11
-rw-r--r--api/leveldb/util/arena.h68
-rw-r--r--api/leveldb/util/coding.cc194
-rw-r--r--api/leveldb/util/coding.h104
-rw-r--r--api/leveldb/util/comparator.cc80
-rw-r--r--api/leveldb/util/env.cc96
-rw-r--r--api/leveldb/util/env_posix.cc617
-rw-r--r--api/leveldb/util/logging.cc80
-rw-r--r--api/leveldb/util/logging.h47
-rw-r--r--api/leveldb/util/options.cc29
-rw-r--r--api/leveldb/util/posix_logger.h98
-rw-r--r--api/leveldb/util/random.h72
-rw-r--r--api/leveldb/util/status.cc74
21 files changed, 3070 insertions, 0 deletions
diff --git a/api/leveldb/Makefile.am b/api/leveldb/Makefile.am
new file mode 100644
index 00000000000..740dfb16456
--- /dev/null
+++ b/api/leveldb/Makefile.am
@@ -0,0 +1,18 @@
+AM_CPPFLAGS = -I$(top_builddir)
+
+lib_LTLIBRARIES = libwiredtiger_leveldb.la
+LDADD = $(lib_LTLIBRARIES) $(top_builddir)/libwiredtiger.la
+
+noinst_PROGRAMS = leveldb_test
+
+libwiredtiger_leveldb_la_LDFLAGS = -release @VERSION@
+libwiredtiger_leveldb_la_SOURCES = \
+ leveldb_wt.cc \
+ db/write_batch.cc \
+ util/coding.cc util/comparator.cc util/env.cc util/env_posix.cc \
+ util/logging.cc util/options.cc util/status.cc
+
+leveldb_test_SOURCES = leveldb_test.cc
+#leveldb_test_LDADD = $(top_builddir)/libwiredtiger.la
+
+TESTS = $(noinst_PROGRAMS)
diff --git a/api/leveldb/db/dbformat.h b/api/leveldb/db/dbformat.h
new file mode 100644
index 00000000000..37c21ed95e7
--- /dev/null
+++ b/api/leveldb/db/dbformat.h
@@ -0,0 +1,223 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_DB_FORMAT_H_
+#define STORAGE_LEVELDB_DB_FORMAT_H_
+
+#include <stdio.h>
+#include "leveldb_wt.h"
+#include "util/coding.h"
+#include "util/logging.h"
+
+namespace leveldb {
+
+// Grouping of constants. We may want to make some of these
+// parameters set via options.
+namespace config {
+static const int kNumLevels = 7;
+
+// Level-0 compaction is started when we hit this many files.
+static const int kL0_CompactionTrigger = 4;
+
+// Soft limit on number of level-0 files. We slow down writes at this point.
+static const int kL0_SlowdownWritesTrigger = 8;
+
+// Maximum number of level-0 files. We stop writes at this point.
+static const int kL0_StopWritesTrigger = 12;
+
+// Maximum level to which a new compacted memtable is pushed if it
+// does not create overlap. We try to push to level 2 to avoid the
+// relatively expensive level 0=>1 compactions and to avoid some
+// expensive manifest file operations. We do not push all the way to
+// the largest level since that can generate a lot of wasted disk
+// space if the same key space is being repeatedly overwritten.
+static const int kMaxMemCompactLevel = 2;
+
+} // namespace config
+
+class InternalKey;
+
+// Value types encoded as the last component of internal keys.
+// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
+// data structures.
+enum ValueType {
+ kTypeDeletion = 0x0,
+ kTypeValue = 0x1
+};
+// kValueTypeForSeek defines the ValueType that should be passed when
+// constructing a ParsedInternalKey object for seeking to a particular
+// sequence number (since we sort sequence numbers in decreasing order
+// and the value type is embedded as the low 8 bits in the sequence
+// number in internal keys, we need to use the highest-numbered
+// ValueType, not the lowest).
+static const ValueType kValueTypeForSeek = kTypeValue;
+
+typedef uint64_t SequenceNumber;
+
+// We leave eight bits empty at the bottom so a type and sequence#
+// can be packed together into 64-bits.
+static const SequenceNumber kMaxSequenceNumber =
+ ((0x1ull << 56) - 1);
+
+struct ParsedInternalKey {
+ Slice user_key;
+ SequenceNumber sequence;
+ ValueType type;
+
+ ParsedInternalKey() { } // Intentionally left uninitialized (for speed)
+ ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
+ : user_key(u), sequence(seq), type(t) { }
+ std::string DebugString() const;
+};
+
+// Return the length of the encoding of "key".
+inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
+ return key.user_key.size() + 8;
+}
+
+// Append the serialization of "key" to *result.
+extern void AppendInternalKey(std::string* result,
+ const ParsedInternalKey& key);
+
+// Attempt to parse an internal key from "internal_key". On success,
+// stores the parsed data in "*result", and returns true.
+//
+// On error, returns false, leaves "*result" in an undefined state.
+extern bool ParseInternalKey(const Slice& internal_key,
+ ParsedInternalKey* result);
+
+// Returns the user key portion of an internal key.
+inline Slice ExtractUserKey(const Slice& internal_key) {
+ assert(internal_key.size() >= 8);
+ return Slice(internal_key.data(), internal_key.size() - 8);
+}
+
+inline ValueType ExtractValueType(const Slice& internal_key) {
+ assert(internal_key.size() >= 8);
+ const size_t n = internal_key.size();
+ uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
+ unsigned char c = num & 0xff;
+ return static_cast<ValueType>(c);
+}
+
+// A comparator for internal keys that uses a specified comparator for
+// the user key portion and breaks ties by decreasing sequence number.
+class InternalKeyComparator : public Comparator {
+ private:
+ const Comparator* user_comparator_;
+ public:
+ explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) { }
+ virtual const char* Name() const;
+ virtual int Compare(const Slice& a, const Slice& b) const;
+ virtual void FindShortestSeparator(
+ std::string* start,
+ const Slice& limit) const;
+ virtual void FindShortSuccessor(std::string* key) const;
+
+ const Comparator* user_comparator() const { return user_comparator_; }
+
+ int Compare(const InternalKey& a, const InternalKey& b) const;
+};
+
+// Filter policy wrapper that converts from internal keys to user keys
+class InternalFilterPolicy : public FilterPolicy {
+ private:
+ const FilterPolicy* const user_policy_;
+ public:
+ explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) { }
+ virtual const char* Name() const;
+ virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const;
+ virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const;
+};
+
+// Modules in this directory should keep internal keys wrapped inside
+// the following class instead of plain strings so that we do not
+// incorrectly use string comparisons instead of an InternalKeyComparator.
+class InternalKey {
+ private:
+ std::string rep_;
+ public:
+ InternalKey() { } // Leave rep_ as empty to indicate it is invalid
+ InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) {
+ AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t));
+ }
+
+ void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }
+ Slice Encode() const {
+ assert(!rep_.empty());
+ return rep_;
+ }
+
+ Slice user_key() const { return ExtractUserKey(rep_); }
+
+ void SetFrom(const ParsedInternalKey& p) {
+ rep_.clear();
+ AppendInternalKey(&rep_, p);
+ }
+
+ void Clear() { rep_.clear(); }
+
+ std::string DebugString() const;
+};
+
+inline int InternalKeyComparator::Compare(
+ const InternalKey& a, const InternalKey& b) const {
+ return Compare(a.Encode(), b.Encode());
+}
+
+inline bool ParseInternalKey(const Slice& internal_key,
+ ParsedInternalKey* result) {
+ const size_t n = internal_key.size();
+ if (n < 8) return false;
+ uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
+ unsigned char c = num & 0xff;
+ result->sequence = num >> 8;
+ result->type = static_cast<ValueType>(c);
+ result->user_key = Slice(internal_key.data(), n - 8);
+ return (c <= static_cast<unsigned char>(kTypeValue));
+}
+
+// A helper class useful for DBImpl::Get()
+class LookupKey {
+ public:
+ // Initialize *this for looking up user_key at a snapshot with
+ // the specified sequence number.
+ LookupKey(const Slice& user_key, SequenceNumber sequence);
+
+ ~LookupKey();
+
+ // Return a key suitable for lookup in a MemTable.
+ Slice memtable_key() const { return Slice(start_, end_ - start_); }
+
+ // Return an internal key (suitable for passing to an internal iterator)
+ Slice internal_key() const { return Slice(kstart_, end_ - kstart_); }
+
+ // Return the user key
+ Slice user_key() const { return Slice(kstart_, end_ - kstart_ - 8); }
+
+ private:
+ // We construct a char array of the form:
+ // klength varint32 <-- start_
+ // userkey char[klength] <-- kstart_
+ // tag uint64
+ // <-- end_
+ // The array is a suitable MemTable key.
+ // The suffix starting with "userkey" can be used as an InternalKey.
+ const char* start_;
+ const char* kstart_;
+ const char* end_;
+ char space_[200]; // Avoid allocation for short keys
+
+ // No copying allowed
+ LookupKey(const LookupKey&);
+ void operator=(const LookupKey&);
+};
+
+inline LookupKey::~LookupKey() {
+ if (start_ != space_) delete[] start_;
+}
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_DB_FORMAT_H_
diff --git a/api/leveldb/db/skiplist.h b/api/leveldb/db/skiplist.h
new file mode 100644
index 00000000000..af85be6d016
--- /dev/null
+++ b/api/leveldb/db/skiplist.h
@@ -0,0 +1,379 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Thread safety
+// -------------
+//
+// Writes require external synchronization, most likely a mutex.
+// Reads require a guarantee that the SkipList will not be destroyed
+// while the read is in progress. Apart from that, reads progress
+// without any internal locking or synchronization.
+//
+// Invariants:
+//
+// (1) Allocated nodes are never deleted until the SkipList is
+// destroyed. This is trivially guaranteed by the code since we
+// never delete any skip list nodes.
+//
+// (2) The contents of a Node except for the next/prev pointers are
+// immutable after the Node has been linked into the SkipList.
+// Only Insert() modifies the list, and it is careful to initialize
+// a node and use release-stores to publish the nodes in one or
+// more lists.
+//
+// ... prev vs. next pointer ordering ...
+
+#include <assert.h>
+#include <stdlib.h>
+#include "port/port.h"
+#include "util/arena.h"
+#include "util/random.h"
+
+namespace leveldb {
+
+class Arena;
+
+template<typename Key, class Comparator>
+class SkipList {
+ private:
+ struct Node;
+
+ public:
+ // Create a new SkipList object that will use "cmp" for comparing keys,
+ // and will allocate memory using "*arena". Objects allocated in the arena
+ // must remain allocated for the lifetime of the skiplist object.
+ explicit SkipList(Comparator cmp, Arena* arena);
+
+ // Insert key into the list.
+ // REQUIRES: nothing that compares equal to key is currently in the list.
+ void Insert(const Key& key);
+
+ // Returns true iff an entry that compares equal to key is in the list.
+ bool Contains(const Key& key) const;
+
+ // Iteration over the contents of a skip list
+ class Iterator {
+ public:
+ // Initialize an iterator over the specified list.
+ // The returned iterator is not valid.
+ explicit Iterator(const SkipList* list);
+
+ // Returns true iff the iterator is positioned at a valid node.
+ bool Valid() const;
+
+ // Returns the key at the current position.
+ // REQUIRES: Valid()
+ const Key& key() const;
+
+ // Advances to the next position.
+ // REQUIRES: Valid()
+ void Next();
+
+ // Advances to the previous position.
+ // REQUIRES: Valid()
+ void Prev();
+
+ // Advance to the first entry with a key >= target
+ void Seek(const Key& target);
+
+ // Position at the first entry in list.
+ // Final state of iterator is Valid() iff list is not empty.
+ void SeekToFirst();
+
+ // Position at the last entry in list.
+ // Final state of iterator is Valid() iff list is not empty.
+ void SeekToLast();
+
+ private:
+ const SkipList* list_;
+ Node* node_;
+ // Intentionally copyable
+ };
+
+ private:
+ enum { kMaxHeight = 12 };
+
+ // Immutable after construction
+ Comparator const compare_;
+ Arena* const arena_; // Arena used for allocations of nodes
+
+ Node* const head_;
+
+ // Modified only by Insert(). Read racily by readers, but stale
+ // values are ok.
+ port::AtomicPointer max_height_; // Height of the entire list
+
+ inline int GetMaxHeight() const {
+ return static_cast<int>(
+ reinterpret_cast<intptr_t>(max_height_.NoBarrier_Load()));
+ }
+
+ // Read/written only by Insert().
+ Random rnd_;
+
+ Node* NewNode(const Key& key, int height);
+ int RandomHeight();
+ bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }
+
+ // Return true if key is greater than the data stored in "n"
+ bool KeyIsAfterNode(const Key& key, Node* n) const;
+
+ // Return the earliest node that comes at or after key.
+ // Return NULL if there is no such node.
+ //
+ // If prev is non-NULL, fills prev[level] with pointer to previous
+ // node at "level" for every level in [0..max_height_-1].
+ Node* FindGreaterOrEqual(const Key& key, Node** prev) const;
+
+ // Return the latest node with a key < key.
+ // Return head_ if there is no such node.
+ Node* FindLessThan(const Key& key) const;
+
+ // Return the last node in the list.
+ // Return head_ if list is empty.
+ Node* FindLast() const;
+
+ // No copying allowed
+ SkipList(const SkipList&);
+ void operator=(const SkipList&);
+};
+
+// Implementation details follow
+template<typename Key, class Comparator>
+struct SkipList<Key,Comparator>::Node {
+ explicit Node(const Key& k) : key(k) { }
+
+ Key const key;
+
+ // Accessors/mutators for links. Wrapped in methods so we can
+ // add the appropriate barriers as necessary.
+ Node* Next(int n) {
+ assert(n >= 0);
+ // Use an 'acquire load' so that we observe a fully initialized
+ // version of the returned Node.
+ return reinterpret_cast<Node*>(next_[n].Acquire_Load());
+ }
+ void SetNext(int n, Node* x) {
+ assert(n >= 0);
+ // Use a 'release store' so that anybody who reads through this
+ // pointer observes a fully initialized version of the inserted node.
+ next_[n].Release_Store(x);
+ }
+
+ // No-barrier variants that can be safely used in a few locations.
+ Node* NoBarrier_Next(int n) {
+ assert(n >= 0);
+ return reinterpret_cast<Node*>(next_[n].NoBarrier_Load());
+ }
+ void NoBarrier_SetNext(int n, Node* x) {
+ assert(n >= 0);
+ next_[n].NoBarrier_Store(x);
+ }
+
+ private:
+ // Array of length equal to the node height. next_[0] is lowest level link.
+ port::AtomicPointer next_[1];
+};
+
+template<typename Key, class Comparator>
+typename SkipList<Key,Comparator>::Node*
+SkipList<Key,Comparator>::NewNode(const Key& key, int height) {
+ char* mem = arena_->AllocateAligned(
+ sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1));
+ return new (mem) Node(key);
+}
+
+template<typename Key, class Comparator>
+inline SkipList<Key,Comparator>::Iterator::Iterator(const SkipList* list) {
+ list_ = list;
+ node_ = NULL;
+}
+
+template<typename Key, class Comparator>
+inline bool SkipList<Key,Comparator>::Iterator::Valid() const {
+ return node_ != NULL;
+}
+
+template<typename Key, class Comparator>
+inline const Key& SkipList<Key,Comparator>::Iterator::key() const {
+ assert(Valid());
+ return node_->key;
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::Next() {
+ assert(Valid());
+ node_ = node_->Next(0);
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::Prev() {
+ // Instead of using explicit "prev" links, we just search for the
+ // last node that falls before key.
+ assert(Valid());
+ node_ = list_->FindLessThan(node_->key);
+ if (node_ == list_->head_) {
+ node_ = NULL;
+ }
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::Seek(const Key& target) {
+ node_ = list_->FindGreaterOrEqual(target, NULL);
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::SeekToFirst() {
+ node_ = list_->head_->Next(0);
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::SeekToLast() {
+ node_ = list_->FindLast();
+ if (node_ == list_->head_) {
+ node_ = NULL;
+ }
+}
+
+template<typename Key, class Comparator>
+int SkipList<Key,Comparator>::RandomHeight() {
+ // Increase height with probability 1 in kBranching
+ static const unsigned int kBranching = 4;
+ int height = 1;
+ while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) {
+ height++;
+ }
+ assert(height > 0);
+ assert(height <= kMaxHeight);
+ return height;
+}
+
+template<typename Key, class Comparator>
+bool SkipList<Key,Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
+ // NULL n is considered infinite
+ return (n != NULL) && (compare_(n->key, key) < 0);
+}
+
+template<typename Key, class Comparator>
+typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOrEqual(const Key& key, Node** prev)
+ const {
+ Node* x = head_;
+ int level = GetMaxHeight() - 1;
+ while (true) {
+ Node* next = x->Next(level);
+ if (KeyIsAfterNode(key, next)) {
+ // Keep searching in this list
+ x = next;
+ } else {
+ if (prev != NULL) prev[level] = x;
+ if (level == 0) {
+ return next;
+ } else {
+ // Switch to next list
+ level--;
+ }
+ }
+ }
+}
+
+template<typename Key, class Comparator>
+typename SkipList<Key,Comparator>::Node*
+SkipList<Key,Comparator>::FindLessThan(const Key& key) const {
+ Node* x = head_;
+ int level = GetMaxHeight() - 1;
+ while (true) {
+ assert(x == head_ || compare_(x->key, key) < 0);
+ Node* next = x->Next(level);
+ if (next == NULL || compare_(next->key, key) >= 0) {
+ if (level == 0) {
+ return x;
+ } else {
+ // Switch to next list
+ level--;
+ }
+ } else {
+ x = next;
+ }
+ }
+}
+
+template<typename Key, class Comparator>
+typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast()
+ const {
+ Node* x = head_;
+ int level = GetMaxHeight() - 1;
+ while (true) {
+ Node* next = x->Next(level);
+ if (next == NULL) {
+ if (level == 0) {
+ return x;
+ } else {
+ // Switch to next list
+ level--;
+ }
+ } else {
+ x = next;
+ }
+ }
+}
+
+template<typename Key, class Comparator>
+SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena)
+ : compare_(cmp),
+ arena_(arena),
+ head_(NewNode(0 /* any key will do */, kMaxHeight)),
+ max_height_(reinterpret_cast<void*>(1)),
+ rnd_(0xdeadbeef) {
+ for (int i = 0; i < kMaxHeight; i++) {
+ head_->SetNext(i, NULL);
+ }
+}
+
+template<typename Key, class Comparator>
+void SkipList<Key,Comparator>::Insert(const Key& key) {
+ // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual()
+ // here since Insert() is externally synchronized.
+ Node* prev[kMaxHeight];
+ Node* x = FindGreaterOrEqual(key, prev);
+
+ // Our data structure does not allow duplicate insertion
+ assert(x == NULL || !Equal(key, x->key));
+
+ int height = RandomHeight();
+ if (height > GetMaxHeight()) {
+ for (int i = GetMaxHeight(); i < height; i++) {
+ prev[i] = head_;
+ }
+ //fprintf(stderr, "Change height from %d to %d\n", max_height_, height);
+
+ // It is ok to mutate max_height_ without any synchronization
+ // with concurrent readers. A concurrent reader that observes
+ // the new value of max_height_ will see either the old value of
+ // new level pointers from head_ (NULL), or a new value set in
+ // the loop below. In the former case the reader will
+ // immediately drop to the next level since NULL sorts after all
+ // keys. In the latter case the reader will use the new node.
+ max_height_.NoBarrier_Store(reinterpret_cast<void*>(height));
+ }
+
+ x = NewNode(key, height);
+ for (int i = 0; i < height; i++) {
+ // NoBarrier_SetNext() suffices since we will add a barrier when
+ // we publish a pointer to "x" in prev[i].
+ x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i));
+ prev[i]->SetNext(i, x);
+ }
+}
+
+template<typename Key, class Comparator>
+bool SkipList<Key,Comparator>::Contains(const Key& key) const {
+ Node* x = FindGreaterOrEqual(key, NULL);
+ if (x != NULL && Equal(key, x->key)) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+} // namespace leveldb
diff --git a/api/leveldb/db/write_batch.cc b/api/leveldb/db/write_batch.cc
new file mode 100644
index 00000000000..6a488a83e3e
--- /dev/null
+++ b/api/leveldb/db/write_batch.cc
@@ -0,0 +1,109 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// WriteBatch::rep_ :=
+// sequence: fixed64
+// count: fixed32
+// data: record[count]
+// record :=
+// kTypeValue varstring varstring |
+// kTypeDeletion varstring
+// varstring :=
+// len: varint32
+// data: uint8[len]
+
+#include "leveldb_wt.h"
+#include "db/write_batch_internal.h"
+
+namespace leveldb {
+
+// WriteBatch header has an 8-byte sequence number followed by a 4-byte count.
+static const size_t kHeader = 12;
+
+WriteBatch::WriteBatch() {
+ Clear();
+}
+
+WriteBatch::~WriteBatch() { }
+
+WriteBatch::Handler::~Handler() { }
+
+void WriteBatch::Clear() {
+ rep_.clear();
+ rep_.resize(kHeader);
+}
+
+Status WriteBatch::Iterate(Handler* handler) const {
+ Slice input(rep_);
+ if (input.size() < kHeader) {
+ return Status::Corruption("malformed WriteBatch (too small)");
+ }
+
+ input.remove_prefix(kHeader);
+ Slice key, value;
+ int found = 0;
+ while (!input.empty()) {
+ found++;
+ char tag = input[0];
+ input.remove_prefix(1);
+ switch (tag) {
+ case kTypeValue:
+ if (GetLengthPrefixedSlice(&input, &key) &&
+ GetLengthPrefixedSlice(&input, &value)) {
+ handler->Put(key, value);
+ } else {
+ return Status::Corruption("bad WriteBatch Put");
+ }
+ break;
+ case kTypeDeletion:
+ if (GetLengthPrefixedSlice(&input, &key)) {
+ handler->Delete(key);
+ } else {
+ return Status::Corruption("bad WriteBatch Delete");
+ }
+ break;
+ default:
+ return Status::Corruption("unknown WriteBatch tag");
+ }
+ }
+ if (found != WriteBatchInternal::Count(this)) {
+ return Status::Corruption("WriteBatch has wrong count");
+ } else {
+ return Status::OK();
+ }
+}
+
+int WriteBatchInternal::Count(const WriteBatch* b) {
+ return DecodeFixed32(b->rep_.data() + 8);
+}
+
+void WriteBatchInternal::SetCount(WriteBatch* b, int n) {
+ EncodeFixed32(&b->rep_[8], n);
+}
+
+void WriteBatch::Put(const Slice& key, const Slice& value) {
+ WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
+ rep_.push_back(static_cast<char>(kTypeValue));
+ PutLengthPrefixedSlice(&rep_, key);
+ PutLengthPrefixedSlice(&rep_, value);
+}
+
+void WriteBatch::Delete(const Slice& key) {
+ WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
+ rep_.push_back(static_cast<char>(kTypeDeletion));
+ PutLengthPrefixedSlice(&rep_, key);
+}
+
+void WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) {
+ assert(contents.size() >= kHeader);
+ b->rep_.assign(contents.data(), contents.size());
+}
+
+void WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src) {
+ SetCount(dst, Count(dst) + Count(src));
+ assert(src->rep_.size() >= kHeader);
+ dst->rep_.append(src->rep_.data() + kHeader, src->rep_.size() - kHeader);
+}
+
+} // namespace leveldb
diff --git a/api/leveldb/db/write_batch_internal.h b/api/leveldb/db/write_batch_internal.h
new file mode 100644
index 00000000000..b4295004e13
--- /dev/null
+++ b/api/leveldb/db/write_batch_internal.h
@@ -0,0 +1,39 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
+#define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
+
+#include "leveldb_wt.h"
+#include "db/dbformat.h"
+
+namespace leveldb {
+
+// WriteBatchInternal provides static methods for manipulating a
+// WriteBatch that we don't want in the public WriteBatch interface.
+class WriteBatchInternal {
+ public:
+ // Return the number of entries in the batch.
+ static int Count(const WriteBatch* batch);
+
+ // Set the count for the number of entries in the batch.
+ static void SetCount(WriteBatch* batch, int n);
+
+ static Slice Contents(const WriteBatch* batch) {
+ return Slice(batch->rep_);
+ }
+
+ static size_t ByteSize(const WriteBatch* batch) {
+ return batch->rep_.size();
+ }
+
+ static void SetContents(WriteBatch* batch, const Slice& contents);
+
+ static void Append(WriteBatch* dst, const WriteBatch* src);
+};
+
+} // namespace leveldb
+
+
+#endif // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
diff --git a/api/leveldb/leveldb_test.cc b/api/leveldb/leveldb_test.cc
new file mode 100644
index 00000000000..d3b78b4eb4b
--- /dev/null
+++ b/api/leveldb/leveldb_test.cc
@@ -0,0 +1,36 @@
+/*-
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+#include <assert.h>
+#include <iostream>
+#include <hyperleveldb/db.h>
+
+using namespace std;
+
+extern "C" int main() {
+ leveldb::DB* db;
+ leveldb::Options options;
+ options.create_if_missing = true;
+ leveldb::Status s = leveldb::DB::Open(options, "WTLDB_HOME", &db);
+ assert(s.ok());
+
+ s = db->Put(leveldb::WriteOptions(), "key", "value");
+ assert(s.ok());
+
+ leveldb::ReadOptions read_options;
+ read_options.snapshot = db->GetSnapshot();
+ leveldb::Iterator* iter = db->NewIterator(read_options);
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ cout << iter->key().ToString() << ": " << iter->value().ToString() << endl;
+ }
+
+ delete iter;
+ db->ReleaseSnapshot(read_options.snapshot);
+
+ delete db;
+
+ return (0);
+}
diff --git a/api/leveldb/leveldb_wt.cc b/api/leveldb/leveldb_wt.cc
new file mode 100644
index 00000000000..defa8b73556
--- /dev/null
+++ b/api/leveldb/leveldb_wt.cc
@@ -0,0 +1,673 @@
+/*-
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+#include "leveldb_wt.h"
+
+using leveldb::FilterPolicy;
+using leveldb::Iterator;
+using leveldb::Options;
+using leveldb::ReadOptions;
+using leveldb::WriteBatch;
+using leveldb::WriteOptions;
+using leveldb::Range;
+using leveldb::Slice;
+using leveldb::Snapshot;
+using leveldb::Status;
+#ifndef WITHOUT_HYPERLEVELDB
+namespace leveldb {
+class ReplayIterator;
+}
+#else
+using leveldb::Value;
+#endif
+
+#define WT_URI "table:data"
+#define WT_CONFIG "type=lsm,leaf_page_max=4KB,leaf_item_max=1KB"
+
+/* Destructors required for interfaces. */
+leveldb::DB::~DB() {}
+Snapshot::~Snapshot() {}
+
+/* Iterators, from leveldb/table/iterator.cc */
+Iterator::Iterator() {
+ cleanup_.function = NULL;
+ cleanup_.next = NULL;
+}
+
+Iterator::~Iterator() {
+ if (cleanup_.function != NULL) {
+ (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2);
+ for (Cleanup* c = cleanup_.next; c != NULL; ) {
+ (*c->function)(c->arg1, c->arg2);
+ Cleanup* next = c->next;
+ delete c;
+ c = next;
+ }
+ }
+}
+
+void Iterator::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) {
+ assert(func != NULL);
+ Cleanup* c;
+ if (cleanup_.function == NULL) {
+ c = &cleanup_;
+ } else {
+ c = new Cleanup;
+ c->next = cleanup_.next;
+ cleanup_.next = c;
+ }
+ c->function = func;
+ c->arg1 = arg1;
+ c->arg2 = arg2;
+}
+
+namespace {
+class EmptyIterator : public Iterator {
+ public:
+ EmptyIterator(const Status& s) : status_(s) { }
+ virtual bool Valid() const { return false; }
+ virtual void Seek(const Slice& target) { }
+ virtual void SeekToFirst() { }
+ virtual void SeekToLast() { }
+ virtual void Next() { assert(false); }
+ virtual void Prev() { assert(false); }
+ Slice key() const { assert(false); return Slice(); }
+ Slice value() const { assert(false); return Slice(); }
+ virtual Status status() const { return status_; }
+ private:
+ Status status_;
+};
+} // namespace
+
+Iterator* NewEmptyIterator() {
+ return new EmptyIterator(Status::OK());
+}
+
+Iterator* NewErrorIterator(const Status& status) {
+ return new EmptyIterator(status);
+}
+
+namespace leveldb {
+const FilterPolicy *NewBloomFilterPolicy(int bits_per_key) {
+ return 0;
+}
+
+Cache *NewLRUCache(size_t capacity) {
+ return 0;
+}
+
+Status DestroyDB(const std::string& name, const Options& options) {
+ fprintf(stderr, "DestroyDB %s", name.c_str());
+ return Status::NotSupported("sorry!");
+}
+
+Status RepairDB(const std::string& dbname, const Options& options) {
+ return Status::NotSupported("sorry!");
+}
+}
+
+/* POSIX thread-local storage */
+template <class T>
+class ThreadLocal {
+public:
+ static void cleanup(void *val) {
+ delete (T *)val;
+ }
+
+ ThreadLocal() {
+ int ret = pthread_key_create(&key_, cleanup);
+ assert(ret == 0);
+ }
+
+ ~ThreadLocal() {
+ int ret = pthread_key_delete(key_);
+ assert(ret == 0);
+ }
+
+ T *get() {
+ return (T *)(pthread_getspecific(key_));
+ }
+
+ void set(T *value) {
+ int ret = pthread_setspecific(key_, value);
+ assert(ret == 0);
+ }
+
+private:
+ pthread_key_t key_;
+};
+
+/* WiredTiger implementations. */
+class DbImpl;
+
+/* Context for operations (including snapshots, write batches, transactions) */
+class OperationContext {
+public:
+ OperationContext(WT_CONNECTION *conn) {
+ int ret = conn->open_session(conn, NULL, NULL, &session_);
+ assert(ret == 0);
+ ret = session_->open_cursor(
+ session_, WT_URI, NULL, NULL, &cursor_);
+ assert(ret == 0);
+ }
+
+ ~OperationContext() {
+ int ret = session_->close(session_, NULL);
+ assert(ret == 0);
+ }
+
+ WT_CURSOR *getCursor() { return cursor_; }
+
+private:
+ WT_SESSION *session_;
+ WT_CURSOR *cursor_;
+};
+
+class IteratorImpl : public Iterator {
+public:
+ IteratorImpl(WT_CURSOR *cursor, DbImpl *db, const ReadOptions &options) : cursor_(cursor), valid_(false) {}
+ virtual ~IteratorImpl() {}
+
+ // An iterator is either positioned at a key/value pair, or
+ // not valid. This method returns true iff the iterator is valid.
+ virtual bool Valid() const { return valid_; }
+
+ virtual void SeekToFirst();
+
+ virtual void SeekToLast();
+
+ virtual void Seek(const Slice& target);
+
+ virtual void Next();
+
+ virtual void Prev();
+
+ virtual Slice key() const {
+ return key_;
+ }
+
+ virtual Slice value() const {
+ return value_;
+ }
+
+ virtual Status status() const {
+ return status_;
+ }
+
+private:
+ WT_CURSOR *cursor_;
+ Slice key_, value_;
+ Status status_;
+ bool valid_;
+
+ // No copying allowed
+ IteratorImpl(const IteratorImpl&);
+ void operator=(const IteratorImpl&);
+};
+
+class SnapshotImpl : public Snapshot {
+public:
+ SnapshotImpl(DbImpl *db) : Snapshot() {}
+ virtual ~SnapshotImpl() {}
+};
+
+class DbImpl : public leveldb::DB {
+public:
+ DbImpl(WT_CONNECTION *conn) : DB(), conn_(conn), context_(new ThreadLocal<OperationContext>) {}
+ virtual ~DbImpl() {
+ delete context_;
+ int ret = conn_->close(conn_, NULL);
+ assert(ret == 0);
+ }
+
+ virtual Status Put(const WriteOptions& options,
+ const Slice& key,
+ const Slice& value);
+
+ virtual Status Delete(const WriteOptions& options, const Slice& key);
+
+ virtual Status Write(const WriteOptions& options, WriteBatch* updates);
+
+ virtual Status Get(const ReadOptions& options,
+ const Slice& key, std::string* value);
+
+#ifndef WITHOUT_HYPERLEVELDB
+ virtual Status LiveBackup(const Slice& name) { return Status::NotSupported("sorry!"); }
+ virtual void GetReplayTimestamp(std::string* timestamp) {}
+ virtual void AllowGarbageCollectBeforeTimestamp(const std::string& timestamp) {}
+ virtual bool ValidateTimestamp(const std::string& timestamp) {}
+ virtual int CompareTimestamps(const std::string& lhs, const std::string& rhs) {}
+ virtual Status GetReplayIterator(const std::string& timestamp,
+ leveldb::ReplayIterator** iter) { return Status::NotSupported("sorry!"); }
+ virtual void ReleaseReplayIterator(leveldb::ReplayIterator* iter) {}
+#else
+ virtual Status Get(const ReadOptions& options,
+ const Slice& key, Value* value);
+#endif
+
+ virtual Iterator* NewIterator(const ReadOptions& options);
+
+ virtual const Snapshot* GetSnapshot();
+
+ virtual void ReleaseSnapshot(const Snapshot* snapshot);
+
+ virtual bool GetProperty(const Slice& property, std::string* value);
+
+ virtual void GetApproximateSizes(const Range* range, int n,
+ uint64_t* sizes);
+
+ virtual void CompactRange(const Slice* begin, const Slice* end);
+
+ virtual void SuspendCompactions();
+
+ virtual void ResumeCompactions();
+
+private:
+ WT_CONNECTION *conn_;
+ ThreadLocal<OperationContext> *context_;
+
+ OperationContext *getContext() {
+ OperationContext *ctx = context_->get();
+ if (ctx == NULL) {
+ ctx = new OperationContext(conn_);
+ context_->set(ctx);
+ }
+ return (ctx);
+ }
+
+ WT_CURSOR *getCursor() { return getContext()->getCursor(); }
+
+ // No copying allowed
+ DbImpl(const DbImpl&);
+ void operator=(const DbImpl&);
+};
+
+Status
+leveldb::DB::Open(const Options &options, const std::string &name, leveldb::DB **dbptr)
+{
+ WT_CONNECTION *conn;
+ int ret = ::wiredtiger_open(name.c_str(), NULL, "create", &conn);
+ assert(ret == 0);
+
+ WT_SESSION *session;
+ ret = conn->open_session(conn, NULL, NULL, &session);
+ assert(ret == 0);
+ ret = session->create(session, WT_URI, WT_CONFIG);
+ assert(ret == 0);
+ ret = session->close(session, NULL);
+ assert(ret == 0);
+
+ *dbptr = new DbImpl(conn);
+ return Status::OK();
+}
+
+// Set the database entry for "key" to "value". Returns OK on success,
+// and a non-OK status on error.
+// Note: consider setting options.sync = true.
+Status
+DbImpl::Put(const WriteOptions& options,
+ const Slice& key, const Slice& value)
+{
+ WT_CURSOR *cursor = getCursor();
+ WT_ITEM item;
+
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ item.data = value.data();
+ item.size = value.size();
+ cursor->set_value(cursor, &item);
+ int ret = cursor->insert(cursor);
+ assert(ret == 0);
+ return Status::OK();
+}
+
+// Remove the database entry (if any) for "key". Returns OK on
+// success, and a non-OK status on error. It is not an error if "key"
+// did not exist in the database.
+// Note: consider setting options.sync = true.
+Status
+DbImpl::Delete(const WriteOptions& options, const Slice& key)
+{
+ WT_CURSOR *cursor = getCursor();
+ WT_ITEM item;
+
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ int ret = cursor->remove(cursor);
+ assert(ret == 0);
+ return Status::OK();
+}
+
+// Implement WriteBatch::Handler
+class WriteBatchHandler : public WriteBatch::Handler {
+public:
+ WriteBatchHandler(WT_CURSOR *cursor) : cursor_(cursor), status_(0) {}
+ virtual ~WriteBatchHandler() {}
+ int getStatus() { return status_; }
+
+ virtual void Put(const Slice& key, const Slice& value) {
+ WT_ITEM item;
+
+ item.data = key.data();
+ item.size = key.size();
+ cursor_->set_key(cursor_, &item);
+ item.data = value.data();
+ item.size = value.size();
+ cursor_->set_value(cursor_, &item);
+ int ret = cursor_->insert(cursor_);
+ if (ret != 0 && status_ == 0)
+ status_ = ret;
+ }
+ virtual void Delete(const Slice& key) {
+ WT_ITEM item;
+
+ item.data = key.data();
+ item.size = key.size();
+ cursor_->set_key(cursor_, &item);
+ int ret = cursor_->remove(cursor_);
+ if (ret != 0 && status_ == 0)
+ status_ = ret;
+ }
+
+private:
+ WT_CURSOR *cursor_;
+ int status_;
+};
+
+// Apply the specified updates to the database.
+// Returns OK on success, non-OK on failure.
+// Note: consider setting options.sync = true.
+Status
+DbImpl::Write(const WriteOptions& options, WriteBatch* updates)
+{
+ WT_CURSOR *cursor = getCursor();
+ WriteBatchHandler handler(cursor);
+ Status status = updates->Iterate(&handler);
+ assert(handler.getStatus() == 0);
+ return status;
+}
+
+// If the database contains an entry for "key" store the
+// corresponding value in *value and return OK.
+//
+// If there is no entry for "key" leave *value unchanged and return
+// a status for which Status::IsNotFound() returns true.
+//
+// May return some other Status on an error.
+Status
+DbImpl::Get(const ReadOptions& options,
+ const Slice& key, std::string* value)
+{
+ WT_CURSOR *cursor = getCursor();
+ WT_ITEM item;
+
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ int ret = cursor->search(cursor);
+ if (ret == WT_NOTFOUND)
+ return Status::NotFound("DB::Get key not found");
+ assert(ret == 0);
+ ret = cursor->get_value(cursor, &item);
+ assert(ret == 0);
+ *value = std::string((const char *)item.data, item.size);
+ return Status::OK();
+}
+
+#ifdef WITHOUT_HYPERLEVELDB
+Status
+DbImpl::Get(const ReadOptions& options,
+ const Slice& key, Value* value)
+{
+ WT_CURSOR *cursor = getCursor();
+ WT_ITEM item;
+
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ int ret = cursor->search(cursor);
+ if (ret == WT_NOTFOUND)
+ return Status::NotFound("DB::Get key not found");
+ assert(ret == 0);
+ ret = cursor->get_value(cursor, &item);
+ assert(ret == 0);
+ value->assign((const char *)item.data, item.size);
+ return Status::OK();
+}
+#endif
+
+// Return a heap-allocated iterator over the contents of the database.
+// The result of NewIterator() is initially invalid (caller must
+// call one of the Seek methods on the iterator before using it).
+//
+// Caller should delete the iterator when it is no longer needed.
+// The returned iterator should be deleted before this db is deleted.
+Iterator *
+DbImpl::NewIterator(const ReadOptions& options)
+{
+ return new IteratorImpl(getCursor(), this, options);
+}
+
+// Return a handle to the current DB state. Iterators created with
+// this handle will all observe a stable snapshot of the current DB
+// state. The caller must call ReleaseSnapshot(result) when the
+// snapshot is no longer needed.
+const Snapshot *
+DbImpl::GetSnapshot()
+{
+ return new SnapshotImpl(this);
+}
+
+// Release a previously acquired snapshot. The caller must not
+// use "snapshot" after this call.
+void
+DbImpl::ReleaseSnapshot(const Snapshot* snapshot)
+{
+ delete (SnapshotImpl *)snapshot;
+}
+
+// DB implementations can export properties about their state
+// via this method. If "property" is a valid property understood by this
+// DB implementation, fills "*value" with its current value and returns
+// true. Otherwise returns false.
+//
+//
+// Valid property names include:
+//
+// "leveldb.num-files-at-level<N>" - return the number of files at level <N>,
+// where <N> is an ASCII representation of a level number (e.g. "0").
+// "leveldb.stats" - returns a multi-line string that describes statistics
+// about the internal operation of the DB.
+// "leveldb.sstables" - returns a multi-line string that describes all
+// of the sstables that make up the db contents.
+bool
+DbImpl::GetProperty(const Slice& property, std::string* value)
+{
+ /* Not supported */
+ return false;
+}
+
+// For each i in [0,n-1], store in "sizes[i]", the approximate
+// file system space used by keys in "[range[i].start .. range[i].limit)".
+//
+// Note that the returned sizes measure file system space usage, so
+// if the user data compresses by a factor of ten, the returned
+// sizes will be one-tenth the size of the corresponding user data size.
+//
+// The results may not include the sizes of recently written data.
+void
+DbImpl::GetApproximateSizes(const Range* range, int n,
+ uint64_t* sizes)
+{
+ int i;
+
+ /* XXX Not supported */
+ for (i = 0; i < n; i++)
+ sizes[i] = 1;
+}
+
+// Compact the underlying storage for the key range [*begin,*end].
+// In particular, deleted and overwritten versions are discarded,
+// and the data is rearranged to reduce the cost of operations
+// needed to access the data. This operation should typically only
+// be invoked by users who understand the underlying implementation.
+//
+// begin==NULL is treated as a key before all keys in the database.
+// end==NULL is treated as a key after all keys in the database.
+// Therefore the following call will compact the entire database:
+// db->CompactRange(NULL, NULL);
+void
+DbImpl::CompactRange(const Slice* begin, const Slice* end)
+{
+ /* Not supported */
+ assert(0);
+}
+
+// Suspends the background compaction thread. This methods
+// returns once suspended.
+void
+DbImpl::SuspendCompactions()
+{
+ /* Not supported */
+ assert(0);
+}
+
+// Resumes a suspended background compation thread.
+void
+DbImpl::ResumeCompactions()
+{
+ /* Not supported */
+ assert(0);
+}
+
+// Position at the first key in the source. The iterator is Valid()
+// after this call iff the source is not empty.
+void
+IteratorImpl::SeekToFirst()
+{
+ WT_ITEM item;
+
+ int ret = cursor_->reset(cursor_);
+ assert(ret == 0);
+ ret = cursor_->next(cursor_);
+ if (ret != 0) {
+ valid_ = false;
+ return;
+ }
+ ret = cursor_->get_key(cursor_, &item);
+ assert(ret == 0);
+ key_ = Slice((const char *)item.data, item.size);
+ ret = cursor_->get_value(cursor_, &item);
+ assert(ret == 0);
+ value_ = Slice((const char *)item.data, item.size);
+ valid_ = true;
+}
+
+// Position at the last key in the source. The iterator is
+// Valid() after this call iff the source is not empty.
+void
+IteratorImpl::SeekToLast()
+{
+ WT_ITEM item;
+
+ int ret = cursor_->reset(cursor_);
+ assert(ret == 0);
+ ret = cursor_->prev(cursor_);
+ if (ret != 0) {
+ valid_ = false;
+ return;
+ }
+ ret = cursor_->get_key(cursor_, &item);
+ assert(ret == 0);
+ key_ = Slice((const char *)item.data, item.size);
+ ret = cursor_->get_value(cursor_, &item);
+ assert(ret == 0);
+ value_ = Slice((const char *)item.data, item.size);
+ valid_ = true;
+}
+
+// Position at the first key in the source that at or past target
+// The iterator is Valid() after this call iff the source contains
+// an entry that comes at or past target.
+void
+IteratorImpl::Seek(const Slice& target)
+{
+ WT_ITEM item;
+
+ item.data = target.data();
+ item.size = target.size();
+ cursor_->set_key(cursor_, &item);
+ int cmp, ret = cursor_->search_near(cursor_, &cmp);
+ if (ret == 0 && cmp < 0)
+ ret = cursor_->next(cursor_);
+ if (ret == WT_NOTFOUND)
+ status_ = Status::NotFound("Iterator::Seek key not found");
+ if (ret != 0) {
+ valid_ = false;
+ return;
+ }
+ ret = cursor_->get_key(cursor_, &item);
+ assert(ret == 0);
+ key_ = Slice((const char *)item.data, item.size);
+ ret = cursor_->get_value(cursor_, &item);
+ assert(ret == 0);
+ value_ = Slice((const char *)item.data, item.size);
+ valid_ = true;
+}
+
+// Moves to the next entry in the source. After this call, Valid() is
+// true iff the iterator was not positioned at the last entry in the source.
+// REQUIRES: Valid()
+void
+IteratorImpl::Next()
+{
+ WT_ITEM item;
+
+ assert(valid_);
+
+ int ret = cursor_->next(cursor_);
+ if (ret == WT_NOTFOUND)
+ status_ = Status::NotFound("Iterator::Next no more records");
+ if (ret != 0) {
+ valid_ = false;
+ return;
+ }
+ ret = cursor_->get_key(cursor_, &item);
+ assert(ret == 0);
+ key_ = Slice((const char *)item.data, item.size);
+ ret = cursor_->get_value(cursor_, &item);
+ assert(ret == 0);
+ value_ = Slice((const char *)item.data, item.size);
+ valid_ = true;
+}
+
+// Moves to the previous entry in the source. After this call, Valid() is
+// true iff the iterator was not positioned at the first entry in source.
+// REQUIRES: Valid()
+void
+IteratorImpl::Prev()
+{
+ WT_ITEM item;
+
+ assert(valid_);
+
+ int ret = cursor_->prev(cursor_);
+ if (ret == WT_NOTFOUND)
+ status_ = Status::NotFound("Iterator::Prev no more records");
+ if (ret != 0) {
+ valid_ = false;
+ return;
+ }
+ ret = cursor_->get_key(cursor_, &item);
+ assert(ret == 0);
+ key_ = Slice((const char *)item.data, item.size);
+ ret = cursor_->get_value(cursor_, &item);
+ assert(ret == 0);
+ value_ = Slice((const char *)item.data, item.size);
+ valid_ = true;
+}
diff --git a/api/leveldb/leveldb_wt.h b/api/leveldb/leveldb_wt.h
new file mode 100644
index 00000000000..0c46456a9ef
--- /dev/null
+++ b/api/leveldb/leveldb_wt.h
@@ -0,0 +1,23 @@
+#ifndef WITHOUT_HYPERLEVELDB
+#include <hyperleveldb/comparator.h>
+#include <hyperleveldb/db.h>
+#include <hyperleveldb/env.h>
+#include <hyperleveldb/filter_policy.h>
+#include <hyperleveldb/slice.h>
+#include <hyperleveldb/slice.h>
+#include <hyperleveldb/status.h>
+#include <hyperleveldb/table_builder.h>
+#include <hyperleveldb/write_batch.h>
+#else
+#include <leveldb/comparator.h>
+#include <leveldb/db.h>
+#include <leveldb/env.h>
+#include <leveldb/filter_policy.h>
+#include <leveldb/slice.h>
+#include <leveldb/slice.h>
+#include <leveldb/status.h>
+#include <leveldb/table_builder.h>
+#include <leveldb/write_batch.h>
+#endif
+
+#include "wiredtiger.h"
diff --git a/api/leveldb/port/port.h b/api/leveldb/port/port.h
new file mode 100644
index 00000000000..b6f551182da
--- /dev/null
+++ b/api/leveldb/port/port.h
@@ -0,0 +1,11 @@
+#ifndef _PORT_H_
+#define _PORT_H_ 1
+/* Stub portability header for imported LevelDB code. */
+
+#include "wiredtiger.h"
+
+namespace port {
+ const int kLittleEndian = 1;
+}
+
+#endif
diff --git a/api/leveldb/util/arena.h b/api/leveldb/util/arena.h
new file mode 100644
index 00000000000..8f7dde226c4
--- /dev/null
+++ b/api/leveldb/util/arena.h
@@ -0,0 +1,68 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_UTIL_ARENA_H_
+#define STORAGE_LEVELDB_UTIL_ARENA_H_
+
+#include <cstddef>
+#include <vector>
+#include <assert.h>
+#include <stdint.h>
+
+namespace leveldb {
+
+class Arena {
+ public:
+ Arena();
+ ~Arena();
+
+ // Return a pointer to a newly allocated memory block of "bytes" bytes.
+ char* Allocate(size_t bytes);
+
+ // Allocate memory with the normal alignment guarantees provided by malloc
+ char* AllocateAligned(size_t bytes);
+
+ // Returns an estimate of the total memory usage of data allocated
+ // by the arena (including space allocated but not yet used for user
+ // allocations).
+ size_t MemoryUsage() const {
+ return blocks_memory_ + blocks_.capacity() * sizeof(char*);
+ }
+
+ private:
+ char* AllocateFallback(size_t bytes);
+ char* AllocateNewBlock(size_t block_bytes);
+
+ // Allocation state
+ char* alloc_ptr_;
+ size_t alloc_bytes_remaining_;
+
+ // Array of new[] allocated memory blocks
+ std::vector<char*> blocks_;
+
+ // Bytes of memory in blocks allocated so far
+ size_t blocks_memory_;
+
+ // No copying allowed
+ Arena(const Arena&);
+ void operator=(const Arena&);
+};
+
+inline char* Arena::Allocate(size_t bytes) {
+ // The semantics of what to return are a bit messy if we allow
+ // 0-byte allocations, so we disallow them here (we don't need
+ // them for our internal use).
+ assert(bytes > 0);
+ if (bytes <= alloc_bytes_remaining_) {
+ char* result = alloc_ptr_;
+ alloc_ptr_ += bytes;
+ alloc_bytes_remaining_ -= bytes;
+ return result;
+ }
+ return AllocateFallback(bytes);
+}
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_UTIL_ARENA_H_
diff --git a/api/leveldb/util/coding.cc b/api/leveldb/util/coding.cc
new file mode 100644
index 00000000000..dbd7a6545c6
--- /dev/null
+++ b/api/leveldb/util/coding.cc
@@ -0,0 +1,194 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "util/coding.h"
+
+namespace leveldb {
+
+void EncodeFixed32(char* buf, uint32_t value) {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ memcpy(buf, &value, sizeof(value));
+#else
+ buf[0] = value & 0xff;
+ buf[1] = (value >> 8) & 0xff;
+ buf[2] = (value >> 16) & 0xff;
+ buf[3] = (value >> 24) & 0xff;
+#endif
+}
+
+void EncodeFixed64(char* buf, uint64_t value) {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ memcpy(buf, &value, sizeof(value));
+#else
+ buf[0] = value & 0xff;
+ buf[1] = (value >> 8) & 0xff;
+ buf[2] = (value >> 16) & 0xff;
+ buf[3] = (value >> 24) & 0xff;
+ buf[4] = (value >> 32) & 0xff;
+ buf[5] = (value >> 40) & 0xff;
+ buf[6] = (value >> 48) & 0xff;
+ buf[7] = (value >> 56) & 0xff;
+#endif
+}
+
+void PutFixed32(std::string* dst, uint32_t value) {
+ char buf[sizeof(value)];
+ EncodeFixed32(buf, value);
+ dst->append(buf, sizeof(buf));
+}
+
+void PutFixed64(std::string* dst, uint64_t value) {
+ char buf[sizeof(value)];
+ EncodeFixed64(buf, value);
+ dst->append(buf, sizeof(buf));
+}
+
+char* EncodeVarint32(char* dst, uint32_t v) {
+ // Operate on characters as unsigneds
+ unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
+ static const int B = 128;
+ if (v < (1<<7)) {
+ *(ptr++) = v;
+ } else if (v < (1<<14)) {
+ *(ptr++) = v | B;
+ *(ptr++) = v>>7;
+ } else if (v < (1<<21)) {
+ *(ptr++) = v | B;
+ *(ptr++) = (v>>7) | B;
+ *(ptr++) = v>>14;
+ } else if (v < (1<<28)) {
+ *(ptr++) = v | B;
+ *(ptr++) = (v>>7) | B;
+ *(ptr++) = (v>>14) | B;
+ *(ptr++) = v>>21;
+ } else {
+ *(ptr++) = v | B;
+ *(ptr++) = (v>>7) | B;
+ *(ptr++) = (v>>14) | B;
+ *(ptr++) = (v>>21) | B;
+ *(ptr++) = v>>28;
+ }
+ return reinterpret_cast<char*>(ptr);
+}
+
+void PutVarint32(std::string* dst, uint32_t v) {
+ char buf[5];
+ char* ptr = EncodeVarint32(buf, v);
+ dst->append(buf, ptr - buf);
+}
+
+char* EncodeVarint64(char* dst, uint64_t v) {
+ static const int B = 128;
+ unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
+ while (v >= B) {
+ *(ptr++) = (v & (B-1)) | B;
+ v >>= 7;
+ }
+ *(ptr++) = static_cast<unsigned char>(v);
+ return reinterpret_cast<char*>(ptr);
+}
+
+void PutVarint64(std::string* dst, uint64_t v) {
+ char buf[10];
+ char* ptr = EncodeVarint64(buf, v);
+ dst->append(buf, ptr - buf);
+}
+
+void PutLengthPrefixedSlice(std::string* dst, const Slice& value) {
+ PutVarint32(dst, value.size());
+ dst->append(value.data(), value.size());
+}
+
+int VarintLength(uint64_t v) {
+ int len = 1;
+ while (v >= 128) {
+ v >>= 7;
+ len++;
+ }
+ return len;
+}
+
+const char* GetVarint32PtrFallback(const char* p,
+ const char* limit,
+ uint32_t* value) {
+ uint32_t result = 0;
+ for (uint32_t shift = 0; shift <= 28 && p < limit; shift += 7) {
+ uint32_t byte = *(reinterpret_cast<const unsigned char*>(p));
+ p++;
+ if (byte & 128) {
+ // More bytes are present
+ result |= ((byte & 127) << shift);
+ } else {
+ result |= (byte << shift);
+ *value = result;
+ return reinterpret_cast<const char*>(p);
+ }
+ }
+ return NULL;
+}
+
+bool GetVarint32(Slice* input, uint32_t* value) {
+ const char* p = input->data();
+ const char* limit = p + input->size();
+ const char* q = GetVarint32Ptr(p, limit, value);
+ if (q == NULL) {
+ return false;
+ } else {
+ *input = Slice(q, limit - q);
+ return true;
+ }
+}
+
+const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* value) {
+ uint64_t result = 0;
+ for (uint32_t shift = 0; shift <= 63 && p < limit; shift += 7) {
+ uint64_t byte = *(reinterpret_cast<const unsigned char*>(p));
+ p++;
+ if (byte & 128) {
+ // More bytes are present
+ result |= ((byte & 127) << shift);
+ } else {
+ result |= (byte << shift);
+ *value = result;
+ return reinterpret_cast<const char*>(p);
+ }
+ }
+ return NULL;
+}
+
+bool GetVarint64(Slice* input, uint64_t* value) {
+ const char* p = input->data();
+ const char* limit = p + input->size();
+ const char* q = GetVarint64Ptr(p, limit, value);
+ if (q == NULL) {
+ return false;
+ } else {
+ *input = Slice(q, limit - q);
+ return true;
+ }
+}
+
+const char* GetLengthPrefixedSlice(const char* p, const char* limit,
+ Slice* result) {
+ uint32_t len;
+ p = GetVarint32Ptr(p, limit, &len);
+ if (p == NULL) return NULL;
+ if (p + len > limit) return NULL;
+ *result = Slice(p, len);
+ return p + len;
+}
+
+bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
+ uint32_t len;
+ if (GetVarint32(input, &len) &&
+ input->size() >= len) {
+ *result = Slice(input->data(), len);
+ input->remove_prefix(len);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+} // namespace leveldb
diff --git a/api/leveldb/util/coding.h b/api/leveldb/util/coding.h
new file mode 100644
index 00000000000..cba97ec421a
--- /dev/null
+++ b/api/leveldb/util/coding.h
@@ -0,0 +1,104 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Endian-neutral encoding:
+// * Fixed-length numbers are encoded with least-significant byte first
+// * In addition we support variable length "varint" encoding
+// * Strings are encoded prefixed by their length in varint format
+
+#ifndef STORAGE_LEVELDB_UTIL_CODING_H_
+#define STORAGE_LEVELDB_UTIL_CODING_H_
+
+#include <stdint.h>
+#include <string.h>
+#include <string>
+#include "leveldb_wt.h"
+#include "port/port.h"
+
+namespace leveldb {
+
+// Standard Put... routines append to a string
+extern void PutFixed32(std::string* dst, uint32_t value);
+extern void PutFixed64(std::string* dst, uint64_t value);
+extern void PutVarint32(std::string* dst, uint32_t value);
+extern void PutVarint64(std::string* dst, uint64_t value);
+extern void PutLengthPrefixedSlice(std::string* dst, const Slice& value);
+
+// Standard Get... routines parse a value from the beginning of a Slice
+// and advance the slice past the parsed value.
+extern bool GetVarint32(Slice* input, uint32_t* value);
+extern bool GetVarint64(Slice* input, uint64_t* value);
+extern bool GetLengthPrefixedSlice(Slice* input, Slice* result);
+
+// Pointer-based variants of GetVarint... These either store a value
+// in *v and return a pointer just past the parsed value, or return
+// NULL on error. These routines only look at bytes in the range
+// [p..limit-1]
+extern const char* GetVarint32Ptr(const char* p,const char* limit, uint32_t* v);
+extern const char* GetVarint64Ptr(const char* p,const char* limit, uint64_t* v);
+
+// Returns the length of the varint32 or varint64 encoding of "v"
+extern int VarintLength(uint64_t v);
+
+// Lower-level versions of Put... that write directly into a character buffer
+// REQUIRES: dst has enough space for the value being written
+extern void EncodeFixed32(char* dst, uint32_t value);
+extern void EncodeFixed64(char* dst, uint64_t value);
+
+// Lower-level versions of Put... that write directly into a character buffer
+// and return a pointer just past the last byte written.
+// REQUIRES: dst has enough space for the value being written
+extern char* EncodeVarint32(char* dst, uint32_t value);
+extern char* EncodeVarint64(char* dst, uint64_t value);
+
+// Lower-level versions of Get... that read directly from a character buffer
+// without any bounds checking.
+
+inline uint32_t DecodeFixed32(const char* ptr) {
+ if (port::kLittleEndian) {
+ // Load the raw bytes
+ uint32_t result;
+ memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
+ return result;
+ } else {
+ return ((static_cast<uint32_t>(static_cast<unsigned char>(ptr[0])))
+ | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[1])) << 8)
+ | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[2])) << 16)
+ | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[3])) << 24));
+ }
+}
+
+inline uint64_t DecodeFixed64(const char* ptr) {
+ if (port::kLittleEndian) {
+ // Load the raw bytes
+ uint64_t result;
+ memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
+ return result;
+ } else {
+ uint64_t lo = DecodeFixed32(ptr);
+ uint64_t hi = DecodeFixed32(ptr + 4);
+ return (hi << 32) | lo;
+ }
+}
+
+// Internal routine for use by fallback path of GetVarint32Ptr
+extern const char* GetVarint32PtrFallback(const char* p,
+ const char* limit,
+ uint32_t* value);
+inline const char* GetVarint32Ptr(const char* p,
+ const char* limit,
+ uint32_t* value) {
+ if (p < limit) {
+ uint32_t result = *(reinterpret_cast<const unsigned char*>(p));
+ if ((result & 128) == 0) {
+ *value = result;
+ return p + 1;
+ }
+ }
+ return GetVarint32PtrFallback(p, limit, value);
+}
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_UTIL_CODING_H_
diff --git a/api/leveldb/util/comparator.cc b/api/leveldb/util/comparator.cc
new file mode 100644
index 00000000000..c6c9e17efee
--- /dev/null
+++ b/api/leveldb/util/comparator.cc
@@ -0,0 +1,80 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <algorithm>
+#include <stdint.h>
+#include "leveldb_wt.h"
+#include "port/port.h"
+#include "util/logging.h"
+
+namespace leveldb {
+
+Comparator::~Comparator() { }
+
+#ifndef WITHOUT_HYPERLEVELDB
+uint64_t Comparator::KeyNum(const Slice& key) const {
+ return 0;
+}
+#endif
+
+namespace {
+class BytewiseComparatorImpl : public Comparator {
+ public:
+ BytewiseComparatorImpl() { }
+
+ virtual const char* Name() const {
+ return "leveldb.BytewiseComparator";
+ }
+
+ virtual int Compare(const Slice& a, const Slice& b) const {
+ return a.compare(b);
+ }
+
+ virtual void FindShortestSeparator(
+ std::string* start,
+ const Slice& limit) const {
+ // Find length of common prefix
+ size_t min_length = std::min(start->size(), limit.size());
+ size_t diff_index = 0;
+ while ((diff_index < min_length) &&
+ ((*start)[diff_index] == limit[diff_index])) {
+ diff_index++;
+ }
+
+ if (diff_index >= min_length) {
+ // Do not shorten if one string is a prefix of the other
+ } else {
+ uint8_t diff_byte = static_cast<uint8_t>((*start)[diff_index]);
+ if (diff_byte < static_cast<uint8_t>(0xff) &&
+ diff_byte + 1 < static_cast<uint8_t>(limit[diff_index])) {
+ (*start)[diff_index]++;
+ start->resize(diff_index + 1);
+ assert(Compare(*start, limit) < 0);
+ }
+ }
+ }
+
+ virtual void FindShortSuccessor(std::string* key) const {
+ // Find first character that can be incremented
+ size_t n = key->size();
+ for (size_t i = 0; i < n; i++) {
+ const uint8_t byte = (*key)[i];
+ if (byte != static_cast<uint8_t>(0xff)) {
+ (*key)[i] = byte + 1;
+ key->resize(i+1);
+ return;
+ }
+ }
+ // *key is a run of 0xffs. Leave it alone.
+ }
+};
+} // namespace
+
+static const Comparator* bytewise = new BytewiseComparatorImpl;
+
+const Comparator* BytewiseComparator() {
+ return bytewise;
+}
+
+} // namespace leveldb
diff --git a/api/leveldb/util/env.cc b/api/leveldb/util/env.cc
new file mode 100644
index 00000000000..33062fbdd1b
--- /dev/null
+++ b/api/leveldb/util/env.cc
@@ -0,0 +1,96 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "hyperleveldb/env.h"
+
+namespace leveldb {
+
+Env::~Env() {
+}
+
+SequentialFile::~SequentialFile() {
+}
+
+RandomAccessFile::~RandomAccessFile() {
+}
+
+WritableFile::~WritableFile() {
+}
+
+Logger::~Logger() {
+}
+
+FileLock::~FileLock() {
+}
+
+void Log(Logger* info_log, const char* format, ...) {
+ if (info_log != NULL) {
+ va_list ap;
+ va_start(ap, format);
+ info_log->Logv(format, ap);
+ va_end(ap);
+ }
+}
+
+static Status DoWriteStringToFile(Env* env, const Slice& data,
+ const std::string& fname,
+ bool should_sync) {
+ WritableFile* file;
+ Status s = env->NewWritableFile(fname, &file);
+ if (!s.ok()) {
+ return s;
+ }
+ s = file->Append(data);
+ if (s.ok() && should_sync) {
+ s = file->Sync();
+ }
+ if (s.ok()) {
+ s = file->Close();
+ }
+ delete file; // Will auto-close if we did not close above
+ if (!s.ok()) {
+ env->DeleteFile(fname);
+ }
+ return s;
+}
+
+Status WriteStringToFile(Env* env, const Slice& data,
+ const std::string& fname) {
+ return DoWriteStringToFile(env, data, fname, false);
+}
+
+Status WriteStringToFileSync(Env* env, const Slice& data,
+ const std::string& fname) {
+ return DoWriteStringToFile(env, data, fname, true);
+}
+
+Status ReadFileToString(Env* env, const std::string& fname, std::string* data) {
+ data->clear();
+ SequentialFile* file;
+ Status s = env->NewSequentialFile(fname, &file);
+ if (!s.ok()) {
+ return s;
+ }
+ static const int kBufferSize = 8192;
+ char* space = new char[kBufferSize];
+ while (true) {
+ Slice fragment;
+ s = file->Read(kBufferSize, &fragment, space);
+ if (!s.ok()) {
+ break;
+ }
+ data->append(fragment.data(), fragment.size());
+ if (fragment.empty()) {
+ break;
+ }
+ }
+ delete[] space;
+ delete file;
+ return s;
+}
+
+EnvWrapper::~EnvWrapper() {
+}
+
+} // namespace leveldb
diff --git a/api/leveldb/util/env_posix.cc b/api/leveldb/util/env_posix.cc
new file mode 100644
index 00000000000..66dab413360
--- /dev/null
+++ b/api/leveldb/util/env_posix.cc
@@ -0,0 +1,617 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <deque>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#if defined(LEVELDB_PLATFORM_ANDROID)
+#include <sys/stat.h>
+#endif
+#include "leveldb_wt.h"
+#include "port/port.h"
+#include "util/logging.h"
+#include "util/posix_logger.h"
+
+namespace leveldb {
+
+namespace {
+
+static Status IOError(const std::string& context, int err_number) {
+ return Status::IOError(context, strerror(err_number));
+}
+
+class PosixSequentialFile: public SequentialFile {
+ private:
+ std::string filename_;
+ FILE* file_;
+
+ public:
+ PosixSequentialFile(const std::string& fname, FILE* f)
+ : filename_(fname), file_(f) { }
+ virtual ~PosixSequentialFile() { fclose(file_); }
+
+ virtual Status Read(size_t n, Slice* result, char* scratch) {
+ Status s;
+ size_t r = fread_unlocked(scratch, 1, n, file_);
+ *result = Slice(scratch, r);
+ if (r < n) {
+ if (feof(file_)) {
+ // We leave status as ok if we hit the end of the file
+ } else {
+ // A partial read with an error: return a non-ok status
+ s = IOError(filename_, errno);
+ }
+ }
+ return s;
+ }
+
+ virtual Status Skip(uint64_t n) {
+ if (fseek(file_, n, SEEK_CUR)) {
+ return IOError(filename_, errno);
+ }
+ return Status::OK();
+ }
+};
+
+// pread() based random-access
+class PosixRandomAccessFile: public RandomAccessFile {
+ private:
+ std::string filename_;
+ int fd_;
+
+ public:
+ PosixRandomAccessFile(const std::string& fname, int fd)
+ : filename_(fname), fd_(fd) { }
+ virtual ~PosixRandomAccessFile() { close(fd_); }
+
+ virtual Status Read(uint64_t offset, size_t n, Slice* result,
+ char* scratch) const {
+ Status s;
+ ssize_t r = pread(fd_, scratch, n, static_cast<off_t>(offset));
+ *result = Slice(scratch, (r < 0) ? 0 : r);
+ if (r < 0) {
+ // An error: return a non-ok status
+ s = IOError(filename_, errno);
+ }
+ return s;
+ }
+};
+
+// mmap() based random-access
+class PosixMmapReadableFile: public RandomAccessFile {
+ private:
+ std::string filename_;
+ void* mmapped_region_;
+ size_t length_;
+
+ public:
+ // base[0,length-1] contains the mmapped contents of the file.
+ PosixMmapReadableFile(const std::string& fname, void* base, size_t length)
+ : filename_(fname), mmapped_region_(base), length_(length) { }
+ virtual ~PosixMmapReadableFile() { munmap(mmapped_region_, length_); }
+
+ virtual Status Read(uint64_t offset, size_t n, Slice* result,
+ char* scratch) const {
+ Status s;
+ if (offset + n > length_) {
+ *result = Slice();
+ s = IOError(filename_, EINVAL);
+ } else {
+ *result = Slice(reinterpret_cast<char*>(mmapped_region_) + offset, n);
+ }
+ return s;
+ }
+};
+
+// We preallocate up to an extra megabyte and use memcpy to append new
+// data to the file. This is safe since we either properly close the
+// file before reading from it, or for log files, the reading code
+// knows enough to skip zero suffixes.
+class PosixMmapFile : public WritableFile {
+ private:
+ std::string filename_;
+ int fd_;
+ size_t page_size_;
+ size_t map_size_; // How much extra memory to map at a time
+ char* base_; // The mapped region
+ char* limit_; // Limit of the mapped region
+ char* dst_; // Where to write next (in range [base_,limit_])
+ char* last_sync_; // Where have we synced up to
+ uint64_t file_offset_; // Offset of base_ in file
+
+ // Have we done an munmap of unsynced data?
+ bool pending_sync_;
+
+ // Roundup x to a multiple of y
+ static size_t Roundup(size_t x, size_t y) {
+ return ((x + y - 1) / y) * y;
+ }
+
+ size_t TruncateToPageBoundary(size_t s) {
+ s -= (s & (page_size_ - 1));
+ assert((s % page_size_) == 0);
+ return s;
+ }
+
+ bool UnmapCurrentRegion() {
+ bool result = true;
+ if (base_ != NULL) {
+ if (last_sync_ < limit_) {
+ // Defer syncing this data until next Sync() call, if any
+ pending_sync_ = true;
+ }
+ if (munmap(base_, limit_ - base_) != 0) {
+ result = false;
+ }
+ file_offset_ += limit_ - base_;
+ base_ = NULL;
+ limit_ = NULL;
+ last_sync_ = NULL;
+ dst_ = NULL;
+
+ // Increase the amount we map the next time, but capped at 1MB
+ if (map_size_ < (1<<20)) {
+ map_size_ *= 2;
+ }
+ }
+ return result;
+ }
+
+ bool MapNewRegion() {
+ assert(base_ == NULL);
+ if (ftruncate(fd_, file_offset_ + map_size_) < 0) {
+ return false;
+ }
+ void* ptr = mmap(NULL, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_, file_offset_);
+ if (ptr == MAP_FAILED) {
+ return false;
+ }
+ base_ = reinterpret_cast<char*>(ptr);
+ limit_ = base_ + map_size_;
+ dst_ = base_;
+ last_sync_ = base_;
+ return true;
+ }
+
+ public:
+ PosixMmapFile(const std::string& fname, int fd, size_t page_size)
+ : filename_(fname),
+ fd_(fd),
+ page_size_(page_size),
+ map_size_(Roundup(65536, page_size)),
+ base_(NULL),
+ limit_(NULL),
+ dst_(NULL),
+ last_sync_(NULL),
+ file_offset_(0),
+ pending_sync_(false) {
+ assert((page_size & (page_size - 1)) == 0);
+ }
+
+
+ ~PosixMmapFile() {
+ if (fd_ >= 0) {
+ PosixMmapFile::Close();
+ }
+ }
+
+ virtual Status Append(const Slice& data) {
+ const char* src = data.data();
+ size_t left = data.size();
+ while (left > 0) {
+ assert(base_ <= dst_);
+ assert(dst_ <= limit_);
+ size_t avail = limit_ - dst_;
+ if (avail == 0) {
+ if (!UnmapCurrentRegion() ||
+ !MapNewRegion()) {
+ return IOError(filename_, errno);
+ }
+ }
+
+ size_t n = (left <= avail) ? left : avail;
+ memcpy(dst_, src, n);
+ dst_ += n;
+ src += n;
+ left -= n;
+ }
+ return Status::OK();
+ }
+
+ virtual Status Close() {
+ Status s;
+ size_t unused = limit_ - dst_;
+ if (!UnmapCurrentRegion()) {
+ s = IOError(filename_, errno);
+ } else if (unused > 0) {
+ // Trim the extra space at the end of the file
+ if (ftruncate(fd_, file_offset_ - unused) < 0) {
+ s = IOError(filename_, errno);
+ }
+ }
+
+ if (close(fd_) < 0) {
+ if (s.ok()) {
+ s = IOError(filename_, errno);
+ }
+ }
+
+ fd_ = -1;
+ base_ = NULL;
+ limit_ = NULL;
+ return s;
+ }
+
+ virtual Status Flush() {
+ return Status::OK();
+ }
+
+ virtual Status Sync() {
+ Status s;
+
+ if (pending_sync_) {
+ // Some unmapped data was not synced
+ pending_sync_ = false;
+ if (fdatasync(fd_) < 0) {
+ s = IOError(filename_, errno);
+ }
+ }
+
+ if (dst_ > last_sync_) {
+ // Find the beginnings of the pages that contain the first and last
+ // bytes to be synced.
+ size_t p1 = TruncateToPageBoundary(last_sync_ - base_);
+ size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1);
+ last_sync_ = dst_;
+ if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) {
+ s = IOError(filename_, errno);
+ }
+ }
+
+ return s;
+ }
+
+#ifndef WITHOUT_HYPERLEVELDB
+ virtual Status WriteAt(uint64_t offset, const Slice& data) { return Status::NotSupported("sorry!"); }
+#endif
+};
+
+static int LockOrUnlock(int fd, bool lock) {
+ errno = 0;
+ struct flock f;
+ memset(&f, 0, sizeof(f));
+ f.l_type = (lock ? F_WRLCK : F_UNLCK);
+ f.l_whence = SEEK_SET;
+ f.l_start = 0;
+ f.l_len = 0; // Lock/unlock entire file
+ return fcntl(fd, F_SETLK, &f);
+}
+
+class PosixFileLock : public FileLock {
+ public:
+ int fd_;
+};
+
+class PosixEnv : public Env {
+ public:
+ PosixEnv();
+ virtual ~PosixEnv() {
+ fprintf(stderr, "Destroying Env::Default()\n");
+ exit(1);
+ }
+
+ virtual Status NewSequentialFile(const std::string& fname,
+ SequentialFile** result) {
+ FILE* f = fopen(fname.c_str(), "r");
+ if (f == NULL) {
+ *result = NULL;
+ return IOError(fname, errno);
+ } else {
+ *result = new PosixSequentialFile(fname, f);
+ return Status::OK();
+ }
+ }
+
+ virtual Status NewRandomAccessFile(const std::string& fname,
+ RandomAccessFile** result) {
+ *result = NULL;
+ Status s;
+ int fd = open(fname.c_str(), O_RDONLY);
+ if (fd < 0) {
+ s = IOError(fname, errno);
+ } else if (sizeof(void*) >= 8) {
+ // Use mmap when virtual address-space is plentiful.
+ uint64_t size;
+ s = GetFileSize(fname, &size);
+ if (s.ok()) {
+ void* base = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
+ if (base != MAP_FAILED) {
+ *result = new PosixMmapReadableFile(fname, base, size);
+ } else {
+ s = IOError(fname, errno);
+ }
+ }
+ close(fd);
+ } else {
+ *result = new PosixRandomAccessFile(fname, fd);
+ }
+ return s;
+ }
+
+ virtual Status NewWritableFile(const std::string& fname,
+ WritableFile** result) {
+ Status s;
+ const int fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644);
+ if (fd < 0) {
+ *result = NULL;
+ s = IOError(fname, errno);
+ } else {
+ *result = new PosixMmapFile(fname, fd, page_size_);
+ }
+ return s;
+ }
+
+ virtual bool FileExists(const std::string& fname) {
+ return access(fname.c_str(), F_OK) == 0;
+ }
+
+ virtual Status GetChildren(const std::string& dir,
+ std::vector<std::string>* result) {
+ result->clear();
+ DIR* d = opendir(dir.c_str());
+ if (d == NULL) {
+ return IOError(dir, errno);
+ }
+ struct dirent* entry;
+ while ((entry = readdir(d)) != NULL) {
+ result->push_back(entry->d_name);
+ }
+ closedir(d);
+ return Status::OK();
+ }
+
+ virtual Status DeleteFile(const std::string& fname) {
+ Status result;
+ if (unlink(fname.c_str()) != 0) {
+ result = IOError(fname, errno);
+ }
+ return result;
+ };
+
+ virtual Status CreateDir(const std::string& name) {
+ Status result;
+ if (mkdir(name.c_str(), 0755) != 0) {
+ result = IOError(name, errno);
+ }
+ return result;
+ };
+
+ virtual Status DeleteDir(const std::string& name) {
+ Status result;
+ if (rmdir(name.c_str()) != 0) {
+ result = IOError(name, errno);
+ }
+ return result;
+ };
+
+ virtual Status GetFileSize(const std::string& fname, uint64_t* size) {
+ Status s;
+ struct stat sbuf;
+ if (stat(fname.c_str(), &sbuf) != 0) {
+ *size = 0;
+ s = IOError(fname, errno);
+ } else {
+ *size = sbuf.st_size;
+ }
+ return s;
+ }
+
+ virtual Status RenameFile(const std::string& src, const std::string& target) {
+ Status result;
+ if (rename(src.c_str(), target.c_str()) != 0) {
+ result = IOError(src, errno);
+ }
+ return result;
+ }
+
+ virtual Status LockFile(const std::string& fname, FileLock** lock) {
+ *lock = NULL;
+ Status result;
+ int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
+ if (fd < 0) {
+ result = IOError(fname, errno);
+ } else if (LockOrUnlock(fd, true) == -1) {
+ result = IOError("lock " + fname, errno);
+ close(fd);
+ } else {
+ PosixFileLock* my_lock = new PosixFileLock;
+ my_lock->fd_ = fd;
+ *lock = my_lock;
+ }
+ return result;
+ }
+
+ virtual Status UnlockFile(FileLock* lock) {
+ PosixFileLock* my_lock = reinterpret_cast<PosixFileLock*>(lock);
+ Status result;
+ if (LockOrUnlock(my_lock->fd_, false) == -1) {
+ result = IOError("unlock", errno);
+ }
+ close(my_lock->fd_);
+ delete my_lock;
+ return result;
+ }
+
+ virtual void Schedule(void (*function)(void*), void* arg);
+
+ virtual void StartThread(void (*function)(void* arg), void* arg);
+
+ virtual Status GetTestDirectory(std::string* result) {
+ const char* env = getenv("TEST_TMPDIR");
+ if (env && env[0] != '\0') {
+ *result = env;
+ } else {
+ char buf[100];
+ snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d", int(geteuid()));
+ *result = buf;
+ }
+ // Directory may already exist
+ CreateDir(*result);
+ return Status::OK();
+ }
+
+ static uint64_t gettid() {
+ pthread_t tid = pthread_self();
+ uint64_t thread_id = 0;
+ memcpy(&thread_id, &tid, std::min(sizeof(thread_id), sizeof(tid)));
+ return thread_id;
+ }
+
+ virtual Status NewLogger(const std::string& fname, Logger** result) {
+ FILE* f = fopen(fname.c_str(), "w");
+ if (f == NULL) {
+ *result = NULL;
+ return IOError(fname, errno);
+ } else {
+ *result = new PosixLogger(f, &PosixEnv::gettid);
+ return Status::OK();
+ }
+ }
+
+ virtual uint64_t NowMicros() {
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+ }
+
+ virtual void SleepForMicroseconds(int micros) {
+ usleep(micros);
+ }
+
+#ifndef WITHOUT_HYPERLEVELDB
+ virtual Status CopyFile(const std::string&, const std::string&) { return Status::NotSupported("sorry!"); }
+ virtual Status LinkFile(const std::string&, const std::string&) { return Status::NotSupported("sorry!"); }
+#endif
+
+ private:
+ void PthreadCall(const char* label, int result) {
+ if (result != 0) {
+ fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
+ exit(1);
+ }
+ }
+
+ // BGThread() is the body of the background thread
+ void BGThread();
+ static void* BGThreadWrapper(void* arg) {
+ reinterpret_cast<PosixEnv*>(arg)->BGThread();
+ return NULL;
+ }
+
+ size_t page_size_;
+ pthread_mutex_t mu_;
+ pthread_cond_t bgsignal_;
+ pthread_t bgthread_;
+ bool started_bgthread_;
+
+ // Entry per Schedule() call
+ struct BGItem { void* arg; void (*function)(void*); };
+ typedef std::deque<BGItem> BGQueue;
+ BGQueue queue_;
+};
+
+PosixEnv::PosixEnv() : page_size_(getpagesize()),
+ started_bgthread_(false) {
+ PthreadCall("mutex_init", pthread_mutex_init(&mu_, NULL));
+ PthreadCall("cvar_init", pthread_cond_init(&bgsignal_, NULL));
+}
+
+void PosixEnv::Schedule(void (*function)(void*), void* arg) {
+ PthreadCall("lock", pthread_mutex_lock(&mu_));
+
+ // Start background thread if necessary
+ if (!started_bgthread_) {
+ started_bgthread_ = true;
+ PthreadCall(
+ "create thread",
+ pthread_create(&bgthread_, NULL, &PosixEnv::BGThreadWrapper, this));
+ }
+
+ // If the queue is currently empty, the background thread may currently be
+ // waiting.
+ if (queue_.empty()) {
+ PthreadCall("signal", pthread_cond_signal(&bgsignal_));
+ }
+
+ // Add to priority queue
+ queue_.push_back(BGItem());
+ queue_.back().function = function;
+ queue_.back().arg = arg;
+
+ PthreadCall("unlock", pthread_mutex_unlock(&mu_));
+}
+
+void PosixEnv::BGThread() {
+ while (true) {
+ // Wait until there is an item that is ready to run
+ PthreadCall("lock", pthread_mutex_lock(&mu_));
+ while (queue_.empty()) {
+ PthreadCall("wait", pthread_cond_wait(&bgsignal_, &mu_));
+ }
+
+ void (*function)(void*) = queue_.front().function;
+ void* arg = queue_.front().arg;
+ queue_.pop_front();
+
+ PthreadCall("unlock", pthread_mutex_unlock(&mu_));
+ (*function)(arg);
+ }
+}
+
+namespace {
+struct StartThreadState {
+ void (*user_function)(void*);
+ void* arg;
+};
+}
+static void* StartThreadWrapper(void* arg) {
+ StartThreadState* state = reinterpret_cast<StartThreadState*>(arg);
+ state->user_function(state->arg);
+ delete state;
+ return NULL;
+}
+
+void PosixEnv::StartThread(void (*function)(void* arg), void* arg) {
+ pthread_t t;
+ StartThreadState* state = new StartThreadState;
+ state->user_function = function;
+ state->arg = arg;
+ PthreadCall("start thread",
+ pthread_create(&t, NULL, &StartThreadWrapper, state));
+}
+
+} // namespace
+
+static pthread_once_t once = PTHREAD_ONCE_INIT;
+static Env* default_env;
+static void InitDefaultEnv() { default_env = new PosixEnv; }
+
+Env* Env::Default() {
+ pthread_once(&once, InitDefaultEnv);
+ return default_env;
+}
+
+} // namespace leveldb
diff --git a/api/leveldb/util/logging.cc b/api/leveldb/util/logging.cc
new file mode 100644
index 00000000000..1d60ce22701
--- /dev/null
+++ b/api/leveldb/util/logging.cc
@@ -0,0 +1,80 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "util/logging.h"
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "leveldb_wt.h"
+
+namespace leveldb {
+
+void AppendNumberTo(std::string* str, uint64_t num) {
+ char buf[30];
+ snprintf(buf, sizeof(buf), "%llu", (unsigned long long) num);
+ str->append(buf);
+}
+
+void AppendEscapedStringTo(std::string* str, const Slice& value) {
+ for (size_t i = 0; i < value.size(); i++) {
+ char c = value[i];
+ if (c >= ' ' && c <= '~') {
+ str->push_back(c);
+ } else {
+ char buf[10];
+ snprintf(buf, sizeof(buf), "\\x%02x",
+ static_cast<unsigned int>(c) & 0xff);
+ str->append(buf);
+ }
+ }
+}
+
+std::string NumberToString(uint64_t num) {
+ std::string r;
+ AppendNumberTo(&r, num);
+ return r;
+}
+
+std::string EscapeString(const Slice& value) {
+ std::string r;
+ AppendEscapedStringTo(&r, value);
+ return r;
+}
+
+bool ConsumeChar(Slice* in, char c) {
+ if (!in->empty() && (*in)[0] == c) {
+ in->remove_prefix(1);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool ConsumeDecimalNumber(Slice* in, uint64_t* val) {
+ uint64_t v = 0;
+ int digits = 0;
+ while (!in->empty()) {
+ char c = (*in)[0];
+ if (c >= '0' && c <= '9') {
+ ++digits;
+ const int delta = (c - '0');
+ static const uint64_t kMaxUint64 = ~static_cast<uint64_t>(0);
+ if (v > kMaxUint64/10 ||
+ (v == kMaxUint64/10 && delta > kMaxUint64%10)) {
+ // Overflow
+ return false;
+ }
+ v = (v * 10) + delta;
+ in->remove_prefix(1);
+ } else {
+ break;
+ }
+ }
+ *val = v;
+ return (digits > 0);
+}
+
+} // namespace leveldb
diff --git a/api/leveldb/util/logging.h b/api/leveldb/util/logging.h
new file mode 100644
index 00000000000..b0c5da813e8
--- /dev/null
+++ b/api/leveldb/util/logging.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Must not be included from any .h files to avoid polluting the namespace
+// with macros.
+
+#ifndef STORAGE_LEVELDB_UTIL_LOGGING_H_
+#define STORAGE_LEVELDB_UTIL_LOGGING_H_
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string>
+#include "port/port.h"
+
+namespace leveldb {
+
+class Slice;
+class WritableFile;
+
+// Append a human-readable printout of "num" to *str
+extern void AppendNumberTo(std::string* str, uint64_t num);
+
+// Append a human-readable printout of "value" to *str.
+// Escapes any non-printable characters found in "value".
+extern void AppendEscapedStringTo(std::string* str, const Slice& value);
+
+// Return a human-readable printout of "num"
+extern std::string NumberToString(uint64_t num);
+
+// Return a human-readable version of "value".
+// Escapes any non-printable characters found in "value".
+extern std::string EscapeString(const Slice& value);
+
+// If *in starts with "c", advances *in past the first character and
+// returns true. Otherwise, returns false.
+extern bool ConsumeChar(Slice* in, char c);
+
+// Parse a human-readable number from "*in" into *value. On success,
+// advances "*in" past the consumed number and sets "*val" to the
+// numeric value. Otherwise, returns false and leaves *in in an
+// unspecified state.
+extern bool ConsumeDecimalNumber(Slice* in, uint64_t* val);
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_UTIL_LOGGING_H_
diff --git a/api/leveldb/util/options.cc b/api/leveldb/util/options.cc
new file mode 100644
index 00000000000..6956ab6e739
--- /dev/null
+++ b/api/leveldb/util/options.cc
@@ -0,0 +1,29 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "hyperleveldb/options.h"
+
+#include "hyperleveldb/comparator.h"
+#include "hyperleveldb/env.h"
+
+namespace leveldb {
+
+Options::Options()
+ : comparator(BytewiseComparator()),
+ create_if_missing(false),
+ error_if_exists(false),
+ paranoid_checks(false),
+ env(Env::Default()),
+ info_log(NULL),
+ write_buffer_size(4<<20),
+ max_open_files(1000),
+ block_cache(NULL),
+ block_size(4096),
+ block_restart_interval(16),
+ compression(kSnappyCompression),
+ filter_policy(NULL) {
+}
+
+
+} // namespace leveldb
diff --git a/api/leveldb/util/posix_logger.h b/api/leveldb/util/posix_logger.h
new file mode 100644
index 00000000000..f15de45e05e
--- /dev/null
+++ b/api/leveldb/util/posix_logger.h
@@ -0,0 +1,98 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Logger implementation that can be shared by all environments
+// where enough posix functionality is available.
+
+#ifndef STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_
+#define STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_
+
+#include <algorithm>
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include "leveldb_wt.h"
+
+namespace leveldb {
+
+class PosixLogger : public Logger {
+ private:
+ FILE* file_;
+ uint64_t (*gettid_)(); // Return the thread id for the current thread
+ public:
+ PosixLogger(FILE* f, uint64_t (*gettid)()) : file_(f), gettid_(gettid) { }
+ virtual ~PosixLogger() {
+ fclose(file_);
+ }
+ virtual void Logv(const char* format, va_list ap) {
+ const uint64_t thread_id = (*gettid_)();
+
+ // We try twice: the first time with a fixed-size stack allocated buffer,
+ // and the second time with a much larger dynamically allocated buffer.
+ char buffer[500];
+ for (int iter = 0; iter < 2; iter++) {
+ char* base;
+ int bufsize;
+ if (iter == 0) {
+ bufsize = sizeof(buffer);
+ base = buffer;
+ } else {
+ bufsize = 30000;
+ base = new char[bufsize];
+ }
+ char* p = base;
+ char* limit = base + bufsize;
+
+ struct timeval now_tv;
+ gettimeofday(&now_tv, NULL);
+ const time_t seconds = now_tv.tv_sec;
+ struct tm t;
+ localtime_r(&seconds, &t);
+ p += snprintf(p, limit - p,
+ "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ",
+ t.tm_year + 1900,
+ t.tm_mon + 1,
+ t.tm_mday,
+ t.tm_hour,
+ t.tm_min,
+ t.tm_sec,
+ static_cast<int>(now_tv.tv_usec),
+ static_cast<long long unsigned int>(thread_id));
+
+ // Print the message
+ if (p < limit) {
+ va_list backup_ap;
+ va_copy(backup_ap, ap);
+ p += vsnprintf(p, limit - p, format, backup_ap);
+ va_end(backup_ap);
+ }
+
+ // Truncate to available space if necessary
+ if (p >= limit) {
+ if (iter == 0) {
+ continue; // Try again with larger buffer
+ } else {
+ p = limit - 1;
+ }
+ }
+
+ // Add newline if necessary
+ if (p == base || p[-1] != '\n') {
+ *p++ = '\n';
+ }
+
+ assert(p <= limit);
+ fwrite(base, 1, p - base, file_);
+ fflush(file_);
+ if (base != buffer) {
+ delete[] base;
+ }
+ break;
+ }
+ }
+};
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_
diff --git a/api/leveldb/util/random.h b/api/leveldb/util/random.h
new file mode 100644
index 00000000000..66e0c94e7cb
--- /dev/null
+++ b/api/leveldb/util/random.h
@@ -0,0 +1,72 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_UTIL_RANDOM_H_
+#define STORAGE_LEVELDB_UTIL_RANDOM_H_
+
+#include <stdint.h>
+
+namespace leveldb {
+
+// A very simple random number generator. Not especially good at
+// generating truly random bits, but good enough for our needs in this
+// package.
+class Random {
+ private:
+ uint32_t seed_;
+ public:
+ explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) { }
+ uint32_t Next() {
+ static const uint32_t M = 2147483647L; // 2^31-1
+ static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0
+ // We are computing
+ // seed_ = (seed_ * A) % M, where M = 2^31-1
+ //
+ // seed_ must not be zero or M, or else all subsequent computed values
+ // will be zero or M respectively. For all other values, seed_ will end
+ // up cycling through every number in [1,M-1]
+ uint64_t product = seed_ * A;
+
+ // Compute (product % M) using the fact that ((x << 31) % M) == x.
+ seed_ = static_cast<uint32_t>((product >> 31) + (product & M));
+ // The first reduction may overflow by 1 bit, so we may need to
+ // repeat. mod == M is not possible; using > allows the faster
+ // sign-bit-based test.
+ if (seed_ > M) {
+ seed_ -= M;
+ }
+ return seed_;
+ }
+ // Returns a uniformly distributed value in the range [0..n-1]
+ // REQUIRES: n > 0
+ uint32_t Uniform(int n) { return Next() % n; }
+
+ // Randomly returns true ~"1/n" of the time, and false otherwise.
+ // REQUIRES: n > 0
+ bool OneIn(int n) { return (Next() % n) == 0; }
+
+ // Skewed: pick "base" uniformly from range [0,max_log] and then
+ // return "base" random bits. The effect is to pick a number in the
+ // range [0,2^max_log-1] with exponential bias towards smaller numbers.
+ uint32_t Skewed(int max_log) {
+ return Uniform(1 << Uniform(max_log + 1));
+ }
+
+ // Shuffle the array into random order
+ void Shuffle(int *array, int n) {
+ if (n > 1) {
+ int i;
+ for (i=0; i<n-1; i++) {
+ int j = i + Next() / (2147483647 / (n-i) + 1);
+ int t = array[j];
+ array[j] = array[i];
+ array[i] = t;
+ }
+ }
+ }
+};
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_UTIL_RANDOM_H_
diff --git a/api/leveldb/util/status.cc b/api/leveldb/util/status.cc
new file mode 100644
index 00000000000..51a8210b679
--- /dev/null
+++ b/api/leveldb/util/status.cc
@@ -0,0 +1,74 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <stdio.h>
+#include "leveldb_wt.h"
+
+namespace leveldb {
+
+const char* Status::CopyState(const char* state) {
+ uint32_t size;
+ memcpy(&size, state, sizeof(size));
+ char* result = new char[size + 5];
+ memcpy(result, state, size + 5);
+ return result;
+}
+
+Status::Status(Code code, const Slice& msg, const Slice& msg2) {
+ assert(code != kOk);
+ const uint32_t len1 = msg.size();
+ const uint32_t len2 = msg2.size();
+ const uint32_t size = len1 + (len2 ? (2 + len2) : 0);
+ char* result = new char[size + 5];
+ memcpy(result, &size, sizeof(size));
+ result[4] = static_cast<char>(code);
+ memcpy(result + 5, msg.data(), len1);
+ if (len2) {
+ result[5 + len1] = ':';
+ result[6 + len1] = ' ';
+ memcpy(result + 7 + len1, msg2.data(), len2);
+ }
+ state_ = result;
+}
+
+std::string Status::ToString() const {
+ if (state_ == NULL) {
+ return "OK";
+ } else {
+ char tmp[30];
+ const char* type;
+ switch (code()) {
+ case kOk:
+ type = "OK";
+ break;
+ case kNotFound:
+ type = "NotFound: ";
+ break;
+ case kCorruption:
+ type = "Corruption: ";
+ break;
+ case kNotSupported:
+ type = "Not implemented: ";
+ break;
+ case kInvalidArgument:
+ type = "Invalid argument: ";
+ break;
+ case kIOError:
+ type = "IO error: ";
+ break;
+ default:
+ snprintf(tmp, sizeof(tmp), "Unknown code(%d): ",
+ static_cast<int>(code()));
+ type = tmp;
+ break;
+ }
+ std::string result(type);
+ uint32_t length;
+ memcpy(&length, state_, sizeof(length));
+ result.append(state_ + 5, length);
+ return result;
+ }
+}
+
+} // namespace leveldb