Implement the LevelDB API, backed by WiredTiger.

author: Michael Cahill <michael.cahill@wiredtiger.com> 2014-05-16 20:13:51 +1000
committer: Michael Cahill <michael.cahill@wiredtiger.com> 2014-05-16 20:13:51 +1000
commit: 379875bc04a606b95edc091b1033d2087d8be1f2 (patch)
tree: 9ae7c1d1e1b54877ff9e4ec3f13f6d7ee66ff634 /api
parent: b4c98612cbc759b6b1a33314f0f6ae39530c1701 (diff)
download: mongo-379875bc04a606b95edc091b1033d2087d8be1f2.tar.gz
21 files changed, 3070 insertions, 0 deletions
diff --git a/api/leveldb/Makefile.am b/api/leveldb/Makefile.am
new file mode 100644
index 00000000000..740dfb16456
--- /dev/null
+++ b/api/leveldb/Makefile.am
@@ -0,0 +1,18 @@
+AM_CPPFLAGS = -I$(top_builddir)
+
+lib_LTLIBRARIES = libwiredtiger_leveldb.la
+LDADD = $(lib_LTLIBRARIES) $(top_builddir)/libwiredtiger.la
+
+noinst_PROGRAMS = leveldb_test
+
+libwiredtiger_leveldb_la_LDFLAGS = -release @VERSION@
+libwiredtiger_leveldb_la_SOURCES = \
+	leveldb_wt.cc \
+	db/write_batch.cc \
+	util/coding.cc util/comparator.cc util/env.cc util/env_posix.cc \
+	util/logging.cc util/options.cc util/status.cc
+
+leveldb_test_SOURCES = leveldb_test.cc
+#leveldb_test_LDADD = $(top_builddir)/libwiredtiger.la
+
+TESTS = $(noinst_PROGRAMS)
diff --git a/api/leveldb/db/dbformat.h b/api/leveldb/db/dbformat.h
new file mode 100644
index 00000000000..37c21ed95e7
--- /dev/null
+++ b/api/leveldb/db/dbformat.h
@@ -0,0 +1,223 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_DB_FORMAT_H_
+#define STORAGE_LEVELDB_DB_FORMAT_H_
+
+#include <stdio.h>
+#include "leveldb_wt.h"
+#include "util/coding.h"
+#include "util/logging.h"
+
+namespace leveldb {
+
+// Grouping of constants.  We may want to make some of these
+// parameters set via options.
+namespace config {
+static const int kNumLevels = 7;
+
+// Level-0 compaction is started when we hit this many files.
+static const int kL0_CompactionTrigger = 4;
+
+// Soft limit on number of level-0 files.  We slow down writes at this point.
+static const int kL0_SlowdownWritesTrigger = 8;
+
+// Maximum number of level-0 files.  We stop writes at this point.
+static const int kL0_StopWritesTrigger = 12;
+
+// Maximum level to which a new compacted memtable is pushed if it
+// does not create overlap.  We try to push to level 2 to avoid the
+// relatively expensive level 0=>1 compactions and to avoid some
+// expensive manifest file operations.  We do not push all the way to
+// the largest level since that can generate a lot of wasted disk
+// space if the same key space is being repeatedly overwritten.
+static const int kMaxMemCompactLevel = 2;
+
+}  // namespace config
+
+class InternalKey;
+
+// Value types encoded as the last component of internal keys.
+// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
+// data structures.
+enum ValueType {
+  kTypeDeletion = 0x0,
+  kTypeValue = 0x1
+};
+// kValueTypeForSeek defines the ValueType that should be passed when
+// constructing a ParsedInternalKey object for seeking to a particular
+// sequence number (since we sort sequence numbers in decreasing order
+// and the value type is embedded as the low 8 bits in the sequence
+// number in internal keys, we need to use the highest-numbered
+// ValueType, not the lowest).
+static const ValueType kValueTypeForSeek = kTypeValue;
+
+typedef uint64_t SequenceNumber;
+
+// We leave eight bits empty at the bottom so a type and sequence#
+// can be packed together into 64-bits.
+static const SequenceNumber kMaxSequenceNumber =
+    ((0x1ull << 56) - 1);
+
+struct ParsedInternalKey {
+  Slice user_key;
+  SequenceNumber sequence;
+  ValueType type;
+
+  ParsedInternalKey() { }  // Intentionally left uninitialized (for speed)
+  ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
+      : user_key(u), sequence(seq), type(t) { }
+  std::string DebugString() const;
+};
+
+// Return the length of the encoding of "key".
+inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
+  return key.user_key.size() + 8;
+}
+
+// Append the serialization of "key" to *result.
+extern void AppendInternalKey(std::string* result,
+                              const ParsedInternalKey& key);
+
+// Attempt to parse an internal key from "internal_key".  On success,
+// stores the parsed data in "*result", and returns true.
+//
+// On error, returns false, leaves "*result" in an undefined state.
+extern bool ParseInternalKey(const Slice& internal_key,
+                             ParsedInternalKey* result);
+
+// Returns the user key portion of an internal key.
+inline Slice ExtractUserKey(const Slice& internal_key) {
+  assert(internal_key.size() >= 8);
+  return Slice(internal_key.data(), internal_key.size() - 8);
+}
+
+inline ValueType ExtractValueType(const Slice& internal_key) {
+  assert(internal_key.size() >= 8);
+  const size_t n = internal_key.size();
+  uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
+  unsigned char c = num & 0xff;
+  return static_cast<ValueType>(c);
+}
+
+// A comparator for internal keys that uses a specified comparator for
+// the user key portion and breaks ties by decreasing sequence number.
+class InternalKeyComparator : public Comparator {
+ private:
+  const Comparator* user_comparator_;
+ public:
+  explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) { }
+  virtual const char* Name() const;
+  virtual int Compare(const Slice& a, const Slice& b) const;
+  virtual void FindShortestSeparator(
+      std::string* start,
+      const Slice& limit) const;
+  virtual void FindShortSuccessor(std::string* key) const;
+
+  const Comparator* user_comparator() const { return user_comparator_; }
+
+  int Compare(const InternalKey& a, const InternalKey& b) const;
+};
+
+// Filter policy wrapper that converts from internal keys to user keys
+class InternalFilterPolicy : public FilterPolicy {
+ private:
+  const FilterPolicy* const user_policy_;
+ public:
+  explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) { }
+  virtual const char* Name() const;
+  virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const;
+  virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const;
+};
+
+// Modules in this directory should keep internal keys wrapped inside
+// the following class instead of plain strings so that we do not
+// incorrectly use string comparisons instead of an InternalKeyComparator.
+class InternalKey {
+ private:
+  std::string rep_;
+ public:
+  InternalKey() { }   // Leave rep_ as empty to indicate it is invalid
+  InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) {
+    AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t));
+  }
+
+  void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }
+  Slice Encode() const {
+    assert(!rep_.empty());
+    return rep_;
+  }
+
+  Slice user_key() const { return ExtractUserKey(rep_); }
+
+  void SetFrom(const ParsedInternalKey& p) {
+    rep_.clear();
+    AppendInternalKey(&rep_, p);
+  }
+
+  void Clear() { rep_.clear(); }
+
+  std::string DebugString() const;
+};
+
+inline int InternalKeyComparator::Compare(
+    const InternalKey& a, const InternalKey& b) const {
+  return Compare(a.Encode(), b.Encode());
+}
+
+inline bool ParseInternalKey(const Slice& internal_key,
+                             ParsedInternalKey* result) {
+  const size_t n = internal_key.size();
+  if (n < 8) return false;
+  uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
+  unsigned char c = num & 0xff;
+  result->sequence = num >> 8;
+  result->type = static_cast<ValueType>(c);
+  result->user_key = Slice(internal_key.data(), n - 8);
+  return (c <= static_cast<unsigned char>(kTypeValue));
+}
+
+// A helper class useful for DBImpl::Get()
+class LookupKey {
+ public:
+  // Initialize *this for looking up user_key at a snapshot with
+  // the specified sequence number.
+  LookupKey(const Slice& user_key, SequenceNumber sequence);
+
+  ~LookupKey();
+
+  // Return a key suitable for lookup in a MemTable.
+  Slice memtable_key() const { return Slice(start_, end_ - start_); }
+
+  // Return an internal key (suitable for passing to an internal iterator)
+  Slice internal_key() const { return Slice(kstart_, end_ - kstart_); }
+
+  // Return the user key
+  Slice user_key() const { return Slice(kstart_, end_ - kstart_ - 8); }
+
+ private:
+  // We construct a char array of the form:
+  //    klength  varint32               <-- start_
+  //    userkey  char[klength]          <-- kstart_
+  //    tag      uint64
+  //                                    <-- end_
+  // The array is a suitable MemTable key.
+  // The suffix starting with "userkey" can be used as an InternalKey.
+  const char* start_;
+  const char* kstart_;
+  const char* end_;
+  char space_[200];      // Avoid allocation for short keys
+
+  // No copying allowed
+  LookupKey(const LookupKey&);
+  void operator=(const LookupKey&);
+};
+
+inline LookupKey::~LookupKey() {
+  if (start_ != space_) delete[] start_;
+}
+
+}  // namespace leveldb
+
+#endif  // STORAGE_LEVELDB_DB_FORMAT_H_
diff --git a/api/leveldb/db/skiplist.h b/api/leveldb/db/skiplist.h
new file mode 100644
index 00000000000..af85be6d016
--- /dev/null
+++ b/api/leveldb/db/skiplist.h
@@ -0,0 +1,379 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Thread safety
+// -------------
+//
+// Writes require external synchronization, most likely a mutex.
+// Reads require a guarantee that the SkipList will not be destroyed
+// while the read is in progress.  Apart from that, reads progress
+// without any internal locking or synchronization.
+//
+// Invariants:
+//
+// (1) Allocated nodes are never deleted until the SkipList is
+// destroyed.  This is trivially guaranteed by the code since we
+// never delete any skip list nodes.
+//
+// (2) The contents of a Node except for the next/prev pointers are
+// immutable after the Node has been linked into the SkipList.
+// Only Insert() modifies the list, and it is careful to initialize
+// a node and use release-stores to publish the nodes in one or
+// more lists.
+//
+// ... prev vs. next pointer ordering ...
+
+#include <assert.h>
+#include <stdlib.h>
+#include "port/port.h"
+#include "util/arena.h"
+#include "util/random.h"
+
+namespace leveldb {
+
+class Arena;
+
+template<typename Key, class Comparator>
+class SkipList {
+ private:
+  struct Node;
+
+ public:
+  // Create a new SkipList object that will use "cmp" for comparing keys,
+  // and will allocate memory using "*arena".  Objects allocated in the arena
+  // must remain allocated for the lifetime of the skiplist object.
+  explicit SkipList(Comparator cmp, Arena* arena);
+
+  // Insert key into the list.
+  // REQUIRES: nothing that compares equal to key is currently in the list.
+  void Insert(const Key& key);
+
+  // Returns true iff an entry that compares equal to key is in the list.
+  bool Contains(const Key& key) const;
+
+  // Iteration over the contents of a skip list
+  class Iterator {
+   public:
+    // Initialize an iterator over the specified list.
+    // The returned iterator is not valid.
+    explicit Iterator(const SkipList* list);
+
+    // Returns true iff the iterator is positioned at a valid node.
+    bool Valid() const;
+
+    // Returns the key at the current position.
+    // REQUIRES: Valid()
+    const Key& key() const;
+
+    // Advances to the next position.
+    // REQUIRES: Valid()
+    void Next();
+
+    // Advances to the previous position.
+    // REQUIRES: Valid()
+    void Prev();
+
+    // Advance to the first entry with a key >= target
+    void Seek(const Key& target);
+
+    // Position at the first entry in list.
+    // Final state of iterator is Valid() iff list is not empty.
+    void SeekToFirst();
+
+    // Position at the last entry in list.
+    // Final state of iterator is Valid() iff list is not empty.
+    void SeekToLast();
+
+   private:
+    const SkipList* list_;
+    Node* node_;
+    // Intentionally copyable
+  };
+
+ private:
+  enum { kMaxHeight = 12 };
+
+  // Immutable after construction
+  Comparator const compare_;
+  Arena* const arena_;    // Arena used for allocations of nodes
+
+  Node* const head_;
+
+  // Modified only by Insert().  Read racily by readers, but stale
+  // values are ok.
+  port::AtomicPointer max_height_;   // Height of the entire list
+
+  inline int GetMaxHeight() const {
+    return static_cast<int>(
+        reinterpret_cast<intptr_t>(max_height_.NoBarrier_Load()));
+  }
+
+  // Read/written only by Insert().
+  Random rnd_;
+
+  Node* NewNode(const Key& key, int height);
+  int RandomHeight();
+  bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }
+
+  // Return true if key is greater than the data stored in "n"
+  bool KeyIsAfterNode(const Key& key, Node* n) const;
+
+  // Return the earliest node that comes at or after key.
+  // Return NULL if there is no such node.
+  //
+  // If prev is non-NULL, fills prev[level] with pointer to previous
+  // node at "level" for every level in [0..max_height_-1].
+  Node* FindGreaterOrEqual(const Key& key, Node** prev) const;
+
+  // Return the latest node with a key < key.
+  // Return head_ if there is no such node.
+  Node* FindLessThan(const Key& key) const;
+
+  // Return the last node in the list.
+  // Return head_ if list is empty.
+  Node* FindLast() const;
+
+  // No copying allowed
+  SkipList(const SkipList&);
+  void operator=(const SkipList&);
+};
+
+// Implementation details follow
+template<typename Key, class Comparator>
+struct SkipList<Key,Comparator>::Node {
+  explicit Node(const Key& k) : key(k) { }
+
+  Key const key;
+
+  // Accessors/mutators for links.  Wrapped in methods so we can
+  // add the appropriate barriers as necessary.
+  Node* Next(int n) {
+    assert(n >= 0);
+    // Use an 'acquire load' so that we observe a fully initialized
+    // version of the returned Node.
+    return reinterpret_cast<Node*>(next_[n].Acquire_Load());
+  }
+  void SetNext(int n, Node* x) {
+    assert(n >= 0);
+    // Use a 'release store' so that anybody who reads through this
+    // pointer observes a fully initialized version of the inserted node.
+    next_[n].Release_Store(x);
+  }
+
+  // No-barrier variants that can be safely used in a few locations.
+  Node* NoBarrier_Next(int n) {
+    assert(n >= 0);
+    return reinterpret_cast<Node*>(next_[n].NoBarrier_Load());
+  }
+  void NoBarrier_SetNext(int n, Node* x) {
+    assert(n >= 0);
+    next_[n].NoBarrier_Store(x);
+  }
+
+ private:
+  // Array of length equal to the node height.  next_[0] is lowest level link.
+  port::AtomicPointer next_[1];
+};
+
+template<typename Key, class Comparator>
+typename SkipList<Key,Comparator>::Node*
+SkipList<Key,Comparator>::NewNode(const Key& key, int height) {
+  char* mem = arena_->AllocateAligned(
+      sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1));
+  return new (mem) Node(key);
+}
+
+template<typename Key, class Comparator>
+inline SkipList<Key,Comparator>::Iterator::Iterator(const SkipList* list) {
+  list_ = list;
+  node_ = NULL;
+}
+
+template<typename Key, class Comparator>
+inline bool SkipList<Key,Comparator>::Iterator::Valid() const {
+  return node_ != NULL;
+}
+
+template<typename Key, class Comparator>
+inline const Key& SkipList<Key,Comparator>::Iterator::key() const {
+  assert(Valid());
+  return node_->key;
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::Next() {
+  assert(Valid());
+  node_ = node_->Next(0);
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::Prev() {
+  // Instead of using explicit "prev" links, we just search for the
+  // last node that falls before key.
+  assert(Valid());
+  node_ = list_->FindLessThan(node_->key);
+  if (node_ == list_->head_) {
+    node_ = NULL;
+  }
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::Seek(const Key& target) {
+  node_ = list_->FindGreaterOrEqual(target, NULL);
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::SeekToFirst() {
+  node_ = list_->head_->Next(0);
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::SeekToLast() {
+  node_ = list_->FindLast();
+  if (node_ == list_->head_) {
+    node_ = NULL;
+  }
+}
+
+template<typename Key, class Comparator>
+int SkipList<Key,Comparator>::RandomHeight() {
+  // Increase height with probability 1 in kBranching
+  static const unsigned int kBranching = 4;
+  int height = 1;
+  while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) {
+    height++;
+  }
+  assert(height > 0);
+  assert(height <= kMaxHeight);
+  return height;
+}
+
+template<typename Key, class Comparator>
+bool SkipList<Key,Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
+  // NULL n is considered infinite
+  return (n != NULL) && (compare_(n->key, key) < 0);
+}
+
+template<typename Key, class Comparator>
+typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOrEqual(const Key& key, Node** prev)
+    const {
+  Node* x = head_;
+  int level = GetMaxHeight() - 1;
+  while (true) {
+    Node* next = x->Next(level);
+    if (KeyIsAfterNode(key, next)) {
+      // Keep searching in this list
+      x = next;
+    } else {
+      if (prev != NULL) prev[level] = x;
+      if (level == 0) {
+        return next;
+      } else {
+        // Switch to next list
+        level--;
+      }
+    }
+  }
+}
+
+template<typename Key, class Comparator>
+typename SkipList<Key,Comparator>::Node*
+SkipList<Key,Comparator>::FindLessThan(const Key& key) const {
+  Node* x = head_;
+  int level = GetMaxHeight() - 1;
+  while (true) {
+    assert(x == head_ || compare_(x->key, key) < 0);
+    Node* next = x->Next(level);
+    if (next == NULL || compare_(next->key, key) >= 0) {
+      if (level == 0) {
+        return x;
+      } else {
+        // Switch to next list
+        level--;
+      }
+    } else {
+      x = next;
+    }
+  }
+}
+
+template<typename Key, class Comparator>
+typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast()
+    const {
+  Node* x = head_;
+  int level = GetMaxHeight() - 1;
+  while (true) {
+    Node* next = x->Next(level);
+    if (next == NULL) {
+      if (level == 0) {
+        return x;
+      } else {
+        // Switch to next list
+        level--;
+      }
+    } else {
+      x = next;
+    }
+  }
+}
+
+template<typename Key, class Comparator>
+SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena)
+    : compare_(cmp),
+      arena_(arena),
+      head_(NewNode(0 /* any key will do */, kMaxHeight)),
+      max_height_(reinterpret_cast<void*>(1)),
+      rnd_(0xdeadbeef) {
+  for (int i = 0; i < kMaxHeight; i++) {
+    head_->SetNext(i, NULL);
+  }
+}
+
+template<typename Key, class Comparator>
+void SkipList<Key,Comparator>::Insert(const Key& key) {
+  // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual()
+  // here since Insert() is externally synchronized.
+  Node* prev[kMaxHeight];
+  Node* x = FindGreaterOrEqual(key, prev);
+
+  // Our data structure does not allow duplicate insertion
+  assert(x == NULL || !Equal(key, x->key));
+
+  int height = RandomHeight();
+  if (height > GetMaxHeight()) {
+    for (int i = GetMaxHeight(); i < height; i++) {
+      prev[i] = head_;
+    }
+    //fprintf(stderr, "Change height from %d to %d\n", max_height_, height);
+
+    // It is ok to mutate max_height_ without any synchronization
+    // with concurrent readers.  A concurrent reader that observes
+    // the new value of max_height_ will see either the old value of
+    // new level pointers from head_ (NULL), or a new value set in
+    // the loop below.  In the former case the reader will
+    // immediately drop to the next level since NULL sorts after all
+    // keys.  In the latter case the reader will use the new node.
+    max_height_.NoBarrier_Store(reinterpret_cast<void*>(height));
+  }
+
+  x = NewNode(key, height);
+  for (int i = 0; i < height; i++) {
+    // NoBarrier_SetNext() suffices since we will add a barrier when
+    // we publish a pointer to "x" in prev[i].
+    x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i));
+    prev[i]->SetNext(i, x);
+  }
+}
+
+template<typename Key, class Comparator>
+bool SkipList<Key,Comparator>::Contains(const Key& key) const {
+  Node* x = FindGreaterOrEqual(key, NULL);
+  if (x != NULL && Equal(key, x->key)) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
+}  // namespace leveldb
diff --git a/api/leveldb/db/write_batch.cc b/api/leveldb/db/write_batch.cc
new file mode 100644
index 00000000000..6a488a83e3e
--- /dev/null
+++ b/api/leveldb/db/write_batch.cc
@@ -0,0 +1,109 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// WriteBatch::rep_ :=
+//    sequence: fixed64
+//    count: fixed32
+//    data: record[count]
+// record :=
+//    kTypeValue varstring varstring         |
+//    kTypeDeletion varstring
+// varstring :=
+//    len: varint32
+//    data: uint8[len]
+
+#include "leveldb_wt.h"
+#include "db/write_batch_internal.h"
+
+namespace leveldb {
+
+// WriteBatch header has an 8-byte sequence number followed by a 4-byte count.
+static const size_t kHeader = 12;
+
+WriteBatch::WriteBatch() {
+  Clear();
+}
+
+WriteBatch::~WriteBatch() { }
+
+WriteBatch::Handler::~Handler() { }
+
+void WriteBatch::Clear() {
+  rep_.clear();
+  rep_.resize(kHeader);
+}
+
+Status WriteBatch::Iterate(Handler* handler) const {
+  Slice input(rep_);
+  if (input.size() < kHeader) {
+    return Status::Corruption("malformed WriteBatch (too small)");
+  }
+
+  input.remove_prefix(kHeader);
+  Slice key, value;
+  int found = 0;
+  while (!input.empty()) {
+    found++;
+    char tag = input[0];
+    input.remove_prefix(1);
+    switch (tag) {
+      case kTypeValue:
+        if (GetLengthPrefixedSlice(&input, &key) &&
+            GetLengthPrefixedSlice(&input, &value)) {
+          handler->Put(key, value);
+        } else {
+          return Status::Corruption("bad WriteBatch Put");
+        }
+        break;
+      case kTypeDeletion:
+        if (GetLengthPrefixedSlice(&input, &key)) {
+          handler->Delete(key);
+        } else {
+          return Status::Corruption("bad WriteBatch Delete");
+        }
+        break;
+      default:
+        return Status::Corruption("unknown WriteBatch tag");
+    }
+  }
+  if (found != WriteBatchInternal::Count(this)) {
+    return Status::Corruption("WriteBatch has wrong count");
+  } else {
+    return Status::OK();
+  }
+}
+
+int WriteBatchInternal::Count(const WriteBatch* b) {
+  return DecodeFixed32(b->rep_.data() + 8);
+}
+
+void WriteBatchInternal::SetCount(WriteBatch* b, int n) {
+  EncodeFixed32(&b->rep_[8], n);
+}
+
+void WriteBatch::Put(const Slice& key, const Slice& value) {
+  WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
+  rep_.push_back(static_cast<char>(kTypeValue));
+  PutLengthPrefixedSlice(&rep_, key);
+  PutLengthPrefixedSlice(&rep_, value);
+}
+
+void WriteBatch::Delete(const Slice& key) {
+  WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
+  rep_.push_back(static_cast<char>(kTypeDeletion));
+  PutLengthPrefixedSlice(&rep_, key);
+}
+
+void WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) {
+  assert(contents.size() >= kHeader);
+  b->rep_.assign(contents.data(), contents.size());
+}
+
+void WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src) {
+  SetCount(dst, Count(dst) + Count(src));
+  assert(src->rep_.size() >= kHeader);
+  dst->rep_.append(src->rep_.data() + kHeader, src->rep_.size() - kHeader);
+}
+
+}  // namespace leveldb
diff --git a/api/leveldb/db/write_batch_internal.h b/api/leveldb/db/write_batch_internal.h
new file mode 100644
index 00000000000..b4295004e13
--- /dev/null
+++ b/api/leveldb/db/write_batch_internal.h
@@ -0,0 +1,39 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
+#define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
+
+#include "leveldb_wt.h"
+#include "db/dbformat.h"
+
+namespace leveldb {
+
+// WriteBatchInternal provides static methods for manipulating a
+// WriteBatch that we don't want in the public WriteBatch interface.
+class WriteBatchInternal {
+ public:
+  // Return the number of entries in the batch.
+  static int Count(const WriteBatch* batch);
+
+  // Set the count for the number of entries in the batch.
+  static void SetCount(WriteBatch* batch, int n);
+
+  static Slice Contents(const WriteBatch* batch) {
+    return Slice(batch->rep_);
+  }
+
+  static size_t ByteSize(const WriteBatch* batch) {
+    return batch->rep_.size();
+  }
+
+  static void SetContents(WriteBatch* batch, const Slice& contents);
+
+  static void Append(WriteBatch* dst, const WriteBatch* src);
+};
+
+}  // namespace leveldb
+
+
+#endif  // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
diff --git a/api/leveldb/leveldb_test.cc b/api/leveldb/leveldb_test.cc
new file mode 100644
index 00000000000..d3b78b4eb4b
--- /dev/null
+++ b/api/leveldb/leveldb_test.cc
@@ -0,0 +1,36 @@
+/*-
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ *	All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+#include <assert.h>
+#include <iostream>
+#include <hyperleveldb/db.h>
+
+using namespace std;
+
+extern "C" int main() {
+	leveldb::DB* db;
+	leveldb::Options options;
+	options.create_if_missing = true;
+	leveldb::Status s = leveldb::DB::Open(options, "WTLDB_HOME", &db);
+	assert(s.ok());
+
+	s = db->Put(leveldb::WriteOptions(), "key", "value");
+	assert(s.ok());
+
+	leveldb::ReadOptions read_options;
+	read_options.snapshot = db->GetSnapshot();
+	leveldb::Iterator* iter = db->NewIterator(read_options);
+	for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+		cout << iter->key().ToString() << ": "  << iter->value().ToString() << endl;
+	}
+
+	delete iter;
+	db->ReleaseSnapshot(read_options.snapshot);
+
+	delete db;
+
+	return (0);
+}
diff --git a/api/leveldb/leveldb_wt.cc b/api/leveldb/leveldb_wt.cc
new file mode 100644
index 00000000000..defa8b73556
--- /dev/null
+++ b/api/leveldb/leveldb_wt.cc
@@ -0,0 +1,673 @@
+/*-
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ *	All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+#include "leveldb_wt.h"
+
+using leveldb::FilterPolicy;
+using leveldb::Iterator;
+using leveldb::Options;
+using leveldb::ReadOptions;
+using leveldb::WriteBatch;
+using leveldb::WriteOptions;
+using leveldb::Range;
+using leveldb::Slice;
+using leveldb::Snapshot;
+using leveldb::Status;
+#ifndef WITHOUT_HYPERLEVELDB
+namespace leveldb {
+class ReplayIterator;
+}
+#else
+using leveldb::Value;
+#endif
+
+#define	WT_URI	"table:data"
+#define	WT_CONFIG	"type=lsm,leaf_page_max=4KB,leaf_item_max=1KB"
+
+/* Destructors required for interfaces. */
+leveldb::DB::~DB() {}
+Snapshot::~Snapshot() {}
+
+/* Iterators, from leveldb/table/iterator.cc */
+Iterator::Iterator() {
+  cleanup_.function = NULL;
+  cleanup_.next = NULL;
+}
+
+Iterator::~Iterator() {
+  if (cleanup_.function != NULL) {
+    (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2);
+    for (Cleanup* c = cleanup_.next; c != NULL; ) {
+      (*c->function)(c->arg1, c->arg2);
+      Cleanup* next = c->next;
+      delete c;
+      c = next;
+    }
+  }
+}
+
+void Iterator::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) {
+  assert(func != NULL);
+  Cleanup* c;
+  if (cleanup_.function == NULL) {
+    c = &cleanup_;
+  } else {
+    c = new Cleanup;
+    c->next = cleanup_.next;
+    cleanup_.next = c;
+  }
+  c->function = func;
+  c->arg1 = arg1;
+  c->arg2 = arg2;
+}
+
+namespace {
+class EmptyIterator : public Iterator {
+ public:
+  EmptyIterator(const Status& s) : status_(s) { }
+  virtual bool Valid() const { return false; }
+  virtual void Seek(const Slice& target) { }
+  virtual void SeekToFirst() { }
+  virtual void SeekToLast() { }
+  virtual void Next() { assert(false); }
+  virtual void Prev() { assert(false); }
+  Slice key() const { assert(false); return Slice(); }
+  Slice value() const { assert(false); return Slice(); }
+  virtual Status status() const { return status_; }
+ private:
+  Status status_;
+};
+}  // namespace
+
+Iterator* NewEmptyIterator() {
+  return new EmptyIterator(Status::OK());
+}
+
+Iterator* NewErrorIterator(const Status& status) {
+  return new EmptyIterator(status);
+}
+
+namespace leveldb {
+const FilterPolicy *NewBloomFilterPolicy(int bits_per_key) {
+  return 0;
+}
+
+Cache *NewLRUCache(size_t capacity) {
+  return 0;
+}
+
+Status DestroyDB(const std::string& name, const Options& options) {
+  fprintf(stderr, "DestroyDB %s", name.c_str());
+  return Status::NotSupported("sorry!");
+}
+
+Status RepairDB(const std::string& dbname, const Options& options) {
+  return Status::NotSupported("sorry!");
+}
+}
+
+/* POSIX thread-local storage */
+template <class T>
+class ThreadLocal {
+public:
+	static void cleanup(void *val) {
+		delete (T *)val;
+	}
+
+	ThreadLocal() {
+		int ret = pthread_key_create(&key_, cleanup);
+		assert(ret == 0);
+	}
+
+	~ThreadLocal() {
+		int ret = pthread_key_delete(key_);
+		assert(ret == 0);
+	}
+
+	T *get() {
+		return (T *)(pthread_getspecific(key_));
+	}
+
+	void set(T *value) {
+		int ret = pthread_setspecific(key_, value);
+		assert(ret == 0);
+	}
+
+private:
+	pthread_key_t key_;
+};
+
+/* WiredTiger implementations. */
+class DbImpl;
+
+/* Context for operations (including snapshots, write batches, transactions) */
+class OperationContext {
+public:
+	OperationContext(WT_CONNECTION *conn) {
+		int ret = conn->open_session(conn, NULL, NULL, &session_);
+		assert(ret == 0);
+		ret = session_->open_cursor(
+		    session_, WT_URI, NULL, NULL, &cursor_);
+		assert(ret == 0);
+	}
+
+	~OperationContext() {
+		int ret = session_->close(session_, NULL);
+		assert(ret == 0);
+	}
+
+	WT_CURSOR *getCursor() { return cursor_; }
+
+private:
+	WT_SESSION *session_;
+	WT_CURSOR *cursor_;
+};
+
+class IteratorImpl : public Iterator {
+public:
+	IteratorImpl(WT_CURSOR *cursor, DbImpl *db, const ReadOptions &options) : cursor_(cursor), valid_(false) {}
+	virtual ~IteratorImpl() {}
+
+	// An iterator is either positioned at a key/value pair, or
+	// not valid.  This method returns true iff the iterator is valid.
+	virtual bool Valid() const { return valid_; }
+
+	virtual void SeekToFirst();
+
+	virtual void SeekToLast();
+
+	virtual void Seek(const Slice& target);
+
+	virtual void Next();
+
+	virtual void Prev();
+
+	virtual Slice key() const {
+		return key_;
+	}
+
+	virtual Slice value() const {
+		return value_;
+	}
+
+	virtual Status status() const {
+		return status_;
+	}
+
+private:
+	WT_CURSOR *cursor_;
+	Slice key_, value_;
+	Status status_;
+	bool valid_;
+
+	// No copying allowed
+	IteratorImpl(const IteratorImpl&);
+	void operator=(const IteratorImpl&);
+};
+
+class SnapshotImpl : public Snapshot {
+public:
+	SnapshotImpl(DbImpl *db) : Snapshot() {}
+	virtual ~SnapshotImpl() {}
+};
+
+class DbImpl : public leveldb::DB {
+public:
+	DbImpl(WT_CONNECTION *conn) : DB(), conn_(conn), context_(new ThreadLocal<OperationContext>) {}
+	virtual ~DbImpl() {
+		delete context_;
+		int ret = conn_->close(conn_, NULL);
+		assert(ret == 0);
+	}
+
+	virtual Status Put(const WriteOptions& options,
+		     const Slice& key,
+		     const Slice& value);
+
+	virtual Status Delete(const WriteOptions& options, const Slice& key);
+
+	virtual Status Write(const WriteOptions& options, WriteBatch* updates);
+
+	virtual Status Get(const ReadOptions& options,
+		     const Slice& key, std::string* value);
+
+#ifndef WITHOUT_HYPERLEVELDB
+	virtual Status LiveBackup(const Slice& name) { return Status::NotSupported("sorry!"); }
+	virtual void GetReplayTimestamp(std::string* timestamp) {}
+	virtual void AllowGarbageCollectBeforeTimestamp(const std::string& timestamp) {}
+	virtual bool ValidateTimestamp(const std::string& timestamp) {}
+	virtual int CompareTimestamps(const std::string& lhs, const std::string& rhs) {}
+	virtual Status GetReplayIterator(const std::string& timestamp,
+					   leveldb::ReplayIterator** iter) { return Status::NotSupported("sorry!"); }
+	virtual void ReleaseReplayIterator(leveldb::ReplayIterator* iter) {}
+#else
+	virtual Status Get(const ReadOptions& options,
+		     const Slice& key, Value* value);
+#endif
+
+	virtual Iterator* NewIterator(const ReadOptions& options);
+
+	virtual const Snapshot* GetSnapshot();
+
+	virtual void ReleaseSnapshot(const Snapshot* snapshot);
+
+	virtual bool GetProperty(const Slice& property, std::string* value);
+
+	virtual void GetApproximateSizes(const Range* range, int n,
+				   uint64_t* sizes);
+
+	virtual void CompactRange(const Slice* begin, const Slice* end);
+
+	virtual void SuspendCompactions();
+	
+	virtual void ResumeCompactions();
+
+private:
+	WT_CONNECTION *conn_;
+	ThreadLocal<OperationContext> *context_;
+
+	OperationContext *getContext() {
+		OperationContext *ctx = context_->get();
+		if (ctx == NULL) {
+			ctx = new OperationContext(conn_);
+			context_->set(ctx);
+		}
+		return (ctx);
+	}
+
+	WT_CURSOR *getCursor() { return getContext()->getCursor(); }
+
+	// No copying allowed
+	DbImpl(const DbImpl&);
+	void operator=(const DbImpl&);
+};
+
+Status
+leveldb::DB::Open(const Options &options, const std::string &name, leveldb::DB **dbptr)
+{
+	WT_CONNECTION *conn;
+	int ret = ::wiredtiger_open(name.c_str(), NULL, "create", &conn);
+	assert(ret == 0);
+
+	WT_SESSION *session;
+	ret = conn->open_session(conn, NULL, NULL, &session);
+	assert(ret == 0);
+	ret = session->create(session, WT_URI, WT_CONFIG);
+	assert(ret == 0);
+	ret = session->close(session, NULL);
+	assert(ret == 0);
+
+	*dbptr = new DbImpl(conn);
+	return Status::OK();
+}
+
+// Set the database entry for "key" to "value".  Returns OK on success,
+// and a non-OK status on error.
+// Note: consider setting options.sync = true.
+Status
+DbImpl::Put(const WriteOptions& options,
+	     const Slice& key, const Slice& value)
+{
+	WT_CURSOR *cursor = getCursor();
+	WT_ITEM item;
+
+	item.data = key.data();
+	item.size = key.size();
+	cursor->set_key(cursor, &item);
+	item.data = value.data();
+	item.size = value.size();
+	cursor->set_value(cursor, &item);
+	int ret = cursor->insert(cursor);
+	assert(ret == 0);
+	return Status::OK();
+}
+
+// Remove the database entry (if any) for "key".  Returns OK on
+// success, and a non-OK status on error.  It is not an error if "key"
+// did not exist in the database.
+// Note: consider setting options.sync = true.
+Status
+DbImpl::Delete(const WriteOptions& options, const Slice& key)
+{
+	WT_CURSOR *cursor = getCursor();
+	WT_ITEM item;
+
+	item.data = key.data();
+	item.size = key.size();
+	cursor->set_key(cursor, &item);
+	int ret = cursor->remove(cursor);
+	assert(ret == 0);
+	return Status::OK();
+}
+
+// Implement WriteBatch::Handler
+class WriteBatchHandler : public WriteBatch::Handler {
+public:
+	WriteBatchHandler(WT_CURSOR *cursor) : cursor_(cursor), status_(0) {}
+	virtual ~WriteBatchHandler() {}
+	int getStatus() { return status_; }
+
+	virtual void Put(const Slice& key, const Slice& value) {
+		WT_ITEM item;
+
+		item.data = key.data();
+		item.size = key.size();
+		cursor_->set_key(cursor_, &item);
+		item.data = value.data();
+		item.size = value.size();
+		cursor_->set_value(cursor_, &item);
+		int ret = cursor_->insert(cursor_);
+		if (ret != 0 && status_ == 0)
+			status_ = ret;
+	}
+	virtual void Delete(const Slice& key) {
+		WT_ITEM item;
+
+		item.data = key.data();
+		item.size = key.size();
+		cursor_->set_key(cursor_, &item);
+		int ret = cursor_->remove(cursor_);
+		if (ret != 0 && status_ == 0)
+			status_ = ret;
+	}
+
+private:
+	WT_CURSOR *cursor_;
+	int status_;
+};
+
+// Apply the specified updates to the database.
+// Returns OK on success, non-OK on failure.
+// Note: consider setting options.sync = true.
+Status
+DbImpl::Write(const WriteOptions& options, WriteBatch* updates)
+{
+	WT_CURSOR *cursor = getCursor();
+	WriteBatchHandler handler(cursor);
+	Status status = updates->Iterate(&handler);
+	assert(handler.getStatus() == 0);
+	return status;
+}
+
+// If the database contains an entry for "key" store the
+// corresponding value in *value and return OK.
+//
+// If there is no entry for "key" leave *value unchanged and return
+// a status for which Status::IsNotFound() returns true.
+//
+// May return some other Status on an error.
+Status
+DbImpl::Get(const ReadOptions& options,
+	     const Slice& key, std::string* value)
+{
+	WT_CURSOR *cursor = getCursor();
+	WT_ITEM item;
+
+	item.data = key.data();
+	item.size = key.size();
+	cursor->set_key(cursor, &item);
+	int ret = cursor->search(cursor);
+	if (ret == WT_NOTFOUND)
+		return Status::NotFound("DB::Get key not found");
+	assert(ret == 0);
+	ret = cursor->get_value(cursor, &item);
+	assert(ret == 0);
+	*value = std::string((const char *)item.data, item.size);
+	return Status::OK();
+}
+
+#ifdef WITHOUT_HYPERLEVELDB
+Status
+DbImpl::Get(const ReadOptions& options,
+	     const Slice& key, Value* value)
+{
+	WT_CURSOR *cursor = getCursor();
+	WT_ITEM item;
+
+	item.data = key.data();
+	item.size = key.size();
+	cursor->set_key(cursor, &item);
+	int ret = cursor->search(cursor);
+	if (ret == WT_NOTFOUND)
+		return Status::NotFound("DB::Get key not found");
+	assert(ret == 0);
+	ret = cursor->get_value(cursor, &item);
+	assert(ret == 0);
+	value->assign((const char *)item.data, item.size);
+	return Status::OK();
+}
+#endif
+
+// Return a heap-allocated iterator over the contents of the database.
+// The result of NewIterator() is initially invalid (caller must
+// call one of the Seek methods on the iterator before using it).
+//
+// Caller should delete the iterator when it is no longer needed.
+// The returned iterator should be deleted before this db is deleted.
+Iterator *
+DbImpl::NewIterator(const ReadOptions& options)
+{
+	return new IteratorImpl(getCursor(), this, options);
+}
+
+// Return a handle to the current DB state.  Iterators created with
+// this handle will all observe a stable snapshot of the current DB
+// state.  The caller must call ReleaseSnapshot(result) when the
+// snapshot is no longer needed.
+const Snapshot *
+DbImpl::GetSnapshot()
+{
+	return new SnapshotImpl(this);
+}
+
+// Release a previously acquired snapshot.  The caller must not
+// use "snapshot" after this call.
+void
+DbImpl::ReleaseSnapshot(const Snapshot* snapshot)
+{
+	delete (SnapshotImpl *)snapshot;
+}
+
+// DB implementations can export properties about their state
+// via this method.  If "property" is a valid property understood by this
+// DB implementation, fills "*value" with its current value and returns
+// true.  Otherwise returns false.
+//
+//
+// Valid property names include:
+//
+//  "leveldb.num-files-at-level<N>" - return the number of files at level <N>,
+//     where <N> is an ASCII representation of a level number (e.g. "0").
+//  "leveldb.stats" - returns a multi-line string that describes statistics
+//     about the internal operation of the DB.
+//  "leveldb.sstables" - returns a multi-line string that describes all
+//     of the sstables that make up the db contents.
+bool
+DbImpl::GetProperty(const Slice& property, std::string* value)
+{
+	/* Not supported */
+	return false;
+}
+
+// For each i in [0,n-1], store in "sizes[i]", the approximate
+// file system space used by keys in "[range[i].start .. range[i].limit)".
+//
+// Note that the returned sizes measure file system space usage, so
+// if the user data compresses by a factor of ten, the returned
+// sizes will be one-tenth the size of the corresponding user data size.
+//
+// The results may not include the sizes of recently written data.
+void
+DbImpl::GetApproximateSizes(const Range* range, int n,
+			   uint64_t* sizes)
+{
+	int i;
+
+	/* XXX Not supported */
+	for (i = 0; i < n; i++)
+		sizes[i] = 1;
+}
+
+// Compact the underlying storage for the key range [*begin,*end].
+// In particular, deleted and overwritten versions are discarded,
+// and the data is rearranged to reduce the cost of operations
+// needed to access the data.  This operation should typically only
+// be invoked by users who understand the underlying implementation.
+//
+// begin==NULL is treated as a key before all keys in the database.
+// end==NULL is treated as a key after all keys in the database.
+// Therefore the following call will compact the entire database:
+//    db->CompactRange(NULL, NULL);
+void
+DbImpl::CompactRange(const Slice* begin, const Slice* end)
+{
+	/* Not supported */
+	assert(0);
+}
+
+// Suspends the background compaction thread.  This methods
+// returns once suspended.
+void
+DbImpl::SuspendCompactions()
+{
+	/* Not supported */
+	assert(0);
+}
+
+// Resumes a suspended background compation thread.
+void
+DbImpl::ResumeCompactions()
+{
+	/* Not supported */
+	assert(0);
+}
+
+// Position at the first key in the source.  The iterator is Valid()
+// after this call iff the source is not empty.
+void
+IteratorImpl::SeekToFirst()
+{
+	WT_ITEM item;
+
+	int ret = cursor_->reset(cursor_);
+	assert(ret == 0);
+	ret = cursor_->next(cursor_);
+	if (ret != 0) {
+		valid_ = false;
+		return;
+	}
+	ret = cursor_->get_key(cursor_, &item);
+	assert(ret == 0);
+	key_ = Slice((const char *)item.data, item.size);
+	ret = cursor_->get_value(cursor_, &item);
+	assert(ret == 0);
+	value_ = Slice((const char *)item.data, item.size);
+	valid_ = true;
+}
+
+// Position at the last key in the source.  The iterator is
+// Valid() after this call iff the source is not empty.
+void
+IteratorImpl::SeekToLast()
+{
+	WT_ITEM item;
+
+	int ret = cursor_->reset(cursor_);
+	assert(ret == 0);
+	ret = cursor_->prev(cursor_);
+	if (ret != 0) {
+		valid_ = false;
+		return;
+	}
+	ret = cursor_->get_key(cursor_, &item);
+	assert(ret == 0);
+	key_ = Slice((const char *)item.data, item.size);
+	ret = cursor_->get_value(cursor_, &item);
+	assert(ret == 0);
+	value_ = Slice((const char *)item.data, item.size);
+	valid_ = true;
+}
+
+// Position at the first key in the source that at or past target
+// The iterator is Valid() after this call iff the source contains
+// an entry that comes at or past target.
+void
+IteratorImpl::Seek(const Slice& target)
+{
+	WT_ITEM item;
+
+	item.data = target.data();
+	item.size = target.size();
+	cursor_->set_key(cursor_, &item);
+	int cmp, ret = cursor_->search_near(cursor_, &cmp);
+	if (ret == 0 && cmp < 0)
+		ret = cursor_->next(cursor_);
+	if (ret == WT_NOTFOUND)
+		status_ = Status::NotFound("Iterator::Seek key not found");
+	if (ret != 0) {
+		valid_ = false;
+		return;
+	}
+	ret = cursor_->get_key(cursor_, &item);
+	assert(ret == 0);
+	key_ = Slice((const char *)item.data, item.size);
+	ret = cursor_->get_value(cursor_, &item);
+	assert(ret == 0);
+	value_ = Slice((const char *)item.data, item.size);
+	valid_ = true;
+}
+
+// Moves to the next entry in the source.  After this call, Valid() is
+// true iff the iterator was not positioned at the last entry in the source.
+// REQUIRES: Valid()
+void
+IteratorImpl::Next()
+{
+	WT_ITEM item;
+
+	assert(valid_);
+
+	int ret = cursor_->next(cursor_);
+	if (ret == WT_NOTFOUND)
+		status_ = Status::NotFound("Iterator::Next no more records");
+	if (ret != 0) {
+		valid_ = false;
+		return;
+	}
+	ret = cursor_->get_key(cursor_, &item);
+	assert(ret == 0);
+	key_ = Slice((const char *)item.data, item.size);
+	ret = cursor_->get_value(cursor_, &item);
+	assert(ret == 0);
+	value_ = Slice((const char *)item.data, item.size);
+	valid_ = true;
+}
+
+// Moves to the previous entry in the source.  After this call, Valid() is
+// true iff the iterator was not positioned at the first entry in source.
+// REQUIRES: Valid()
+void
+IteratorImpl::Prev()
+{
+	WT_ITEM item;
+
+	assert(valid_);
+
+	int ret = cursor_->prev(cursor_);
+	if (ret == WT_NOTFOUND)
+		status_ = Status::NotFound("Iterator::Prev no more records");
+	if (ret != 0) {
+		valid_ = false;
+		return;
+	}
+	ret = cursor_->get_key(cursor_, &item);
+	assert(ret == 0);
+	key_ = Slice((const char *)item.data, item.size);
+	ret = cursor_->get_value(cursor_, &item);
+	assert(ret == 0);
+	value_ = Slice((const char *)item.data, item.size);
+	valid_ = true;
+}
diff --git a/api/leveldb/leveldb_wt.h b/api/leveldb/leveldb_wt.h
new file mode 100644
index 00000000000..0c46456a9ef
--- /dev/null
+++ b/api/leveldb/leveldb_wt.h
@@ -0,0 +1,23 @@
+#ifndef WITHOUT_HYPERLEVELDB
+#include <hyperleveldb/comparator.h>
+#include <hyperleveldb/db.h>
+#include <hyperleveldb/env.h>
+#include <hyperleveldb/filter_policy.h>
+#include <hyperleveldb/slice.h>
+#include <hyperleveldb/slice.h>
+#include <hyperleveldb/status.h>
+#include <hyperleveldb/table_builder.h>
+#include <hyperleveldb/write_batch.h>
+#else
+#include <leveldb/comparator.h>
+#include <leveldb/db.h>
+#include <leveldb/env.h>
+#include <leveldb/filter_policy.h>
+#include <leveldb/slice.h>
+#include <leveldb/slice.h>
+#include <leveldb/status.h>
+#include <leveldb/table_builder.h>
+#include <leveldb/write_batch.h>
+#endif
+
+#include "wiredtiger.h"
diff --git a/api/leveldb/port/port.h b/api/leveldb/port/port.h
new file mode 100644
index 00000000000..b6f551182da
--- /dev/null
+++ b/api/leveldb/port/port.h
@@ -0,0 +1,11 @@
+#ifndef _PORT_H_
+#define _PORT_H_ 1
+/* Stub portability header for imported LevelDB code. */
+
+#include "wiredtiger.h"
+
+namespace port {
+	const int kLittleEndian = 1;
+}
+
+#endif
diff --git a/api/leveldb/util/arena.h b/api/leveldb/util/arena.h
new file mode 100644
index 00000000000..8f7dde226c4
--- /dev/null
+++ b/api/leveldb/util/arena.h
@@ -0,0 +1,68 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_UTIL_ARENA_H_
+#define STORAGE_LEVELDB_UTIL_ARENA_H_
+
+#include <cstddef>
+#include <vector>
+#include <assert.h>
+#include <stdint.h>
+
+namespace leveldb {
+
+class Arena {
+ public:
+  Arena();
+  ~Arena();
+
+  // Return a pointer to a newly allocated memory block of "bytes" bytes.
+  char* Allocate(size_t bytes);
+
+  // Allocate memory with the normal alignment guarantees provided by malloc
+  char* AllocateAligned(size_t bytes);
+
+  // Returns an estimate of the total memory usage of data allocated
+  // by the arena (including space allocated but not yet used for user
+  // allocations).
+  size_t MemoryUsage() const {
+    return blocks_memory_ + blocks_.capacity() * sizeof(char*);
+  }
+
+ private:
+  char* AllocateFallback(size_t bytes);
+  char* AllocateNewBlock(size_t block_bytes);
+
+  // Allocation state
+  char* alloc_ptr_;
+  size_t alloc_bytes_remaining_;
+
+  // Array of new[] allocated memory blocks
+  std::vector<char*> blocks_;
+
+  // Bytes of memory in blocks allocated so far
+  size_t blocks_memory_;
+
+  // No copying allowed
+  Arena(const Arena&);
+  void operator=(const Arena&);
+};
+
+inline char* Arena::Allocate(size_t bytes) {
+  // The semantics of what to return are a bit messy if we allow
+  // 0-byte allocations, so we disallow them here (we don't need
+  // them for our internal use).
+  assert(bytes > 0);
+  if (bytes <= alloc_bytes_remaining_) {
+    char* result = alloc_ptr_;
+    alloc_ptr_ += bytes;
+    alloc_bytes_remaining_ -= bytes;
+    return result;
+  }
+  return AllocateFallback(bytes);
+}
+
+}  // namespace leveldb
+
+#endif  // STORAGE_LEVELDB_UTIL_ARENA_H_
diff --git a/api/leveldb/util/coding.cc b/api/leveldb/util/coding.cc
new file mode 100644
index 00000000000..dbd7a6545c6
--- /dev/null
+++ b/api/leveldb/util/coding.cc
@@ -0,0 +1,194 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "util/coding.h"
+
+namespace leveldb {
+
+void EncodeFixed32(char* buf, uint32_t value) {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+  memcpy(buf, &value, sizeof(value));
+#else
+  buf[0] = value & 0xff;
+  buf[1] = (value >> 8) & 0xff;
+  buf[2] = (value >> 16) & 0xff;
+  buf[3] = (value >> 24) & 0xff;
+#endif
+}
+
+void EncodeFixed64(char* buf, uint64_t value) {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+  memcpy(buf, &value, sizeof(value));
+#else
+  buf[0] = value & 0xff;
+  buf[1] = (value >> 8) & 0xff;
+  buf[2] = (value >> 16) & 0xff;
+  buf[3] = (value >> 24) & 0xff;
+  buf[4] = (value >> 32) & 0xff;
+  buf[5] = (value >> 40) & 0xff;
+  buf[6] = (value >> 48) & 0xff;
+  buf[7] = (value >> 56) & 0xff;
+#endif
+}
+
+void PutFixed32(std::string* dst, uint32_t value) {
+  char buf[sizeof(value)];
+  EncodeFixed32(buf, value);
+  dst->append(buf, sizeof(buf));
+}
+
+void PutFixed64(std::string* dst, uint64_t value) {
+  char buf[sizeof(value)];
+  EncodeFixed64(buf, value);
+  dst->append(buf, sizeof(buf));
+}
+
+char* EncodeVarint32(char* dst, uint32_t v) {
+  // Operate on characters as unsigneds
+  unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
+  static const int B = 128;
+  if (v < (1<<7)) {
+    *(ptr++) = v;
+  } else if (v < (1<<14)) {
+    *(ptr++) = v | B;
+    *(ptr++) = v>>7;
+  } else if (v < (1<<21)) {
+    *(ptr++) = v | B;
+    *(ptr++) = (v>>7) | B;
+    *(ptr++) = v>>14;
+  } else if (v < (1<<28)) {
+    *(ptr++) = v | B;
+    *(ptr++) = (v>>7) | B;
+    *(ptr++) = (v>>14) | B;
+    *(ptr++) = v>>21;
+  } else {
+    *(ptr++) = v | B;
+    *(ptr++) = (v>>7) | B;
+    *(ptr++) = (v>>14) | B;
+    *(ptr++) = (v>>21) | B;
+    *(ptr++) = v>>28;
+  }
+  return reinterpret_cast<char*>(ptr);
+}
+
+void PutVarint32(std::string* dst, uint32_t v) {
+  char buf[5];
+  char* ptr = EncodeVarint32(buf, v);
+  dst->append(buf, ptr - buf);
+}
+
+char* EncodeVarint64(char* dst, uint64_t v) {
+  static const int B = 128;
+  unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
+  while (v >= B) {
+    *(ptr++) = (v & (B-1)) | B;
+    v >>= 7;
+  }
+  *(ptr++) = static_cast<unsigned char>(v);
+  return reinterpret_cast<char*>(ptr);
+}
+
+void PutVarint64(std::string* dst, uint64_t v) {
+  char buf[10];
+  char* ptr = EncodeVarint64(buf, v);
+  dst->append(buf, ptr - buf);
+}
+
+void PutLengthPrefixedSlice(std::string* dst, const Slice& value) {
+  PutVarint32(dst, value.size());
+  dst->append(value.data(), value.size());
+}
+
+int VarintLength(uint64_t v) {
+  int len = 1;
+  while (v >= 128) {
+    v >>= 7;
+    len++;
+  }
+  return len;
+}
+
+const char* GetVarint32PtrFallback(const char* p,
+                                   const char* limit,
+                                   uint32_t* value) {
+  uint32_t result = 0;
+  for (uint32_t shift = 0; shift <= 28 && p < limit; shift += 7) {
+    uint32_t byte = *(reinterpret_cast<const unsigned char*>(p));
+    p++;
+    if (byte & 128) {
+      // More bytes are present
+      result |= ((byte & 127) << shift);
+    } else {
+      result |= (byte << shift);
+      *value = result;
+      return reinterpret_cast<const char*>(p);
+    }
+  }
+  return NULL;
+}
+
+bool GetVarint32(Slice* input, uint32_t* value) {
+  const char* p = input->data();
+  const char* limit = p + input->size();
+  const char* q = GetVarint32Ptr(p, limit, value);
+  if (q == NULL) {
+    return false;
+  } else {
+    *input = Slice(q, limit - q);
+    return true;
+  }
+}
+
+const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* value) {
+  uint64_t result = 0;
+  for (uint32_t shift = 0; shift <= 63 && p < limit; shift += 7) {
+    uint64_t byte = *(reinterpret_cast<const unsigned char*>(p));
+    p++;
+    if (byte & 128) {
+      // More bytes are present
+      result |= ((byte & 127) << shift);
+    } else {
+      result |= (byte << shift);
+      *value = result;
+      return reinterpret_cast<const char*>(p);
+    }
+  }
+  return NULL;
+}
+
+bool GetVarint64(Slice* input, uint64_t* value) {
+  const char* p = input->data();
+  const char* limit = p + input->size();
+  const char* q = GetVarint64Ptr(p, limit, value);
+  if (q == NULL) {
+    return false;
+  } else {
+    *input = Slice(q, limit - q);
+    return true;
+  }
+}
+
+const char* GetLengthPrefixedSlice(const char* p, const char* limit,
+                                   Slice* result) {
+  uint32_t len;
+  p = GetVarint32Ptr(p, limit, &len);
+  if (p == NULL) return NULL;
+  if (p + len > limit) return NULL;
+  *result = Slice(p, len);
+  return p + len;
+}
+
+bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
+  uint32_t len;
+  if (GetVarint32(input, &len) &&
+      input->size() >= len) {
+    *result = Slice(input->data(), len);
+    input->remove_prefix(len);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+}  // namespace leveldb
diff --git a/api/leveldb/util/coding.h b/api/leveldb/util/coding.h
new file mode 100644
index 00000000000..cba97ec421a
--- /dev/null
+++ b/api/leveldb/util/coding.h
@@ -0,0 +1,104 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Endian-neutral encoding:
+// * Fixed-length numbers are encoded with least-significant byte first
+// * In addition we support variable length "varint" encoding
+// * Strings are encoded prefixed by their length in varint format
+
+#ifndef STORAGE_LEVELDB_UTIL_CODING_H_
+#define STORAGE_LEVELDB_UTIL_CODING_H_
+
+#include <stdint.h>
+#include <string.h>
+#include <string>
+#include "leveldb_wt.h"
+#include "port/port.h"
+
+namespace leveldb {
+
+// Standard Put... routines append to a string
+extern void PutFixed32(std::string* dst, uint32_t value);
+extern void PutFixed64(std::string* dst, uint64_t value);
+extern void PutVarint32(std::string* dst, uint32_t value);
+extern void PutVarint64(std::string* dst, uint64_t value);
+extern void PutLengthPrefixedSlice(std::string* dst, const Slice& value);
+
+// Standard Get... routines parse a value from the beginning of a Slice
+// and advance the slice past the parsed value.
+extern bool GetVarint32(Slice* input, uint32_t* value);
+extern bool GetVarint64(Slice* input, uint64_t* value);
+extern bool GetLengthPrefixedSlice(Slice* input, Slice* result);
+
+// Pointer-based variants of GetVarint...  These either store a value
+// in *v and return a pointer just past the parsed value, or return
+// NULL on error.  These routines only look at bytes in the range
+// [p..limit-1]
+extern const char* GetVarint32Ptr(const char* p,const char* limit, uint32_t* v);
+extern const char* GetVarint64Ptr(const char* p,const char* limit, uint64_t* v);
+
+// Returns the length of the varint32 or varint64 encoding of "v"
+extern int VarintLength(uint64_t v);
+
+// Lower-level versions of Put... that write directly into a character buffer
+// REQUIRES: dst has enough space for the value being written
+extern void EncodeFixed32(char* dst, uint32_t value);
+extern void EncodeFixed64(char* dst, uint64_t value);
+
+// Lower-level versions of Put... that write directly into a character buffer
+// and return a pointer just past the last byte written.
+// REQUIRES: dst has enough space for the value being written
+extern char* EncodeVarint32(char* dst, uint32_t value);
+extern char* EncodeVarint64(char* dst, uint64_t value);
+
+// Lower-level versions of Get... that read directly from a character buffer
+// without any bounds checking.
+
+inline uint32_t DecodeFixed32(const char* ptr) {
+  if (port::kLittleEndian) {
+    // Load the raw bytes
+    uint32_t result;
+    memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
+    return result;
+  } else {
+    return ((static_cast<uint32_t>(static_cast<unsigned char>(ptr[0])))
+        | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[1])) << 8)
+        | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[2])) << 16)
+        | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[3])) << 24));
+  }
+}
+
+inline uint64_t DecodeFixed64(const char* ptr) {
+  if (port::kLittleEndian) {
+    // Load the raw bytes
+    uint64_t result;
+    memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
+    return result;
+  } else {
+    uint64_t lo = DecodeFixed32(ptr);
+    uint64_t hi = DecodeFixed32(ptr + 4);
+    return (hi << 32) | lo;
+  }
+}
+
+// Internal routine for use by fallback path of GetVarint32Ptr
+extern const char* GetVarint32PtrFallback(const char* p,
+                                          const char* limit,
+                                          uint32_t* value);
+inline const char* GetVarint32Ptr(const char* p,
+                                  const char* limit,
+                                  uint32_t* value) {
+  if (p < limit) {
+    uint32_t result = *(reinterpret_cast<const unsigned char*>(p));
+    if ((result & 128) == 0) {
+      *value = result;
+      return p + 1;
+    }
+  }
+  return GetVarint32PtrFallback(p, limit, value);
+}
+
+}  // namespace leveldb
+
+#endif  // STORAGE_LEVELDB_UTIL_CODING_H_
diff --git a/api/leveldb/util/comparator.cc b/api/leveldb/util/comparator.cc
new file mode 100644
index 00000000000..c6c9e17efee
--- /dev/null
+++ b/api/leveldb/util/comparator.cc
@@ -0,0 +1,80 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <algorithm>
+#include <stdint.h>
+#include "leveldb_wt.h"
+#include "port/port.h"
+#include "util/logging.h"
+
+namespace leveldb {
+
+Comparator::~Comparator() { }
+
+#ifndef WITHOUT_HYPERLEVELDB
+uint64_t Comparator::KeyNum(const Slice& key) const {
+  return 0;
+}
+#endif
+
+namespace {
+class BytewiseComparatorImpl : public Comparator {
+ public:
+  BytewiseComparatorImpl() { }
+
+  virtual const char* Name() const {
+    return "leveldb.BytewiseComparator";
+  }
+
+  virtual int Compare(const Slice& a, const Slice& b) const {
+    return a.compare(b);
+  }
+
+  virtual void FindShortestSeparator(
+      std::string* start,
+      const Slice& limit) const {
+    // Find length of common prefix
+    size_t min_length = std::min(start->size(), limit.size());
+    size_t diff_index = 0;
+    while ((diff_index < min_length) &&
+           ((*start)[diff_index] == limit[diff_index])) {
+      diff_index++;
+    }
+
+    if (diff_index >= min_length) {
+      // Do not shorten if one string is a prefix of the other
+    } else {
+      uint8_t diff_byte = static_cast<uint8_t>((*start)[diff_index]);
+      if (diff_byte < static_cast<uint8_t>(0xff) &&
+          diff_byte + 1 < static_cast<uint8_t>(limit[diff_index])) {
+        (*start)[diff_index]++;
+        start->resize(diff_index + 1);
+        assert(Compare(*start, limit) < 0);
+      }
+    }
+  }
+
+  virtual void FindShortSuccessor(std::string* key) const {
+    // Find first character that can be incremented
+    size_t n = key->size();
+    for (size_t i = 0; i < n; i++) {
+      const uint8_t byte = (*key)[i];
+      if (byte != static_cast<uint8_t>(0xff)) {
+        (*key)[i] = byte + 1;
+        key->resize(i+1);
+        return;
+      }
+    }
+    // *key is a run of 0xffs.  Leave it alone.
+  }
+};
+}  // namespace
+
+static const Comparator* bytewise = new BytewiseComparatorImpl;
+
+const Comparator* BytewiseComparator() {
+  return bytewise;
+}
+
+}  // namespace leveldb
diff --git a/api/leveldb/util/env.cc b/api/leveldb/util/env.cc
new file mode 100644
index 00000000000..33062fbdd1b
--- /dev/null
+++ b/api/leveldb/util/env.cc
@@ -0,0 +1,96 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "hyperleveldb/env.h"
+
+namespace leveldb {
+
+Env::~Env() {
+}
+
+SequentialFile::~SequentialFile() {
+}
+
+RandomAccessFile::~RandomAccessFile() {
+}
+
+WritableFile::~WritableFile() {
+}
+
+Logger::~Logger() {
+}
+
+FileLock::~FileLock() {
+}
+
+void Log(Logger* info_log, const char* format, ...) {
+  if (info_log != NULL) {
+    va_list ap;
+    va_start(ap, format);
+    info_log->Logv(format, ap);
+    va_end(ap);
+  }
+}
+
+static Status DoWriteStringToFile(Env* env, const Slice& data,
+                                  const std::string& fname,
+                                  bool should_sync) {
+  WritableFile* file;
+  Status s = env->NewWritableFile(fname, &file);
+  if (!s.ok()) {
+    return s;
+  }
+  s = file->Append(data);
+  if (s.ok() && should_sync) {
+    s = file->Sync();
+  }
+  if (s.ok()) {
+    s = file->Close();
+  }
+  delete file;  // Will auto-close if we did not close above
+  if (!s.ok()) {
+    env->DeleteFile(fname);
+  }
+  return s;
+}
+
+Status WriteStringToFile(Env* env, const Slice& data,
+                         const std::string& fname) {
+  return DoWriteStringToFile(env, data, fname, false);
+}
+
+Status WriteStringToFileSync(Env* env, const Slice& data,
+                             const std::string& fname) {
+  return DoWriteStringToFile(env, data, fname, true);
+}
+
+Status ReadFileToString(Env* env, const std::string& fname, std::string* data) {
+  data->clear();
+  SequentialFile* file;
+  Status s = env->NewSequentialFile(fname, &file);
+  if (!s.ok()) {
+    return s;
+  }
+  static const int kBufferSize = 8192;
+  char* space = new char[kBufferSize];
+  while (true) {
+    Slice fragment;
+    s = file->Read(kBufferSize, &fragment, space);
+    if (!s.ok()) {
+      break;
+    }
+    data->append(fragment.data(), fragment.size());
+    if (fragment.empty()) {
+      break;
+    }
+  }
+  delete[] space;
+  delete file;
+  return s;
+}
+
+EnvWrapper::~EnvWrapper() {
+}
+
+}  // namespace leveldb
diff --git a/api/leveldb/util/env_posix.cc b/api/leveldb/util/env_posix.cc
new file mode 100644
index 00000000000..66dab413360
--- /dev/null
+++ b/api/leveldb/util/env_posix.cc
@@ -0,0 +1,617 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <deque>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#if defined(LEVELDB_PLATFORM_ANDROID)
+#include <sys/stat.h>
+#endif
+#include "leveldb_wt.h"
+#include "port/port.h"
+#include "util/logging.h"
+#include "util/posix_logger.h"
+
+namespace leveldb {
+
+namespace {
+
+static Status IOError(const std::string& context, int err_number) {
+  return Status::IOError(context, strerror(err_number));
+}
+
+class PosixSequentialFile: public SequentialFile {
+ private:
+  std::string filename_;
+  FILE* file_;
+
+ public:
+  PosixSequentialFile(const std::string& fname, FILE* f)
+      : filename_(fname), file_(f) { }
+  virtual ~PosixSequentialFile() { fclose(file_); }
+
+  virtual Status Read(size_t n, Slice* result, char* scratch) {
+    Status s;
+    size_t r = fread_unlocked(scratch, 1, n, file_);
+    *result = Slice(scratch, r);
+    if (r < n) {
+      if (feof(file_)) {
+        // We leave status as ok if we hit the end of the file
+      } else {
+        // A partial read with an error: return a non-ok status
+        s = IOError(filename_, errno);
+      }
+    }
+    return s;
+  }
+
+  virtual Status Skip(uint64_t n) {
+    if (fseek(file_, n, SEEK_CUR)) {
+      return IOError(filename_, errno);
+    }
+    return Status::OK();
+  }
+};
+
+// pread() based random-access
+class PosixRandomAccessFile: public RandomAccessFile {
+ private:
+  std::string filename_;
+  int fd_;
+
+ public:
+  PosixRandomAccessFile(const std::string& fname, int fd)
+      : filename_(fname), fd_(fd) { }
+  virtual ~PosixRandomAccessFile() { close(fd_); }
+
+  virtual Status Read(uint64_t offset, size_t n, Slice* result,
+                      char* scratch) const {
+    Status s;
+    ssize_t r = pread(fd_, scratch, n, static_cast<off_t>(offset));
+    *result = Slice(scratch, (r < 0) ? 0 : r);
+    if (r < 0) {
+      // An error: return a non-ok status
+      s = IOError(filename_, errno);
+    }
+    return s;
+  }
+};
+
+// mmap() based random-access
+class PosixMmapReadableFile: public RandomAccessFile {
+ private:
+  std::string filename_;
+  void* mmapped_region_;
+  size_t length_;
+
+ public:
+  // base[0,length-1] contains the mmapped contents of the file.
+  PosixMmapReadableFile(const std::string& fname, void* base, size_t length)
+      : filename_(fname), mmapped_region_(base), length_(length) { }
+  virtual ~PosixMmapReadableFile() { munmap(mmapped_region_, length_); }
+
+  virtual Status Read(uint64_t offset, size_t n, Slice* result,
+                      char* scratch) const {
+    Status s;
+    if (offset + n > length_) {
+      *result = Slice();
+      s = IOError(filename_, EINVAL);
+    } else {
+      *result = Slice(reinterpret_cast<char*>(mmapped_region_) + offset, n);
+    }
+    return s;
+  }
+};
+
+// We preallocate up to an extra megabyte and use memcpy to append new
+// data to the file.  This is safe since we either properly close the
+// file before reading from it, or for log files, the reading code
+// knows enough to skip zero suffixes.
+class PosixMmapFile : public WritableFile {
+ private:
+  std::string filename_;
+  int fd_;
+  size_t page_size_;
+  size_t map_size_;       // How much extra memory to map at a time
+  char* base_;            // The mapped region
+  char* limit_;           // Limit of the mapped region
+  char* dst_;             // Where to write next  (in range [base_,limit_])
+  char* last_sync_;       // Where have we synced up to
+  uint64_t file_offset_;  // Offset of base_ in file
+
+  // Have we done an munmap of unsynced data?
+  bool pending_sync_;
+
+  // Roundup x to a multiple of y
+  static size_t Roundup(size_t x, size_t y) {
+    return ((x + y - 1) / y) * y;
+  }
+
+  size_t TruncateToPageBoundary(size_t s) {
+    s -= (s & (page_size_ - 1));
+    assert((s % page_size_) == 0);
+    return s;
+  }
+
+  bool UnmapCurrentRegion() {
+    bool result = true;
+    if (base_ != NULL) {
+      if (last_sync_ < limit_) {
+        // Defer syncing this data until next Sync() call, if any
+        pending_sync_ = true;
+      }
+      if (munmap(base_, limit_ - base_) != 0) {
+        result = false;
+      }
+      file_offset_ += limit_ - base_;
+      base_ = NULL;
+      limit_ = NULL;
+      last_sync_ = NULL;
+      dst_ = NULL;
+
+      // Increase the amount we map the next time, but capped at 1MB
+      if (map_size_ < (1<<20)) {
+        map_size_ *= 2;
+      }
+    }
+    return result;
+  }
+
+  bool MapNewRegion() {
+    assert(base_ == NULL);
+    if (ftruncate(fd_, file_offset_ + map_size_) < 0) {
+      return false;
+    }
+    void* ptr = mmap(NULL, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED,
+                     fd_, file_offset_);
+    if (ptr == MAP_FAILED) {
+      return false;
+    }
+    base_ = reinterpret_cast<char*>(ptr);
+    limit_ = base_ + map_size_;
+    dst_ = base_;
+    last_sync_ = base_;
+    return true;
+  }
+
+ public:
+  PosixMmapFile(const std::string& fname, int fd, size_t page_size)
+      : filename_(fname),
+        fd_(fd),
+        page_size_(page_size),
+        map_size_(Roundup(65536, page_size)),
+        base_(NULL),
+        limit_(NULL),
+        dst_(NULL),
+        last_sync_(NULL),
+        file_offset_(0),
+        pending_sync_(false) {
+    assert((page_size & (page_size - 1)) == 0);
+  }
+
+
+  ~PosixMmapFile() {
+    if (fd_ >= 0) {
+      PosixMmapFile::Close();
+    }
+  }
+
+  virtual Status Append(const Slice& data) {
+    const char* src = data.data();
+    size_t left = data.size();
+    while (left > 0) {
+      assert(base_ <= dst_);
+      assert(dst_ <= limit_);
+      size_t avail = limit_ - dst_;
+      if (avail == 0) {
+        if (!UnmapCurrentRegion() ||
+            !MapNewRegion()) {
+          return IOError(filename_, errno);
+        }
+      }
+
+      size_t n = (left <= avail) ? left : avail;
+      memcpy(dst_, src, n);
+      dst_ += n;
+      src += n;
+      left -= n;
+    }
+    return Status::OK();
+  }
+
+  virtual Status Close() {
+    Status s;
+    size_t unused = limit_ - dst_;
+    if (!UnmapCurrentRegion()) {
+      s = IOError(filename_, errno);
+    } else if (unused > 0) {
+      // Trim the extra space at the end of the file
+      if (ftruncate(fd_, file_offset_ - unused) < 0) {
+        s = IOError(filename_, errno);
+      }
+    }
+
+    if (close(fd_) < 0) {
+      if (s.ok()) {
+        s = IOError(filename_, errno);
+      }
+    }
+
+    fd_ = -1;
+    base_ = NULL;
+    limit_ = NULL;
+    return s;
+  }
+
+  virtual Status Flush() {
+    return Status::OK();
+  }
+
+  virtual Status Sync() {
+    Status s;
+
+    if (pending_sync_) {
+      // Some unmapped data was not synced
+      pending_sync_ = false;
+      if (fdatasync(fd_) < 0) {
+        s = IOError(filename_, errno);
+      }
+    }
+
+    if (dst_ > last_sync_) {
+      // Find the beginnings of the pages that contain the first and last
+      // bytes to be synced.
+      size_t p1 = TruncateToPageBoundary(last_sync_ - base_);
+      size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1);
+      last_sync_ = dst_;
+      if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) {
+        s = IOError(filename_, errno);
+      }
+    }
+
+    return s;
+  }
+
+#ifndef WITHOUT_HYPERLEVELDB
+  virtual Status WriteAt(uint64_t offset, const Slice& data) { return Status::NotSupported("sorry!"); }
+#endif
+};
+
+static int LockOrUnlock(int fd, bool lock) {
+  errno = 0;
+  struct flock f;
+  memset(&f, 0, sizeof(f));
+  f.l_type = (lock ? F_WRLCK : F_UNLCK);
+  f.l_whence = SEEK_SET;
+  f.l_start = 0;
+  f.l_len = 0;        // Lock/unlock entire file
+  return fcntl(fd, F_SETLK, &f);
+}
+
+class PosixFileLock : public FileLock {
+ public:
+  int fd_;
+};
+
+class PosixEnv : public Env {
+ public:
+  PosixEnv();
+  virtual ~PosixEnv() {
+    fprintf(stderr, "Destroying Env::Default()\n");
+    exit(1);
+  }
+
+  virtual Status NewSequentialFile(const std::string& fname,
+                                   SequentialFile** result) {
+    FILE* f = fopen(fname.c_str(), "r");
+    if (f == NULL) {
+      *result = NULL;
+      return IOError(fname, errno);
+    } else {
+      *result = new PosixSequentialFile(fname, f);
+      return Status::OK();
+    }
+  }
+
+  virtual Status NewRandomAccessFile(const std::string& fname,
+                                     RandomAccessFile** result) {
+    *result = NULL;
+    Status s;
+    int fd = open(fname.c_str(), O_RDONLY);
+    if (fd < 0) {
+      s = IOError(fname, errno);
+    } else if (sizeof(void*) >= 8) {
+      // Use mmap when virtual address-space is plentiful.
+      uint64_t size;
+      s = GetFileSize(fname, &size);
+      if (s.ok()) {
+        void* base = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
+        if (base != MAP_FAILED) {
+          *result = new PosixMmapReadableFile(fname, base, size);
+        } else {
+          s = IOError(fname, errno);
+        }
+      }
+      close(fd);
+    } else {
+      *result = new PosixRandomAccessFile(fname, fd);
+    }
+    return s;
+  }
+
+  virtual Status NewWritableFile(const std::string& fname,
+                                 WritableFile** result) {
+    Status s;
+    const int fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644);
+    if (fd < 0) {
+      *result = NULL;
+      s = IOError(fname, errno);
+    } else {
+      *result = new PosixMmapFile(fname, fd, page_size_);
+    }
+    return s;
+  }
+
+  virtual bool FileExists(const std::string& fname) {
+    return access(fname.c_str(), F_OK) == 0;
+  }
+
+  virtual Status GetChildren(const std::string& dir,
+                             std::vector<std::string>* result) {
+    result->clear();
+    DIR* d = opendir(dir.c_str());
+    if (d == NULL) {
+      return IOError(dir, errno);
+    }
+    struct dirent* entry;
+    while ((entry = readdir(d)) != NULL) {
+      result->push_back(entry->d_name);
+    }
+    closedir(d);
+    return Status::OK();
+  }
+
+  virtual Status DeleteFile(const std::string& fname) {
+    Status result;
+    if (unlink(fname.c_str()) != 0) {
+      result = IOError(fname, errno);
+    }
+    return result;
+  };
+
+  virtual Status CreateDir(const std::string& name) {
+    Status result;
+    if (mkdir(name.c_str(), 0755) != 0) {
+      result = IOError(name, errno);
+    }
+    return result;
+  };
+
+  virtual Status DeleteDir(const std::string& name) {
+    Status result;
+    if (rmdir(name.c_str()) != 0) {
+      result = IOError(name, errno);
+    }
+    return result;
+  };
+
+  virtual Status GetFileSize(const std::string& fname, uint64_t* size) {
+    Status s;
+    struct stat sbuf;
+    if (stat(fname.c_str(), &sbuf) != 0) {
+      *size = 0;
+      s = IOError(fname, errno);
+    } else {
+      *size = sbuf.st_size;
+    }
+    return s;
+  }
+
+  virtual Status RenameFile(const std::string& src, const std::string& target) {
+    Status result;
+    if (rename(src.c_str(), target.c_str()) != 0) {
+      result = IOError(src, errno);
+    }
+    return result;
+  }
+
+  virtual Status LockFile(const std::string& fname, FileLock** lock) {
+    *lock = NULL;
+    Status result;
+    int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
+    if (fd < 0) {
+      result = IOError(fname, errno);
+    } else if (LockOrUnlock(fd, true) == -1) {
+      result = IOError("lock " + fname, errno);
+      close(fd);
+    } else {
+      PosixFileLock* my_lock = new PosixFileLock;
+      my_lock->fd_ = fd;
+      *lock = my_lock;
+    }
+    return result;
+  }
+
+  virtual Status UnlockFile(FileLock* lock) {
+    PosixFileLock* my_lock = reinterpret_cast<PosixFileLock*>(lock);
+    Status result;
+    if (LockOrUnlock(my_lock->fd_, false) == -1) {
+      result = IOError("unlock", errno);
+    }
+    close(my_lock->fd_);
+    delete my_lock;
+    return result;
+  }
+
+  virtual void Schedule(void (*function)(void*), void* arg);
+
+  virtual void StartThread(void (*function)(void* arg), void* arg);
+
+  virtual Status GetTestDirectory(std::string* result) {
+    const char* env = getenv("TEST_TMPDIR");
+    if (env && env[0] != '\0') {
+      *result = env;
+    } else {
+      char buf[100];
+      snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d", int(geteuid()));
+      *result = buf;
+    }
+    // Directory may already exist
+    CreateDir(*result);
+    return Status::OK();
+  }
+
+  static uint64_t gettid() {
+    pthread_t tid = pthread_self();
+    uint64_t thread_id = 0;
+    memcpy(&thread_id, &tid, std::min(sizeof(thread_id), sizeof(tid)));
+    return thread_id;
+  }
+
+  virtual Status NewLogger(const std::string& fname, Logger** result) {
+    FILE* f = fopen(fname.c_str(), "w");
+    if (f == NULL) {
+      *result = NULL;
+      return IOError(fname, errno);
+    } else {
+      *result = new PosixLogger(f, &PosixEnv::gettid);
+      return Status::OK();
+    }
+  }
+
+  virtual uint64_t NowMicros() {
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+  }
+
+  virtual void SleepForMicroseconds(int micros) {
+    usleep(micros);
+  }
+
+#ifndef WITHOUT_HYPERLEVELDB
+  virtual Status CopyFile(const std::string&, const std::string&) { return Status::NotSupported("sorry!"); }
+  virtual Status LinkFile(const std::string&, const std::string&) { return Status::NotSupported("sorry!"); }
+#endif
+
+ private:
+  void PthreadCall(const char* label, int result) {
+    if (result != 0) {
+      fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
+      exit(1);
+    }
+  }
+
+  // BGThread() is the body of the background thread
+  void BGThread();
+  static void* BGThreadWrapper(void* arg) {
+    reinterpret_cast<PosixEnv*>(arg)->BGThread();
+    return NULL;
+  }
+
+  size_t page_size_;
+  pthread_mutex_t mu_;
+  pthread_cond_t bgsignal_;
+  pthread_t bgthread_;
+  bool started_bgthread_;
+
+  // Entry per Schedule() call
+  struct BGItem { void* arg; void (*function)(void*); };
+  typedef std::deque<BGItem> BGQueue;
+  BGQueue queue_;
+};
+
+PosixEnv::PosixEnv() : page_size_(getpagesize()),
+                       started_bgthread_(false) {
+  PthreadCall("mutex_init", pthread_mutex_init(&mu_, NULL));
+  PthreadCall("cvar_init", pthread_cond_init(&bgsignal_, NULL));
+}
+
+void PosixEnv::Schedule(void (*function)(void*), void* arg) {
+  PthreadCall("lock", pthread_mutex_lock(&mu_));
+
+  // Start background thread if necessary
+  if (!started_bgthread_) {
+    started_bgthread_ = true;
+    PthreadCall(
+        "create thread",
+        pthread_create(&bgthread_, NULL,  &PosixEnv::BGThreadWrapper, this));
+  }
+
+  // If the queue is currently empty, the background thread may currently be
+  // waiting.
+  if (queue_.empty()) {
+    PthreadCall("signal", pthread_cond_signal(&bgsignal_));
+  }
+
+  // Add to priority queue
+  queue_.push_back(BGItem());
+  queue_.back().function = function;
+  queue_.back().arg = arg;
+
+  PthreadCall("unlock", pthread_mutex_unlock(&mu_));
+}
+
+void PosixEnv::BGThread() {
+  while (true) {
+    // Wait until there is an item that is ready to run
+    PthreadCall("lock", pthread_mutex_lock(&mu_));
+    while (queue_.empty()) {
+      PthreadCall("wait", pthread_cond_wait(&bgsignal_, &mu_));
+    }
+
+    void (*function)(void*) = queue_.front().function;
+    void* arg = queue_.front().arg;
+    queue_.pop_front();
+
+    PthreadCall("unlock", pthread_mutex_unlock(&mu_));
+    (*function)(arg);
+  }
+}
+
+namespace {
+struct StartThreadState {
+  void (*user_function)(void*);
+  void* arg;
+};
+}
+static void* StartThreadWrapper(void* arg) {
+  StartThreadState* state = reinterpret_cast<StartThreadState*>(arg);
+  state->user_function(state->arg);
+  delete state;
+  return NULL;
+}
+
+void PosixEnv::StartThread(void (*function)(void* arg), void* arg) {
+  pthread_t t;
+  StartThreadState* state = new StartThreadState;
+  state->user_function = function;
+  state->arg = arg;
+  PthreadCall("start thread",
+              pthread_create(&t, NULL,  &StartThreadWrapper, state));
+}
+
+}  // namespace
+
+static pthread_once_t once = PTHREAD_ONCE_INIT;
+static Env* default_env;
+static void InitDefaultEnv() { default_env = new PosixEnv; }
+
+Env* Env::Default() {
+  pthread_once(&once, InitDefaultEnv);
+  return default_env;
+}
+
+}  // namespace leveldb
diff --git a/api/leveldb/util/logging.cc b/api/leveldb/util/logging.cc
new file mode 100644
index 00000000000..1d60ce22701
--- /dev/null
+++ b/api/leveldb/util/logging.cc
@@ -0,0 +1,80 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "util/logging.h"
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "leveldb_wt.h"
+
+namespace leveldb {
+
+void AppendNumberTo(std::string* str, uint64_t num) {
+  char buf[30];
+  snprintf(buf, sizeof(buf), "%llu", (unsigned long long) num);
+  str->append(buf);
+}
+
+void AppendEscapedStringTo(std::string* str, const Slice& value) {
+  for (size_t i = 0; i < value.size(); i++) {
+    char c = value[i];
+    if (c >= ' ' && c <= '~') {
+      str->push_back(c);
+    } else {
+      char buf[10];
+      snprintf(buf, sizeof(buf), "\\x%02x",
+               static_cast<unsigned int>(c) & 0xff);
+      str->append(buf);
+    }
+  }
+}
+
+std::string NumberToString(uint64_t num) {
+  std::string r;
+  AppendNumberTo(&r, num);
+  return r;
+}
+
+std::string EscapeString(const Slice& value) {
+  std::string r;
+  AppendEscapedStringTo(&r, value);
+  return r;
+}
+
+bool ConsumeChar(Slice* in, char c) {
+  if (!in->empty() && (*in)[0] == c) {
+    in->remove_prefix(1);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool ConsumeDecimalNumber(Slice* in, uint64_t* val) {
+  uint64_t v = 0;
+  int digits = 0;
+  while (!in->empty()) {
+    char c = (*in)[0];
+    if (c >= '0' && c <= '9') {
+      ++digits;
+      const int delta = (c - '0');
+      static const uint64_t kMaxUint64 = ~static_cast<uint64_t>(0);
+      if (v > kMaxUint64/10 ||
+          (v == kMaxUint64/10 && delta > kMaxUint64%10)) {
+        // Overflow
+        return false;
+      }
+      v = (v * 10) + delta;
+      in->remove_prefix(1);
+    } else {
+      break;
+    }
+  }
+  *val = v;
+  return (digits > 0);
+}
+
+}  // namespace leveldb
diff --git a/api/leveldb/util/logging.h b/api/leveldb/util/logging.h
new file mode 100644
index 00000000000..b0c5da813e8
--- /dev/null
+++ b/api/leveldb/util/logging.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Must not be included from any .h files to avoid polluting the namespace
+// with macros.
+
+#ifndef STORAGE_LEVELDB_UTIL_LOGGING_H_
+#define STORAGE_LEVELDB_UTIL_LOGGING_H_
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string>
+#include "port/port.h"
+
+namespace leveldb {
+
+class Slice;
+class WritableFile;
+
+// Append a human-readable printout of "num" to *str
+extern void AppendNumberTo(std::string* str, uint64_t num);
+
+// Append a human-readable printout of "value" to *str.
+// Escapes any non-printable characters found in "value".
+extern void AppendEscapedStringTo(std::string* str, const Slice& value);
+
+// Return a human-readable printout of "num"
+extern std::string NumberToString(uint64_t num);
+
+// Return a human-readable version of "value".
+// Escapes any non-printable characters found in "value".
+extern std::string EscapeString(const Slice& value);
+
+// If *in starts with "c", advances *in past the first character and
+// returns true.  Otherwise, returns false.
+extern bool ConsumeChar(Slice* in, char c);
+
+// Parse a human-readable number from "*in" into *value.  On success,
+// advances "*in" past the consumed number and sets "*val" to the
+// numeric value.  Otherwise, returns false and leaves *in in an
+// unspecified state.
+extern bool ConsumeDecimalNumber(Slice* in, uint64_t* val);
+
+}  // namespace leveldb
+
+#endif  // STORAGE_LEVELDB_UTIL_LOGGING_H_
diff --git a/api/leveldb/util/options.cc b/api/leveldb/util/options.cc
new file mode 100644
index 00000000000..6956ab6e739
--- /dev/null
+++ b/api/leveldb/util/options.cc
@@ -0,0 +1,29 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "hyperleveldb/options.h"
+
+#include "hyperleveldb/comparator.h"
+#include "hyperleveldb/env.h"
+
+namespace leveldb {
+
+Options::Options()
+    : comparator(BytewiseComparator()),
+      create_if_missing(false),
+      error_if_exists(false),
+      paranoid_checks(false),
+      env(Env::Default()),
+      info_log(NULL),
+      write_buffer_size(4<<20),
+      max_open_files(1000),
+      block_cache(NULL),
+      block_size(4096),
+      block_restart_interval(16),
+      compression(kSnappyCompression),
+      filter_policy(NULL) {
+}
+
+
+}  // namespace leveldb
diff --git a/api/leveldb/util/posix_logger.h b/api/leveldb/util/posix_logger.h
new file mode 100644
index 00000000000..f15de45e05e
--- /dev/null
+++ b/api/leveldb/util/posix_logger.h
@@ -0,0 +1,98 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Logger implementation that can be shared by all environments
+// where enough posix functionality is available.
+
+#ifndef STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_
+#define STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_
+
+#include <algorithm>
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include "leveldb_wt.h"
+
+namespace leveldb {
+
+class PosixLogger : public Logger {
+ private:
+  FILE* file_;
+  uint64_t (*gettid_)();  // Return the thread id for the current thread
+ public:
+  PosixLogger(FILE* f, uint64_t (*gettid)()) : file_(f), gettid_(gettid) { }
+  virtual ~PosixLogger() {
+    fclose(file_);
+  }
+  virtual void Logv(const char* format, va_list ap) {
+    const uint64_t thread_id = (*gettid_)();
+
+    // We try twice: the first time with a fixed-size stack allocated buffer,
+    // and the second time with a much larger dynamically allocated buffer.
+    char buffer[500];
+    for (int iter = 0; iter < 2; iter++) {
+      char* base;
+      int bufsize;
+      if (iter == 0) {
+        bufsize = sizeof(buffer);
+        base = buffer;
+      } else {
+        bufsize = 30000;
+        base = new char[bufsize];
+      }
+      char* p = base;
+      char* limit = base + bufsize;
+
+      struct timeval now_tv;
+      gettimeofday(&now_tv, NULL);
+      const time_t seconds = now_tv.tv_sec;
+      struct tm t;
+      localtime_r(&seconds, &t);
+      p += snprintf(p, limit - p,
+                    "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ",
+                    t.tm_year + 1900,
+                    t.tm_mon + 1,
+                    t.tm_mday,
+                    t.tm_hour,
+                    t.tm_min,
+                    t.tm_sec,
+                    static_cast<int>(now_tv.tv_usec),
+                    static_cast<long long unsigned int>(thread_id));
+
+      // Print the message
+      if (p < limit) {
+        va_list backup_ap;
+        va_copy(backup_ap, ap);
+        p += vsnprintf(p, limit - p, format, backup_ap);
+        va_end(backup_ap);
+      }
+
+      // Truncate to available space if necessary
+      if (p >= limit) {
+        if (iter == 0) {
+          continue;       // Try again with larger buffer
+        } else {
+          p = limit - 1;
+        }
+      }
+
+      // Add newline if necessary
+      if (p == base || p[-1] != '\n') {
+        *p++ = '\n';
+      }
+
+      assert(p <= limit);
+      fwrite(base, 1, p - base, file_);
+      fflush(file_);
+      if (base != buffer) {
+        delete[] base;
+      }
+      break;
+    }
+  }
+};
+
+}  // namespace leveldb
+
+#endif  // STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_
diff --git a/api/leveldb/util/random.h b/api/leveldb/util/random.h
new file mode 100644
index 00000000000..66e0c94e7cb
--- /dev/null
+++ b/api/leveldb/util/random.h
@@ -0,0 +1,72 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_UTIL_RANDOM_H_
+#define STORAGE_LEVELDB_UTIL_RANDOM_H_
+
+#include <stdint.h>
+
+namespace leveldb {
+
+// A very simple random number generator.  Not especially good at
+// generating truly random bits, but good enough for our needs in this
+// package.
+class Random {
+ private:
+  uint32_t seed_;
+ public:
+  explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) { }
+  uint32_t Next() {
+    static const uint32_t M = 2147483647L;   // 2^31-1
+    static const uint64_t A = 16807;  // bits 14, 8, 7, 5, 2, 1, 0
+    // We are computing
+    //       seed_ = (seed_ * A) % M,    where M = 2^31-1
+    //
+    // seed_ must not be zero or M, or else all subsequent computed values
+    // will be zero or M respectively.  For all other values, seed_ will end
+    // up cycling through every number in [1,M-1]
+    uint64_t product = seed_ * A;
+
+    // Compute (product % M) using the fact that ((x << 31) % M) == x.
+    seed_ = static_cast<uint32_t>((product >> 31) + (product & M));
+    // The first reduction may overflow by 1 bit, so we may need to
+    // repeat.  mod == M is not possible; using > allows the faster
+    // sign-bit-based test.
+    if (seed_ > M) {
+      seed_ -= M;
+    }
+    return seed_;
+  }
+  // Returns a uniformly distributed value in the range [0..n-1]
+  // REQUIRES: n > 0
+  uint32_t Uniform(int n) { return Next() % n; }
+
+  // Randomly returns true ~"1/n" of the time, and false otherwise.
+  // REQUIRES: n > 0
+  bool OneIn(int n) { return (Next() % n) == 0; }
+
+  // Skewed: pick "base" uniformly from range [0,max_log] and then
+  // return "base" random bits.  The effect is to pick a number in the
+  // range [0,2^max_log-1] with exponential bias towards smaller numbers.
+  uint32_t Skewed(int max_log) {
+    return Uniform(1 << Uniform(max_log + 1));
+  }
+
+  // Shuffle the array into random order
+  void Shuffle(int *array, int n) {
+  	if (n > 1) {
+	  int i;
+	  for (i=0; i<n-1; i++) {
+	  	int j = i + Next() / (2147483647 / (n-i) + 1);
+		int t = array[j];
+		array[j] = array[i];
+		array[i] = t;
+	  }
+	}
+  }
+};
+
+}  // namespace leveldb
+
+#endif  // STORAGE_LEVELDB_UTIL_RANDOM_H_
diff --git a/api/leveldb/util/status.cc b/api/leveldb/util/status.cc
new file mode 100644
index 00000000000..51a8210b679
--- /dev/null
+++ b/api/leveldb/util/status.cc
@@ -0,0 +1,74 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <stdio.h>
+#include "leveldb_wt.h"
+
+namespace leveldb {
+
+const char* Status::CopyState(const char* state) {
+  uint32_t size;
+  memcpy(&size, state, sizeof(size));
+  char* result = new char[size + 5];
+  memcpy(result, state, size + 5);
+  return result;
+}
+
+Status::Status(Code code, const Slice& msg, const Slice& msg2) {
+  assert(code != kOk);
+  const uint32_t len1 = msg.size();
+  const uint32_t len2 = msg2.size();
+  const uint32_t size = len1 + (len2 ? (2 + len2) : 0);
+  char* result = new char[size + 5];
+  memcpy(result, &size, sizeof(size));
+  result[4] = static_cast<char>(code);
+  memcpy(result + 5, msg.data(), len1);
+  if (len2) {
+    result[5 + len1] = ':';
+    result[6 + len1] = ' ';
+    memcpy(result + 7 + len1, msg2.data(), len2);
+  }
+  state_ = result;
+}
+
+std::string Status::ToString() const {
+  if (state_ == NULL) {
+    return "OK";
+  } else {
+    char tmp[30];
+    const char* type;
+    switch (code()) {
+      case kOk:
+        type = "OK";
+        break;
+      case kNotFound:
+        type = "NotFound: ";
+        break;
+      case kCorruption:
+        type = "Corruption: ";
+        break;
+      case kNotSupported:
+        type = "Not implemented: ";
+        break;
+      case kInvalidArgument:
+        type = "Invalid argument: ";
+        break;
+      case kIOError:
+        type = "IO error: ";
+        break;
+      default:
+        snprintf(tmp, sizeof(tmp), "Unknown code(%d): ",
+                 static_cast<int>(code()));
+        type = tmp;
+        break;
+    }
+    std::string result(type);
+    uint32_t length;
+    memcpy(&length, state_, sizeof(length));
+    result.append(state_ + 5, length);
+    return result;
+  }
+}
+
+}  // namespace leveldb
author	Michael Cahill <michael.cahill@wiredtiger.com>	2014-05-16 20:13:51 +1000
committer	Michael Cahill <michael.cahill@wiredtiger.com>	2014-05-16 20:13:51 +1000
commit	379875bc04a606b95edc091b1033d2087d8be1f2 (patch)
tree	9ae7c1d1e1b54877ff9e4ec3f13f6d7ee66ff634 /api
parent	b4c98612cbc759b6b1a33314f0f6ae39530c1701 (diff)
download	mongo-379875bc04a606b95edc091b1033d2087d8be1f2.tar.gz