summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/api/leveldb
diff options
context:
space:
mode:
authorRamon Fernandez <ramon@mongodb.com>2016-01-27 17:17:17 -0500
committerRamon Fernandez <ramon@mongodb.com>2016-01-27 17:17:21 -0500
commit90118b147a6943b19dc929862a11071538db1438 (patch)
treed1e2c409595332174953e7dbe2db262935d89ae5 /src/third_party/wiredtiger/api/leveldb
parentb8cad6a59cbce2831e69e6b94f9544d83d6e00b0 (diff)
downloadmongo-90118b147a6943b19dc929862a11071538db1438.tar.gz
Import wiredtiger-wiredtiger-2.7.0-505-g7fea169.tar.gz from wiredtiger branch mongodb-3.4
ref: 44463c5..7fea169 WT-2355 Fix minor scratch buffer usage in logging. WT-2348 xargs -P isn't portable WT-2347 Java: schema format edge cases WT-2344 OS X compiler warning WT-2342 Enhance wtperf to support background create and drop operations WT-2340 Add logging guarantee assertions, whitespace WT-2339 format post-rebalance verify failure (stress run #11586) WT-2338 Disable using pre-allocated log files when backup cursor is open WT-2335 NULL pointer crash in config_check_search with invalid configuration string WT-2333 Add a flag so drop doesn't block WT-2332 Bug in logging write-no-sync mode WT-2331 Checking of search() result for reference cursors before join() WT-2328 schema drop does direct unlink, it should use a block manager interface. WT-2326 Change WTPERF to use new memory allocation functions instead of the standard WT-2321 WT-2321: race between eviction and worker threads on the eviction queue WT-2320 Only check copyright when cutting releases WT-2316 stress test failure: WT_CURSOR.prev out-of-order returns WT-2314 page-swap error handling is inconsistent WT-2313 sweep-server: conn_dhandle.c, 610: dhandle != conn->cache->evict_file_next WT-2312 re-creating a deleted column-store page can corrupt the in-memory tree WT-2308 custom extractor for ref_cursors in join cursor WT-2305 Fix coverity scan issues on 23/12/2015 WT-2296 New log algorithm needs improving for sync/flush settings WT-2295 WT_SESSION.create does a full-scan of the main table WT-2287 WT_SESSION.rebalance WT-2275 broken DB after application crash WT-2267 Improve wtperf throttling implementation to provide steady load WT-2247 variable-length column-store in-memory page splits WT-2242 WiredTiger treats dead trees the same as other trees in eviction WT-2142 Connection cleanup in Python tests WT-2073 metadata cleanups WT-1801 Add a directory sync after rollback of a WT_SESSION::rename operation WT-1517 schema format edge cases SERVER-22064 Coverity analysis defect 77699: Unchecked return value SERVER-21619 sys-perf: WT crash during core_workloads_WT execution
Diffstat (limited to 'src/third_party/wiredtiger/api/leveldb')
-rw-r--r--src/third_party/wiredtiger/api/leveldb/Makefile.am81
-rw-r--r--src/third_party/wiredtiger/api/leveldb/basho/perf_count.cc657
-rw-r--r--src/third_party/wiredtiger/api/leveldb/basho/perf_count.h298
-rw-r--r--src/third_party/wiredtiger/api/leveldb/config.hin22
-rw-r--r--src/third_party/wiredtiger/api/leveldb/dummy.cc28
-rw-r--r--src/third_party/wiredtiger/api/leveldb/hyper_wt.cc415
-rw-r--r--src/third_party/wiredtiger/api/leveldb/hyperleveldb/AUTHORS15
-rw-r--r--src/third_party/wiredtiger/api/leveldb/hyperleveldb/LICENSE28
-rw-r--r--src/third_party/wiredtiger/api/leveldb/hyperleveldb/replay_iterator.h67
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/AUTHORS8
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/LICENSE27
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/db/dbformat.h233
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/db/skiplist.h379
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/db/write_batch.cc110
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/db/write_batch_internal.h53
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/cache.h110
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/comparator.h74
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/db.h350
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/env.h349
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/filter_policy.h78
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/iterator.h105
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/options.h258
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/slice.h127
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/status.h111
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/write_batch.h142
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/port/port.h38
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/util/arena.h68
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/util/coding.cc163
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/util/coding.h311
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/util/comparator.cc80
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/util/env.cc96
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/util/env_posix.cc625
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/util/logging.cc80
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/util/logging.h47
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/util/options.cc26
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/util/posix_logger.h98
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/util/random.h72
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb/util/status.cc74
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb_test.cc141
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb_wt.cc810
-rw-r--r--src/third_party/wiredtiger/api/leveldb/leveldb_wt.h461
-rw-r--r--src/third_party/wiredtiger/api/leveldb/rocks_wt.cc315
-rw-r--r--src/third_party/wiredtiger/api/leveldb/rocksdb/LICENSE35
-rw-r--r--src/third_party/wiredtiger/api/leveldb/rocksdb/PATENTS23
-rw-r--r--src/third_party/wiredtiger/api/leveldb/rocksdb/write_batch.cc275
45 files changed, 7963 insertions, 0 deletions
diff --git a/src/third_party/wiredtiger/api/leveldb/Makefile.am b/src/third_party/wiredtiger/api/leveldb/Makefile.am
new file mode 100644
index 00000000000..2cfd9d945a5
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/Makefile.am
@@ -0,0 +1,81 @@
+AM_CPPFLAGS = -I$(top_builddir) -I$(srcdir)/leveldb -I$(srcdir)/leveldb/include
+
+lib_LTLIBRARIES = libwiredtiger_leveldb.la
+
+noinst_PROGRAMS = leveldb_test
+
+# Setup the LevelDB headers to be installed in a wiredtiger/leveldb
+# subdirectory, so we don't interfere with other LevelDB installs.
+if HAVE_HYPERLEVELDB
+leveldbincludedir = $(includedir)/wiredtiger/hyperleveldb
+else
+if HAVE_ROCKSDB
+leveldbincludedir = $(includedir)/wiredtiger/rocksdb
+else
+leveldbincludedir = $(includedir)/wiredtiger/leveldb
+endif
+endif
+leveldbinclude_HEADERS = \
+ leveldb_wt_config.h \
+ leveldb/include/leveldb/cache.h \
+ leveldb/include/leveldb/comparator.h\
+ leveldb/include/leveldb/db.h \
+ leveldb/include/leveldb/env.h \
+ leveldb/include/leveldb/filter_policy.h \
+ leveldb/include/leveldb/iterator.h \
+ leveldb/include/leveldb/options.h \
+ leveldb/include/leveldb/slice.h \
+ leveldb/include/leveldb/status.h \
+ leveldb/include/leveldb/write_batch.h
+
+if HAVE_BASHOLEVELDB
+AM_CPPFLAGS += -I$(srcdir)/leveldb/include/leveldb -I$(srcdir)/basho
+leveldbinclude_HEADERS += \
+ basho/perf_count.h
+endif
+if HAVE_HYPERLEVELDB
+AM_CPPFLAGS += -I$(srcdir)/leveldb/include/leveldb -I$(srcdir)/hyperleveldb
+leveldbinclude_HEADERS += \
+ hyperleveldb/replay_iterator.h
+endif
+
+libwiredtiger_leveldb_la_LDFLAGS = -release @VERSION@
+libwiredtiger_leveldb_la_LIBADD = $(top_builddir)/libwiredtiger_static.la
+libwiredtiger_leveldb_la_SOURCES = \
+ leveldb_wt.cc \
+ leveldb/util/coding.cc leveldb/util/comparator.cc leveldb/util/env.cc leveldb/util/env_posix.cc \
+ leveldb/util/logging.cc leveldb/util/options.cc leveldb/util/status.cc
+
+if HAVE_BASHOLEVELDB
+libwiredtiger_leveldb_la_SOURCES += basho/perf_count.cc
+endif
+if HAVE_HYPERLEVELDB
+libwiredtiger_leveldb_la_SOURCES += hyper_wt.cc
+endif
+if HAVE_ROCKSDB
+libwiredtiger_leveldb_la_SOURCES += rocks_wt.cc rocksdb/write_batch.cc
+else
+libwiredtiger_leveldb_la_SOURCES += leveldb/db/write_batch.cc
+endif
+
+if HAVE_ROCKSDB
+pkglib_LTLIBRARIES = librocksdb.la
+else
+pkglib_LTLIBRARIES = libleveldb.la
+endif
+
+libleveldb_la_LDFLAGS = -release @VERSION@
+libleveldb_la_LIBADD = $(top_builddir)/libwiredtiger_static.la
+libleveldb_la_SOURCES = $(libwiredtiger_leveldb_la_SOURCES)
+
+librocksdb_la_LDFLAGS = -release @VERSION@
+librocksdb_la_LIBADD = $(top_builddir)/libwiredtiger_static.la
+librocksdb_la_SOURCES = $(libwiredtiger_leveldb_la_SOURCES)
+
+leveldb_test_SOURCES = leveldb_test.cc
+leveldb_test_LDADD = libwiredtiger_leveldb.la
+
+TESTS = $(noinst_PROGRAMS)
+
+clean-local:
+ rm -rf WTLDB_HOME
diff --git a/src/third_party/wiredtiger/api/leveldb/basho/perf_count.cc b/src/third_party/wiredtiger/api/leveldb/basho/perf_count.cc
new file mode 100644
index 00000000000..0e666ac1dc0
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/basho/perf_count.cc
@@ -0,0 +1,657 @@
+// -------------------------------------------------------------------
+//
+// perf_count.cc: performance counters LevelDB
+//
+// Copyright (c) 2012-2013 Basho Technologies, Inc. All Rights Reserved.
+//
+// This file is provided to you under the Apache License,
+// Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain
+// a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+// -------------------------------------------------------------------
+
+#include <limits.h>
+#include <stdio.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <syslog.h>
+#include <memory.h>
+#include <errno.h>
+
+#ifndef STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_
+#include "perf_count.h"
+#endif
+
+#include "util/coding.h"
+
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+
+#ifdef OS_SOLARIS
+# include <atomic.h>
+#endif
+
+
+namespace leveldb
+{
+
+// always have something active in gPerfCounters, eliminates
+// need to test for "is shared object attached yet"
+static PerformanceCounters LocalStartupCounters;
+PerformanceCounters * gPerfCounters(&LocalStartupCounters);
+
+
+ SstCounters::SstCounters()
+ : m_IsReadOnly(false),
+ m_Version(eSstCountVersion),
+ m_CounterSize(eSstCountEnumSize)
+ {
+ memset(m_Counter, 0, sizeof(m_Counter));
+
+ m_Counter[eSstCountKeySmallest]=ULLONG_MAX;
+ m_Counter[eSstCountValueSmallest]=ULLONG_MAX;
+
+ return;
+
+ }; // SstCounters::SstCounters
+
+
+ void
+ SstCounters::EncodeTo(
+ std::string & Dst) const
+ {
+ unsigned loop;
+
+ PutVarint32(&Dst, m_Version);
+ PutVarint32(&Dst, m_CounterSize);
+
+ for(loop=0; loop<eSstCountEnumSize; ++loop)
+ PutVarint64(&Dst, m_Counter[loop]);
+ } // SstCounters::EncodeTo
+
+
+ Status
+ SstCounters::DecodeFrom(
+ const Slice& src)
+ {
+ Status ret_status;
+ Slice cursor;
+ bool good;
+ int loop;
+
+ cursor=src;
+ m_IsReadOnly=true;
+ good=GetVarint32(&cursor, &m_Version);
+ good=good && (m_Version<=eSstCountVersion);
+
+ // all lesser number of stats to be read
+ good=good && GetVarint32(&cursor, &m_CounterSize);
+ if (good && eSstCountEnumSize < m_CounterSize)
+ m_CounterSize=eSstCountEnumSize;
+
+ for (loop=0; good && loop<eSstCountEnumSize; ++loop)
+ {
+ good=GetVarint64(&cursor, &m_Counter[loop]);
+ } // for
+
+ // if (!good) change ret_status to bad
+
+ return(ret_status);
+
+ } // SstCounters::DecodeFrom
+
+
+ uint64_t
+ SstCounters::Inc(
+ unsigned Index)
+ {
+ uint64_t ret_val;
+
+ ret_val=0;
+ if (!m_IsReadOnly && Index<m_CounterSize)
+ {
+ ++m_Counter[Index];
+ ret_val=m_Counter[Index];
+ } // if
+
+ return(ret_val);
+ } // SstCounters::Inc
+
+
+ uint64_t
+ SstCounters::Add(
+ unsigned Index,
+ uint64_t Amount)
+ {
+ uint64_t ret_val;
+
+ ret_val=0;
+ if (!m_IsReadOnly && Index<m_CounterSize)
+ {
+ m_Counter[Index]+=Amount;
+ ret_val=m_Counter[Index];
+ } // if
+
+ return(ret_val);
+ } // SstCounters::Add
+
+
+ uint64_t
+ SstCounters::Value(
+ unsigned Index) const
+ {
+ uint64_t ret_val;
+
+ ret_val=0;
+ if (Index<m_CounterSize)
+ {
+ ret_val=m_Counter[Index];
+ } // if
+
+ return(ret_val);
+ } // SstCounters::Value
+
+
+ void
+ SstCounters::Set(
+ unsigned Index,
+ uint64_t Value)
+ {
+ if (Index<m_CounterSize)
+ {
+ m_Counter[Index]=Value;
+ } // if
+
+ return;
+ } // SstCounters::Set
+
+
+ void
+ SstCounters::Dump() const
+ {
+ unsigned loop;
+
+ printf("SstCounters:\n");
+ printf(" m_IsReadOnly: %u\n", m_IsReadOnly);
+ printf(" m_Version: %u\n", m_Version);
+ printf(" m_CounterSize: %u\n", m_CounterSize);
+ for (loop=0; loop<m_CounterSize; ++loop)
+ printf(" Counter[%2u]: %" PRIu64 "\n", loop, m_Counter[loop]);
+
+ return;
+
+ } // SstCounters::Dump
+
+
+ // only used for local static objects, not shared memory objects
+ PerformanceCounters::PerformanceCounters()
+ {
+ m_Version=ePerfVersion;
+ m_CounterSize=ePerfCountEnumSize;
+ // cast away "volatile"
+ memset((void*)m_Counter, 0, sizeof(m_Counter));
+
+ return;
+
+ } // PerformanceCounters::PerformanceCounters
+
+
+ PerformanceCounters *
+ PerformanceCounters::Init(
+ bool IsReadOnly)
+ {
+ PerformanceCounters * ret_ptr;
+ bool should_create, good;
+ int ret_val, id;
+ struct shmid_ds shm_info;
+ size_t open_size;
+
+ ret_ptr=NULL;
+ memset(&shm_info, 0, sizeof(shm_info));
+ good=true;
+ open_size=sizeof(PerformanceCounters);
+
+ // first id attempt, minimal request
+ id=shmget(ePerfKey, 0, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (-1!=id)
+ ret_val=shmctl(id, IPC_STAT, &shm_info);
+ else
+ ret_val=-1;
+
+ // does the shared memory already exists (and of proper size if writing)
+ should_create=(0!=ret_val || (shm_info.shm_segsz < sizeof(PerformanceCounters))) && !IsReadOnly;
+
+ // should old shared memory be deleted?
+ if (should_create && 0==ret_val)
+ {
+ ret_val=shmctl(id, IPC_RMID, &shm_info);
+ good=(0==ret_val);
+ if (0!=ret_val)
+ syslog(LOG_ERR, "shmctl IPC_RMID failed [%d, %m]", errno);
+ } // if
+
+ // else open the size that exists
+ else if (0==ret_val)
+ {
+ open_size=shm_info.shm_segsz;
+ } // else if
+
+ // attempt to attach/create to shared memory instance
+ if (good)
+ {
+ int flags;
+
+ if (IsReadOnly)
+ flags = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+ else
+ flags = IPC_CREAT | S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+
+ m_PerfSharedId=shmget(ePerfKey, open_size, flags);
+ good=(-1!=m_PerfSharedId);
+ } // if
+
+ // map shared memory instance
+ if (good)
+ {
+ ret_ptr=(PerformanceCounters *)shmat(m_PerfSharedId, NULL, (IsReadOnly ? SHM_RDONLY : 0));
+ if ((void*)-1 != ret_ptr)
+ {
+ // initialize?
+ if (should_create || ePerfVersion!=ret_ptr->m_Version)
+ {
+ if (!IsReadOnly)
+ {
+ memset(ret_ptr, 0, sizeof(PerformanceCounters));
+ ret_ptr->m_Version=ePerfVersion;
+ ret_ptr->m_CounterSize=ePerfCountEnumSize;
+ } // if
+
+ // bad version match to existing segment
+ else
+ {
+ good=false;
+ errno=EINVAL;
+ } // else
+ } // if
+ } // if
+ else
+ {
+ good=false;
+ syslog(LOG_ERR, "shmat failed [%d, %m]", errno);
+ } // else
+
+ if (good)
+ {
+ // make this available process wide
+ gPerfCounters=ret_ptr;
+ } // if
+ else
+ {
+ ret_ptr=NULL;
+ m_LastError=errno;
+ } // else
+ } // if
+ else
+ {
+ m_LastError=errno;
+ ret_ptr=NULL;
+ } // else
+
+ return(ret_ptr);
+
+ }; // PerformanceCounters::Init
+
+
+ int
+ PerformanceCounters::Close(
+ PerformanceCounters * Counts)
+ {
+ int ret_val;
+
+ if (NULL!=Counts && &LocalStartupCounters != Counts)
+ {
+ // keep gPerf valid
+ if (gPerfCounters==Counts)
+ gPerfCounters=&LocalStartupCounters;
+
+ ret_val=shmdt(Counts);
+ if (0!=ret_val)
+ ret_val=errno;
+ } // if
+ else
+ {
+ ret_val=EINVAL;
+ } // else
+
+ return(ret_val);
+ } // PerformanceCounters::Close
+
+
+ uint64_t
+ PerformanceCounters::Inc(
+ unsigned Index)
+ {
+ uint64_t ret_val;
+
+ ret_val=0;
+ if (Index<m_CounterSize)
+ {
+ volatile uint64_t * val_ptr;
+
+ val_ptr=&m_Counter[Index];
+
+# if ULONG_MAX != 4294967295UL
+#ifdef OS_SOLARIS
+ atomic_inc_64(val_ptr);
+#else
+ __sync_add_and_fetch(val_ptr, 1);
+#endif
+#else
+ // hack fest for 64 bit semi-atomic on 32bit machine
+ uint32_t ret_32, * ptr_32;
+
+ ptr_32=(uint32_t *)&val_ptr;
+ ret_32=__sync_add_and_fetch(ptr_32, 1);
+ if (0==ret_32)
+ {
+ ++ptr_32;
+ __sync_add_and_fetch(ptr_32, 1);
+ } // if
+#endif
+ ret_val=*val_ptr;
+ } // if
+
+ return(ret_val);
+ } // PerformanceCounters::Inc
+
+
+ uint64_t
+ PerformanceCounters::Dec(
+ unsigned Index)
+ {
+ uint64_t ret_val;
+
+ ret_val=0;
+ if (Index<m_CounterSize)
+ {
+ volatile uint64_t * val_ptr;
+
+ val_ptr=&m_Counter[Index];
+
+# if ULONG_MAX != 4294967295UL
+#ifdef OS_SOLARIS
+ atomic_dec_64(val_ptr);
+#else
+ __sync_sub_and_fetch(val_ptr, 1);
+#endif
+#else
+ // hack fest for 64 bit semi-atomic on 32bit machine
+ uint32_t ret_32, * ptr_32;
+
+ ptr_32=(uint32_t *)&val_ptr;
+ ret_32=__sync_sub_and_fetch(ptr_32, 1);
+ if (0xFFFFFFFF==ret_32)
+ {
+ ++ptr_32;
+ __sync_sub_and_fetch(ptr_32, 1);
+ } // if
+#endif
+ ret_val=*val_ptr;
+ } // if
+
+ return(ret_val);
+ } // PerformanceCounters::Dec
+
+
+ uint64_t
+ PerformanceCounters::Add(
+ unsigned Index,
+ uint64_t Amount)
+ {
+ uint64_t ret_val;
+
+ ret_val=0;
+ if (Index<m_CounterSize)
+ {
+ volatile uint64_t * val_ptr;
+
+ val_ptr=&m_Counter[Index];
+
+# if ULONG_MAX != 4294967295UL
+#ifdef OS_SOLARIS
+ ret_val=atomic_add_64_nv(val_ptr, Amount);
+#else
+ ret_val=__sync_add_and_fetch(val_ptr, Amount);
+#endif
+#else
+ // hack fest for 64 bit semi-atomic on 32bit machine
+ uint32_t old_32, ret_32, * ptr_32;
+
+ ptr_32=(uint32_t *)&val_ptr;
+ old_32=*ptr_32;
+ ret_32=__sync_add_and_fetch(ptr_32, Amount);
+ if (ret_32<old_32)
+ {
+ ++ptr_32;
+ __sync_add_and_fetch(ptr_32, 1);
+ } // if
+
+ ret_val=*val_ptr;
+#endif
+ } // if
+
+ return(ret_val);
+ } // PerformanceCounters::Add
+
+
+ uint64_t
+ PerformanceCounters::Value(
+ unsigned Index) const
+ {
+ uint64_t ret_val;
+
+ ret_val=0;
+ if (Index<m_CounterSize)
+ {
+ ret_val=m_Counter[Index];
+ } // if
+
+ return(ret_val);
+ } // SstCounters::Value
+
+
+ void
+ PerformanceCounters::Set(
+ unsigned Index,
+ uint64_t Amount)
+ {
+ if (Index<m_CounterSize)
+ {
+ volatile uint64_t * val_ptr;
+
+ val_ptr=&m_Counter[Index];
+
+ *val_ptr=Amount;
+ } // if
+
+ return;
+ } // PerformanceCounters::Set
+
+
+ volatile const uint64_t *
+ PerformanceCounters::GetPtr(
+ unsigned Index) const
+ {
+ const volatile uint64_t * ret_ptr;
+
+ if (Index<m_CounterSize)
+ ret_ptr=&m_Counter[Index];
+ else
+ ret_ptr=&m_BogusCounter;
+
+ return(ret_ptr);
+
+ } // PerformanceCounters::GetPtr
+
+
+ const char *
+ PerformanceCounters::GetNamePtr(
+ unsigned Index)
+ {
+ const char * ret_ptr;
+
+ if (Index<ePerfCountEnumSize)
+ ret_ptr=m_PerfCounterNames[Index];
+ else
+ ret_ptr="???";
+
+ return(ret_ptr);
+
+ } // PerformanceCounters::GetPtr
+
+
+ int PerformanceCounters::m_PerfSharedId=-1;
+ int PerformanceCounters::m_LastError=0;
+ volatile uint64_t PerformanceCounters::m_BogusCounter=0;
+ const char * PerformanceCounters::m_PerfCounterNames[]=
+ {
+ "ROFileOpen",
+ "ROFileClose",
+ "ROFileUnmap",
+ "RWFileOpen",
+ "RWFileClose",
+ "RWFileUnmap",
+ "ApiOpen",
+ "ApiGet",
+ "ApiWrite",
+ "WriteSleep",
+ "WriteWaitImm",
+ "WriteWaitLevel0",
+ "WriteNewMem",
+ "WriteError",
+ "WriteNoWait",
+ "GetMem",
+ "GetImm",
+ "GetVersion",
+ "SearchLevel[0]",
+ "SearchLevel[1]",
+ "SearchLevel[2]",
+ "SearchLevel[3]",
+ "SearchLevel[4]",
+ "SearchLevel[5]",
+ "SearchLevel[6]",
+ "TableCached",
+ "TableOpened",
+ "TableGet",
+ "BGCloseUnmap",
+ "BGCompactImm",
+ "BGNormal",
+ "BGCompactLevel0",
+ "BlockFiltered",
+ "BlockFilterFalse",
+ "BlockCached",
+ "BlockRead",
+ "BlockFilterRead",
+ "BlockValidGet",
+ "Debug[0]",
+ "Debug[1]",
+ "Debug[2]",
+ "Debug[3]",
+ "Debug[4]",
+ "ReadBlockError",
+ "DBIterNew",
+ "DBIterNext",
+ "DBIterPrev",
+ "DBIterSeek",
+ "DBIterSeekFirst",
+ "DBIterSeekLast",
+ "DBIterDelete",
+ "eleveldbDirect",
+ "eleveldbQueued",
+ "eleveldbDequeued",
+ "elevelRefCreate",
+ "elevelRefDelete",
+ "ThrottleGauge",
+ "ThrottleCounter",
+ "ThrottleMicros0",
+ "ThrottleKeys0",
+ "ThrottleBacklog0",
+ "ThrottleCompacts0",
+ "ThrottleMicros1",
+ "ThrottleKeys1",
+ "ThrottleBacklog1",
+ "ThrottleCompacts1",
+ "BGWriteError",
+ "ThrottleWait",
+ "ThreadError",
+ "BGImmDirect",
+ "BGImmQueued",
+ "BGImmDequeued",
+ "BGImmWeighted",
+ "BGUnmapDirect",
+ "BGUnmapQueued",
+ "BGUnmapDequeued",
+ "BGUnmapWeighted",
+ "BGLevel0Direct",
+ "BGLevel0Queued",
+ "BGLevel0Dequeued",
+ "BGLevel0Weighted",
+ "BGCompactDirect",
+ "BGCompactQueued",
+ "BGCompactDequeued",
+ "BGCompactWeighted",
+ "FileCacheInsert",
+ "FileCacheRemove",
+ "BlockCacheInsert",
+ "BlockCacheRemove",
+ "ApiDelete"
+ };
+
+
+ int
+ PerformanceCounters::LookupCounter(
+ const char * Name)
+ {
+ int index,loop;
+
+ index=-1;
+
+ if (NULL!=Name && '\0'!=*Name)
+ {
+ for (loop=0; loop<ePerfCountEnumSize && -1==index; ++loop)
+ {
+ if (0==strcmp(m_PerfCounterNames[loop], Name))
+ index=loop;
+ } // loop
+ } // if
+
+ return(index);
+ };
+
+ void
+ PerformanceCounters::Dump()
+ {
+ int loop;
+
+ printf(" m_Version: %u\n", m_Version);
+ printf(" m_CounterSize: %u\n", m_CounterSize);
+
+ for (loop=0; loop<ePerfCountEnumSize; ++loop)
+ {
+ printf(" %s: %" PRIu64 "\n", m_PerfCounterNames[loop], m_Counter[loop]);
+ } // loop
+ }; // Dump
+
+} // namespace leveldb
diff --git a/src/third_party/wiredtiger/api/leveldb/basho/perf_count.h b/src/third_party/wiredtiger/api/leveldb/basho/perf_count.h
new file mode 100644
index 00000000000..b0f4abf9b66
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/basho/perf_count.h
@@ -0,0 +1,298 @@
+// -------------------------------------------------------------------
+//
+// perf_count.h: performance counters LevelDB
+//
+// Copyright (c) 2012-2013 Basho Technologies, Inc. All Rights Reserved.
+//
+// This file is provided to you under the Apache License,
+// Version 2.0 (the "License"); you may not use this file
+// except in compliance with the License. You may obtain
+// a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+// -------------------------------------------------------------------
+
+#ifndef STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_
+#define STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_
+
+#include "leveldb_wt_config.h"
+
+#include <stdint.h>
+#include <string>
+#include "status.h"
+
+namespace leveldb {
+
+enum SstCountEnum
+{
+ //
+ // array index values/names
+ //
+ eSstCountKeys=0, //!< how many keys in this sst
+ eSstCountBlocks=1, //!< how many blocks in this sst
+ eSstCountCompressAborted=2,//!< how many blocks attempted compression and aborted use
+ eSstCountKeySize=3, //!< byte count of all keys
+ eSstCountValueSize=4, //!< byte count of all values
+ eSstCountBlockSize=5, //!< byte count of all blocks (pre-compression)
+ eSstCountBlockWriteSize=6, //!< post-compression size, or BlockSize if no compression
+ eSstCountIndexKeys=7, //!< how many keys in the index block
+ eSstCountKeyLargest=8, //!< largest key in sst
+ eSstCountKeySmallest=9, //!< smallest key in sst
+ eSstCountValueLargest=10, //!< largest value in sst
+ eSstCountValueSmallest=11, //!< smallest value in sst
+ eSstCountDeleteKey=12, //!< tombstone count
+ eSstCountBlockSizeUsed=13, //!< Options::block_size used with this file
+ eSstCountUserDataSize=14, //!< post-compression size of non-metadata (user keys/values/block overhead)
+
+ // must follow last index name to represent size of array
+ eSstCountEnumSize, //!< size of the array described by the enum values
+
+ eSstCountVersion=1
+
+}; // enum SstCountEnum
+
+
+class SstCounters
+{
+protected:
+ bool m_IsReadOnly; //!< set when data decoded from a file
+ uint32_t m_Version; //!< object revision identification
+ uint32_t m_CounterSize; //!< number of objects in m_Counter
+
+ uint64_t m_Counter[eSstCountEnumSize];
+
+public:
+ // constructors / destructor
+ SstCounters();
+
+ // Put data into disk form
+ void EncodeTo(std::string & Dst) const;
+
+ // Populate member data from prior EncodeTo block
+ Status DecodeFrom(const Slice& src);
+
+ // increment the counter
+ uint64_t Inc(unsigned Index);
+
+ // add value to the counter
+ uint64_t Add(unsigned Index, uint64_t Amount);
+
+ // return value of a counter
+ uint64_t Value(unsigned Index) const;
+
+ // set a value
+ void Set(unsigned Index, uint64_t);
+
+ // return number of counters
+ uint32_t Size() const {return(m_CounterSize);};
+
+ // printf all values
+ void Dump() const;
+
+}; // class SstCounters
+
+
+extern struct PerformanceCounters * gPerfCounters;
+
+
+enum PerformanceCountersEnum
+{
+ //
+ // array index values/names
+ // (enum explicitly numbered to allow future edits / moves / inserts)
+ //
+ ePerfROFileOpen=0, //!< PosixMmapReadableFile open
+ ePerfROFileClose=1, //!< closed
+ ePerfROFileUnmap=2, //!< unmap without close
+
+ ePerfRWFileOpen=3, //!< PosixMmapFile open
+ ePerfRWFileClose=4, //!< closed
+ ePerfRWFileUnmap=5, //!< unmap without close
+
+ ePerfApiOpen=6, //!< Count of DB::Open completions
+ ePerfApiGet=7, //!< Count of DBImpl::Get completions
+ ePerfApiWrite=8, //!< Count of DBImpl::Get completions
+
+ ePerfWriteSleep=9, //!< DBImpl::MakeRoomForWrite called sleep
+ ePerfWriteWaitImm=10, //!< DBImpl::MakeRoomForWrite called Wait on Imm compact
+ ePerfWriteWaitLevel0=11,//!< DBImpl::MakeRoomForWrite called Wait on Level0 compact
+ ePerfWriteNewMem=12, //!< DBImpl::MakeRoomForWrite created new memory log
+ ePerfWriteError=13, //!< DBImpl::MakeRoomForWrite saw bg_error_
+ ePerfWriteNoWait=14, //!< DBImpl::MakeRoomForWrite took no action
+
+ ePerfGetMem=15, //!< DBImpl::Get read from memory log
+ ePerfGetImm=16, //!< DBImpl::Get read from previous memory log
+ ePerfGetVersion=17, //!< DBImpl::Get read from Version object
+
+ // code ASSUMES the levels are in numerical order,
+ // i.e. based off of ePerfSearchLevel0
+ ePerfSearchLevel0=18, //!< Version::Get read searched one or more files here
+ ePerfSearchLevel1=19, //!< Version::Get read searched one or more files here
+ ePerfSearchLevel2=20, //!< Version::Get read searched one or more files here
+ ePerfSearchLevel3=21, //!< Version::Get read searched one or more files here
+ ePerfSearchLevel4=22, //!< Version::Get read searched one or more files here
+ ePerfSearchLevel5=23, //!< Version::Get read searched one or more files here
+ ePerfSearchLevel6=24, //!< Version::Get read searched one or more files here
+
+ ePerfTableCached=25, //!< TableCache::FindTable found table in cache
+ ePerfTableOpened=26, //!< TableCache::FindTable had to open table file
+ ePerfTableGet=27, //!< TableCache::Get used to retrieve a key
+
+ ePerfBGCloseUnmap=28, //!< PosixEnv::BGThreaed started Unmap/Close job
+ ePerfBGCompactImm=29, //!< PosixEnv::BGThreaed started compaction of Imm
+ ePerfBGNormal=30, //!< PosixEnv::BGThreaed started normal compaction job
+ ePerfBGCompactLevel0=31,//!< PosixEnv::BGThreaed started compaction of Level0
+
+ ePerfBlockFiltered=32, //!< Table::BlockReader search stopped due to filter
+ ePerfBlockFilterFalse=33,//!< Table::BlockReader gave a false positive for match
+ ePerfBlockCached=34, //!< Table::BlockReader found block in cache
+ ePerfBlockRead=35, //!< Table::BlockReader read block from disk
+ ePerfBlockFilterRead=36,//!< Table::ReadMeta filter loaded from file
+ ePerfBlockValidGet=37, //!< Table::InternalGet has valid iterator
+
+ ePerfDebug0=38, //!< Developer debug counters, moveable
+ ePerfDebug1=39, //!< Developer debug counters, moveable
+ ePerfDebug2=40, //!< Developer debug counters, moveable
+ ePerfDebug3=41, //!< Developer debug counters, moveable
+ ePerfDebug4=42, //!< Developer debug counters, moveable
+
+ ePerfReadBlockError=43, //!< crc or compression error in ReadBlock (format.cc)
+
+ ePerfIterNew=44, //!< Count of DBImpl::NewDBIterator calls
+ ePerfIterNext=45, //!< Count of DBIter::Next calls
+ ePerfIterPrev=46, //!< Count of DBIter::Prev calls
+ ePerfIterSeek=47, //!< Count of DBIter::Seek calls
+ ePerfIterSeekFirst=48, //!< Count of DBIter::SeekFirst calls
+ ePerfIterSeekLast=49, //!< Count of DBIter::SeekLast calls
+ ePerfIterDelete=50, //!< Count of DBIter::~DBIter
+
+ ePerfElevelDirect=51, //!< eleveldb's FindWaitingThread went direct to thread
+ ePerfElevelQueued=52, //!< eleveldb's FindWaitingThread queued work item
+ ePerfElevelDequeued=53, //!< eleveldb's worker took item from backlog queue
+
+ ePerfElevelRefCreate=54,//!< eleveldb RefObject constructed
+ ePerfElevelRefDelete=55,//!< eleveldb RefObject destructed
+
+ ePerfThrottleGauge=56, //!< current throttle value
+ ePerfThrottleCounter=57,//!< running throttle by seconds
+
+ ePerfThrottleMicros0=58,//!< level 0 micros spent compacting
+ ePerfThrottleKeys0=59, //!< level 0 keys processed
+ ePerfThrottleBacklog0=60,//!< backlog at time of posting (level0)
+ ePerfThrottleCompacts0=61,//!< number of level 0 compactions
+
+ ePerfThrottleMicros1=62,//!< level 1+ micros spent compacting
+ ePerfThrottleKeys1=63, //!< level 1+ keys processed
+ ePerfThrottleBacklog1=64,//!< backlog at time of posting (level1+)
+ ePerfThrottleCompacts1=65,//!< number of level 1+ compactions
+
+ ePerfBGWriteError=66, //!< error in write/close, see syslog
+
+ ePerfThrottleWait=67, //!< milliseconds of throttle wait
+ ePerfThreadError=68, //!< system error on thread related call, no LOG access
+
+ ePerfBGImmDirect=69, //!< count Imm compactions happened directly
+ ePerfBGImmQueued=70, //!< count Imm compactions placed on queue
+ ePerfBGImmDequeued=71, //!< count Imm compactions removed from queue
+ ePerfBGImmWeighted=72, //!< total microseconds item spent on queue
+
+ ePerfBGUnmapDirect=73, //!< count Unmap operations happened directly
+ ePerfBGUnmapQueued=74, //!< count Unmap operations placed on queue
+ ePerfBGUnmapDequeued=75,//!< count Unmap operations removed from queue
+ ePerfBGUnmapWeighted=76,//!< total microseconds item spent on queue
+
+ ePerfBGLevel0Direct=77, //!< count Level0 compactions happened directly
+ ePerfBGLevel0Queued=78, //!< count Level0 compactions placed on queue
+ ePerfBGLevel0Dequeued=79,//!< count Level0 compactions removed from queue
+ ePerfBGLevel0Weighted=80,//!< total microseconds item spent on queue
+
+ ePerfBGCompactDirect=81, //!< count generic compactions happened directly
+ ePerfBGCompactQueued=82, //!< count generic compactions placed on queue
+ ePerfBGCompactDequeued=83,//!< count generic compactions removed from queue
+ ePerfBGCompactWeighted=84,//!< total microseconds item spent on queue
+
+ ePerfFileCacheInsert=85, //!< total bytes inserted into file cache
+ ePerfFileCacheRemove=86, //!< total bytes removed from file cache
+
+ ePerfBlockCacheInsert=87, //!< total bytes inserted into block cache
+ ePerfBlockCacheRemove=88, //!< total bytes removed from block cache
+
+ ePerfApiDelete=89, //!< Count of DB::Delete
+
+ // must follow last index name to represent size of array
+ // (ASSUMES previous enum is highest value)
+ ePerfCountEnumSize, //!< size of the array described by the enum values
+
+ ePerfVersion=1, //!< structure versioning
+ ePerfKey=41207 //!< random number as shared memory identifier
+};
+
+//
+// Do NOT use virtual functions. This structure will be aligned at different
+// locations in multiple processes. Things can get messy with virtuals.
+
+struct PerformanceCounters
+{
+public:
+ static int m_LastError;
+
+protected:
+ uint32_t m_Version; //!< object revision identification
+ uint32_t m_CounterSize; //!< number of objects in m_Counter
+
+ volatile uint64_t m_Counter[ePerfCountEnumSize];
+
+ static const char * m_PerfCounterNames[];
+ static int m_PerfSharedId;
+ static volatile uint64_t m_BogusCounter; //!< for out of range GetPtr calls
+
+public:
+ // only called for local object, not for shared memory
+ PerformanceCounters();
+
+ //!< does executable's idea of version match shared object?
+ bool VersionTest()
+ {return(ePerfCountEnumSize<=m_CounterSize && ePerfVersion==m_Version);};
+
+ //!< mostly for perf_count_test.cc
+ void SetVersion(uint32_t Version, uint32_t CounterSize)
+ {m_Version=Version; m_CounterSize=CounterSize;};
+
+ static PerformanceCounters * Init(bool IsReadOnly);
+ static int Close(PerformanceCounters * Counts);
+
+ uint64_t Inc(unsigned Index);
+ uint64_t Dec(unsigned Index);
+
+ // add value to the counter
+ uint64_t Add(unsigned Index, uint64_t Amount);
+
+ // return value of a counter
+ uint64_t Value(unsigned Index) const;
+
+ // set a value
+ void Set(unsigned Index, uint64_t);
+
+ volatile const uint64_t * GetPtr(unsigned Index) const;
+
+ static const char * GetNamePtr(unsigned Index);
+
+ int LookupCounter(const char * Name);
+
+ void Dump();
+
+}; // struct PerformanceCounters
+
+extern PerformanceCounters * gPerfCounters;
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/config.hin b/src/third_party/wiredtiger/api/leveldb/config.hin
new file mode 100644
index 00000000000..131b68969d3
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/config.hin
@@ -0,0 +1,22 @@
+/* api/leveldb/config.hin. Generated by autoheader, then hand-edited. */
+
+/* Build the LevelDB API with Basho LevelDB support. */
+#undef HAVE_BASHOLEVELDB
+
+/* Snappy support automatically loaded. */
+#undef HAVE_BUILTIN_EXTENSION_SNAPPY
+
+/* Zlib support automatically loaded. */
+#undef HAVE_BUILTIN_EXTENSION_ZLIB
+
+/* Define to 1 for diagnostic tests. */
+#undef HAVE_DIAGNOSTIC
+
+/* Build the LevelDB API with HyperLevelDB support. */
+#undef HAVE_HYPERLEVELDB
+
+/* Define to 1 if you have the `snappy' library (-lsnappy). */
+#undef HAVE_LIBSNAPPY
+
+/* Build the LevelDB API with RocksDB support. */
+#undef HAVE_ROCKSDB
diff --git a/src/third_party/wiredtiger/api/leveldb/dummy.cc b/src/third_party/wiredtiger/api/leveldb/dummy.cc
new file mode 100644
index 00000000000..d56f03b544b
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/dummy.cc
@@ -0,0 +1,28 @@
+/*-
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Nothing to see, just keep build tools happy. */
diff --git a/src/third_party/wiredtiger/api/leveldb/hyper_wt.cc b/src/third_party/wiredtiger/api/leveldb/hyper_wt.cc
new file mode 100644
index 00000000000..95c82289e18
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/hyper_wt.cc
@@ -0,0 +1,415 @@
+/*-
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "leveldb_wt.h"
+#include <errno.h>
+#include <sstream>
+#include <sys/param.h>
+#include <sys/stat.h>
+
+using leveldb::ReplayIterator;
+using leveldb::Status;
+
+// Fill in missing methods from the interface
+ReplayIterator::ReplayIterator() {}
+ReplayIterator::~ReplayIterator() {}
+
+class ReplayIteratorImpl : public ReplayIterator {
+ public:
+ ReplayIteratorImpl(OperationContext *context) : context_(context), cursor_(NULL) {
+ WT_SESSION *session = context_->GetSession();
+ int ret = session->open_cursor(
+ session, "log:", NULL, NULL, &cursor_);
+ status_ = WiredTigerErrorToStatus(ret);
+ valid_ = false;
+ // Position on first record. valid_ will be set appropriately.
+ Next();
+ }
+
+ ReplayIteratorImpl(OperationContext *context, const std::string& timestamp) :
+ context_(context), cursor_(NULL) {
+
+ WT_SESSION *session = context_->GetSession();
+ int ret = session->open_cursor(
+ session, "log:", NULL, NULL, &cursor_);
+ status_ = WiredTigerErrorToStatus(ret);
+ valid_ = false;
+ // Position on requested record. valid_ will be set appropriately.
+ SeekTo(timestamp);
+ }
+
+ // An iterator is either positioned at a deleted key, present key/value pair,
+ // or not valid. This method returns true iff the iterator is valid.
+ virtual bool Valid();
+
+ // Moves to the next entry in the source. After this call, Valid() is
+ // true iff the iterator was not positioned at the last entry in the source.
+ // REQUIRES: Valid()
+ virtual void Next();
+
+ // Position at the first key in the source that at or past target for this
+ // pass. Note that this is unlike the Seek call, as the ReplayIterator is
+ // unsorted.
+ // The iterator is Valid() after this call iff the source contains
+ // an entry that comes at or past target.
+ // Per Robert at Hyperdex, the SkipTo functions are hacky optimizations
+ // for LevelDB and its key layout. It is okay for them to be no-ops.
+ virtual void SkipTo(const Slice& target) { }
+ virtual void SkipToLast() { }
+ virtual void SeekTo(const std::string& timestamp);
+ virtual void SeekToLast();
+
+ // Return true if the current entry points to a key-value pair. If this
+ // returns false, it means the current entry is a deleted entry.
+ virtual bool HasValue() {
+ assert(Valid());
+ if (optype == WT_LOGOP_ROW_PUT ||
+ optype == WT_LOGOP_COL_PUT)
+ return true;
+ else
+ return false;
+ }
+
+ int Compare(ReplayIteratorImpl* other) {
+ int cmp;
+ assert(Valid());
+ // assert(other->Valid());
+ int ret = cursor_->compare(cursor_, other->cursor_, &cmp);
+ status_ = WiredTigerErrorToStatus(ret);
+ return (cmp);
+ }
+
+ // Return the key for the current entry. The underlying storage for
+ // the returned slice is valid only until the next modification of
+ // the iterator.
+ // REQUIRES: Valid()
+ virtual Slice key() const { return Slice((const char *)key_.data, key_.size); }
+
+ // Return the value for the current entry. The underlying storage for
+ // the returned slice is valid only until the next modification of
+ // the iterator.
+ // REQUIRES: !AtEnd() && !AtStart()
+ virtual Slice value() const { return Slice((const char *)value_.data, value_.size); }
+
+ // If an error has occurred, return it. Else return an ok status.
+ virtual Status status() const { return status_; }
+
+ // must be released by giving it back to the DB
+ virtual ~ReplayIteratorImpl() {
+ int ret = Close();
+ assert(ret == 0);
+ }
+
+ std::string GetTimestamp() {
+ char lsn[256];
+ assert(Valid());
+ snprintf(lsn, sizeof(lsn), WT_TIMESTAMP_FORMAT,
+ lsn_.file, lsn_.offset);
+ return (std::string(lsn));
+ }
+
+ int Close() {
+ int ret = 0;
+ if (cursor_ != NULL)
+ ret = cursor_->close(cursor_);
+ status_ = WiredTigerErrorToStatus(ret);
+ valid_ = false;
+ cursor_ = NULL;
+ return (ret);
+ }
+
+ private:
+ void SeekTo(WT_LSN *lsn);
+ // No copying allowed
+ ReplayIteratorImpl(const ReplayIterator&) { }
+ void operator=(const ReplayIterator&) { }
+ OperationContext *context_;
+ Status status_;
+ WT_CURSOR *cursor_;
+ WT_ITEM key_, value_;
+ WT_LSN lsn_;
+ bool valid_;
+ uint64_t txnid;
+ uint32_t fileid, opcount, optype, rectype;
+};
+
+bool
+ReplayIteratorImpl::Valid() {
+ // If we're invalid and at the end, try again.
+ if (valid_ == false && cursor_ != NULL && status_.IsNotFound())
+ Next();
+ return valid_;
+}
+
+void
+ReplayIteratorImpl::Next() {
+ int ret = 0;
+
+ if (cursor_ != NULL) {
+ while ((ret = cursor_->next(cursor_)) == 0) {
+ ret = cursor_->get_key(cursor_,
+ &lsn_.file, &lsn_.offset, &opcount);
+ if (ret != 0)
+ break;
+ ret = cursor_->get_value(cursor_,
+ &txnid, &rectype, &optype, &fileid, &key_, &value_);
+ if (ret != 0)
+ break;
+ // Next() is only interested in modification operations.
+ // Continue for any other type of record.
+ if (WT_VALID_OPERATION(fileid, optype)) {
+ valid_ = true;
+ break;
+ }
+ }
+ status_ = WiredTigerErrorToStatus(ret);
+ if (ret != 0) {
+ valid_ = false;
+ if (ret != WT_NOTFOUND)
+ ret = Close();
+ else
+ ret = 0;
+ assert(ret == 0);
+ }
+ }
+}
+
+void
+ReplayIteratorImpl::SeekToLast() {
+ int ret = 0;
+ WT_LSN last_lsn;
+
+ last_lsn.file = 0;
+ if (cursor_ != NULL) {
+ // Walk the log to the end, then set the cursor on the
+ // last valid LSN we saw.
+ while ((ret = cursor_->next(cursor_)) == 0) {
+ ret = cursor_->get_key(cursor_,
+ &lsn_.file, &lsn_.offset, &opcount);
+ if (ret != 0)
+ break;
+ ret = cursor_->get_value(cursor_,
+ &txnid, &rectype, &optype, &fileid, &key_, &value_);
+ if (ret != 0)
+ break;
+ // We're only interested in modification operations.
+ // Continue for any other type of record.
+ if (WT_VALID_OPERATION(fileid, optype)) {
+ valid_ = true;
+ last_lsn = lsn_;
+ }
+ }
+ // We reached the end of log
+ if (ret != WT_NOTFOUND || last_lsn.file == 0) {
+ valid_ = false;
+ ret = Close();
+ assert(ret == 0);
+ } else
+ SeekTo(&last_lsn);
+ }
+}
+
+void
+ReplayIteratorImpl::SeekTo(const std::string& timestamp) {
+ WT_LSN target_lsn;
+ int ret = 0;
+
+ if (timestamp == "all") {
+ if (cursor_ != NULL) {
+ ret = cursor_->reset(cursor_);
+ status_ = WiredTigerErrorToStatus(ret);
+ if (ret != 0)
+ return;
+ Next();
+ return;
+ }
+ }
+ if (timestamp == "now") {
+ SeekToLast();
+ return;
+ }
+ sscanf(timestamp.c_str(), WT_TIMESTAMP_FORMAT,
+ &target_lsn.file, &target_lsn.offset);
+ SeekTo(&target_lsn);
+}
+
+// Set the cursor on the first modification record at or after the
+// given LSN.
+void
+ReplayIteratorImpl::SeekTo(WT_LSN *target_lsn) {
+ int ret = 0;
+
+ valid_ = false;
+ if (cursor_ != NULL) {
+ cursor_->set_key(cursor_,
+ target_lsn->file, target_lsn->offset, 0, 0);
+ ret = cursor_->search(cursor_);
+ status_ = WiredTigerErrorToStatus(ret);
+ if (ret != 0)
+ return;
+ // If we were successful, set up the info.
+ ret = cursor_->get_key(cursor_,
+ &lsn_.file, &lsn_.offset, &opcount);
+ status_ = WiredTigerErrorToStatus(ret);
+ if (ret != 0)
+ return;
+ ret = cursor_->get_value(cursor_,
+ &txnid, &rectype, &optype, &fileid, &key_, &value_);
+ status_ = WiredTigerErrorToStatus(ret);
+ if (ret != 0)
+ return;
+ valid_ = true;
+ // We're only interested in modification operations.
+ // Continue for any other type of record.
+ if (!WT_VALID_OPERATION(fileid, optype))
+ Next();
+ }
+}
+
+// Create a live backup of a live LevelDB instance.
+// The backup is stored in a directory named "backup-<name>" under the top
+// level of the open LevelDB database. The implementation is permitted, and
+// even encouraged, to improve the performance of this call through
+// hard-links.
+Status
+DbImpl::LiveBackup(const Slice& name)
+{
+ OperationContext *context = GetContext();
+ WT_SESSION *session = context->GetSession();
+ WT_CURSOR *cursor;
+ int ret = session->open_cursor(
+ session, "backup:", NULL, NULL, &cursor);
+ int t_ret;
+ const char *filename;
+ const char *home = conn_->get_home(conn_);
+ char backup[MAXPATHLEN], buf[MAXPATHLEN * 2];
+
+ // If we couldn't open the backup cursor, we're done.
+ if (ret != 0)
+ return (WiredTigerErrorToStatus(ret));
+
+ // Remove any old directory and create the backup directory.
+ // WT single-threads hot backups. If we get here we already have
+ // the backup cursor open and we do not have to worry about other
+ // threads trying to remove and recreate the same directory out
+ // from under us.
+ snprintf(buf, sizeof(buf), "rm -rf %s/backup-%s", home,
+ (char *)name.data());
+ if ((ret = system(buf)) != 0)
+ return WiredTigerErrorToStatus(ret);
+ snprintf(backup, sizeof(backup), "%s/backup-%s", home,
+ (char *)name.data());
+ if ((ret = mkdir(backup, 0777)) != 0)
+ return WiredTigerErrorToStatus(ret);
+ // Copy all files returned by backup cursor.
+ while ((ret = cursor->next(cursor)) == 0 &&
+ (ret = cursor->get_key(cursor, &filename)) == 0) {
+ snprintf(buf, sizeof(buf), "cp %s/%s %s/%s",
+ home, filename, backup, filename);
+ if ((ret = system(buf)) != 0)
+ break;
+ }
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+ if ((t_ret = cursor->close(cursor)) != 0 && ret == 0)
+ ret = t_ret;
+
+ return (WiredTigerErrorToStatus(ret));
+}
+
+// Return an opaque timestamp that identifies the current point in time of the
+// database. This timestamp may be subsequently presented to the
+// NewReplayIterator method to create a ReplayIterator.
+void
+DbImpl::GetReplayTimestamp(std::string* timestamp)
+{
+ OperationContext *context = GetContext();
+ ReplayIteratorImpl *iter = new ReplayIteratorImpl(context);
+
+ iter->SeekToLast();
+ *timestamp = iter->GetTimestamp();
+ ReleaseReplayIterator(iter);
+}
+
+// Set the lower bound for manual garbage collection. This method only takes
+// effect when Options.manual_garbage_collection is true.
+void
+DbImpl::AllowGarbageCollectBeforeTimestamp(const std::string& timestamp)
+{
+}
+
+// Validate the timestamp
+bool
+DbImpl::ValidateTimestamp(const std::string& timestamp)
+{
+ bool valid;
+ OperationContext *context = GetContext();
+ ReplayIteratorImpl *iter = new ReplayIteratorImpl(context);
+
+ // The SeekTo function will handle "all" or "now".
+ iter->SeekTo(timestamp);
+ valid = iter->Valid();
+ ReleaseReplayIterator(iter);
+ return valid;
+}
+
+// Compare two timestamps and return -1, 0, 1 for lt, eq, gt
+int
+DbImpl::CompareTimestamps(const std::string& lhs, const std::string& rhs)
+{
+ OperationContext *context = GetContext();
+ ReplayIteratorImpl *lhiter = new ReplayIteratorImpl(context);
+ ReplayIteratorImpl *rhiter = new ReplayIteratorImpl(context);
+ int cmp = 0;
+
+ // The SeekTo function will handle "all" or "now".
+ lhiter->SeekTo(lhs);
+ rhiter->SeekTo(rhs);
+ if (lhiter->Valid() && rhiter->Valid())
+ cmp = lhiter->Compare(rhiter);
+ ReleaseReplayIterator(lhiter);
+ ReleaseReplayIterator(rhiter);
+ return cmp;
+}
+
+// Return a ReplayIterator that returns every write operation performed after
+// the timestamp.
+Status
+DbImpl::GetReplayIterator(const std::string& timestamp,
+ ReplayIterator** iter)
+{
+ OperationContext *context = GetContext();
+ *iter = new ReplayIteratorImpl(context, timestamp);
+ return ((*iter)->status());
+}
+
+// Release a previously allocated replay iterator.
+void
+DbImpl::ReleaseReplayIterator(ReplayIterator* iter)
+{
+ delete static_cast<ReplayIteratorImpl *>(iter);
+}
diff --git a/src/third_party/wiredtiger/api/leveldb/hyperleveldb/AUTHORS b/src/third_party/wiredtiger/api/leveldb/hyperleveldb/AUTHORS
new file mode 100644
index 00000000000..bf024aba6a8
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/hyperleveldb/AUTHORS
@@ -0,0 +1,15 @@
+# Names should be added to this file like so:
+# Name or Organization <email address>
+
+Google Inc.
+
+# Initial version authors:
+Jeffrey Dean <jeff@google.com>
+Sanjay Ghemawat <sanjay@google.com>
+
+# Partial list of contributors:
+Kevin Regan <kevin.d.regan@gmail.com>
+Johan Bilien <jobi@litl.com>
+
+# HyperLevelDB authors:
+Robert Escriva <robert@hyperdex.org>
diff --git a/src/third_party/wiredtiger/api/leveldb/hyperleveldb/LICENSE b/src/third_party/wiredtiger/api/leveldb/hyperleveldb/LICENSE
new file mode 100644
index 00000000000..262b0af095d
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/hyperleveldb/LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+Copyright (c) 2013-2014 The HyperLevelDB Authors. All rights reserved. (HyperLevelDB changes)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/third_party/wiredtiger/api/leveldb/hyperleveldb/replay_iterator.h b/src/third_party/wiredtiger/api/leveldb/hyperleveldb/replay_iterator.h
new file mode 100644
index 00000000000..397acdfd889
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/hyperleveldb/replay_iterator.h
@@ -0,0 +1,67 @@
+// Copyright (c) 2013 The HyperLevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_INCLUDE_REPLAY_ITERATOR_H_
+#define STORAGE_LEVELDB_INCLUDE_REPLAY_ITERATOR_H_
+
+#include "leveldb_wt_config.h"
+
+#include "slice.h"
+#include "status.h"
+
+namespace leveldb {
+
+class ReplayIterator {
+ public:
+ ReplayIterator();
+
+ // An iterator is either positioned at a deleted key, present key/value pair,
+ // or not valid. This method returns true iff the iterator is valid.
+ virtual bool Valid() = 0;
+
+ // Moves to the next entry in the source. After this call, Valid() is
+ // true iff the iterator was not positioned at the last entry in the source.
+ // REQUIRES: Valid()
+ virtual void Next() = 0;
+
+ // Position at the first key in the source that at or past target for this
+ // pass. Note that this is unlike the Seek call, as the ReplayIterator is
+ // unsorted.
+ // The iterator is Valid() after this call iff the source contains
+ // an entry that comes at or past target.
+ virtual void SkipTo(const Slice& target) = 0;
+ virtual void SkipToLast() = 0;
+
+ // Return true if the current entry points to a key-value pair. If this
+ // returns false, it means the current entry is a deleted entry.
+ virtual bool HasValue() = 0;
+
+ // Return the key for the current entry. The underlying storage for
+ // the returned slice is valid only until the next modification of
+ // the iterator.
+ // REQUIRES: Valid()
+ virtual Slice key() const = 0;
+
+ // Return the value for the current entry. The underlying storage for
+ // the returned slice is valid only until the next modification of
+ // the iterator.
+ // REQUIRES: !AtEnd() && !AtStart()
+ virtual Slice value() const = 0;
+
+ // If an error has occurred, return it. Else return an ok status.
+ virtual Status status() const = 0;
+
+ protected:
+ // must be released by giving it back to the DB
+ virtual ~ReplayIterator();
+
+ private:
+ // No copying allowed
+ ReplayIterator(const ReplayIterator&);
+ void operator=(const ReplayIterator&);
+};
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_INCLUDE_REPLAY_ITERATOR_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/AUTHORS b/src/third_party/wiredtiger/api/leveldb/leveldb/AUTHORS
new file mode 100644
index 00000000000..27a9407e52f
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/AUTHORS
@@ -0,0 +1,8 @@
+# Names should be added to this file like so:
+# Name or Organization <email address>
+
+Google Inc.
+
+# Initial version authors:
+Jeffrey Dean <jeff@google.com>
+Sanjay Ghemawat <sanjay@google.com>
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/LICENSE b/src/third_party/wiredtiger/api/leveldb/leveldb/LICENSE
new file mode 100644
index 00000000000..8e80208cd72
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/db/dbformat.h b/src/third_party/wiredtiger/api/leveldb/leveldb/db/dbformat.h
new file mode 100644
index 00000000000..2c8a9d5f5a7
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/db/dbformat.h
@@ -0,0 +1,233 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_DB_FORMAT_H_
+#define STORAGE_LEVELDB_DB_FORMAT_H_
+
+#include <stdio.h>
+#include "leveldb_wt.h"
+#include "util/coding.h"
+#include "util/logging.h"
+
+namespace leveldb {
+
+// Grouping of constants. We may want to make some of these
+// parameters set via options.
+namespace config {
+static const int kNumLevels = 7;
+
+// Level-0 compaction is started when we hit this many files.
+static const int kL0_CompactionTrigger = 4;
+
+// Soft limit on number of level-0 files. We slow down writes at this point.
+static const int kL0_SlowdownWritesTrigger = 8;
+
+// Maximum number of level-0 files. We stop writes at this point.
+static const int kL0_StopWritesTrigger = 12;
+
+// Maximum level to which a new compacted memtable is pushed if it
+// does not create overlap. We try to push to level 2 to avoid the
+// relatively expensive level 0=>1 compactions and to avoid some
+// expensive manifest file operations. We do not push all the way to
+// the largest level since that can generate a lot of wasted disk
+// space if the same key space is being repeatedly overwritten.
+static const int kMaxMemCompactLevel = 2;
+
+} // namespace config
+
+class InternalKey;
+
+// Value types encoded as the last component of internal keys.
+// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
+// data structures.
+enum ValueType {
+ kTypeDeletion = 0x0,
+ kTypeValue = 0x1
+#ifdef HAVE_ROCKSDB
+ ,kTypeMerge = 0x2,
+ // Following types are used only in write ahead logs. They are not used in
+ // memtables or sst files:
+ kTypeLogData = 0x3,
+ kTypeColumnFamilyDeletion = 0x4,
+ kTypeColumnFamilyValue = 0x5,
+ kTypeColumnFamilyMerge = 0x6,
+ kMaxValue = 0x7F
+#endif
+};
+// kValueTypeForSeek defines the ValueType that should be passed when
+// constructing a ParsedInternalKey object for seeking to a particular
+// sequence number (since we sort sequence numbers in decreasing order
+// and the value type is embedded as the low 8 bits in the sequence
+// number in internal keys, we need to use the highest-numbered
+// ValueType, not the lowest).
+static const ValueType kValueTypeForSeek = kTypeValue;
+
+typedef uint64_t SequenceNumber;
+
+// We leave eight bits empty at the bottom so a type and sequence#
+// can be packed together into 64-bits.
+static const SequenceNumber kMaxSequenceNumber =
+ ((0x1ull << 56) - 1);
+
+struct ParsedInternalKey {
+ Slice user_key;
+ SequenceNumber sequence;
+ ValueType type;
+
+ ParsedInternalKey() { } // Intentionally left uninitialized (for speed)
+ ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
+ : user_key(u), sequence(seq), type(t) { }
+ std::string DebugString() const;
+};
+
+// Return the length of the encoding of "key".
+inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
+ return key.user_key.size() + 8;
+}
+
+// Append the serialization of "key" to *result.
+extern void AppendInternalKey(std::string* result,
+ const ParsedInternalKey& key);
+
+// Attempt to parse an internal key from "internal_key". On success,
+// stores the parsed data in "*result", and returns true.
+//
+// On error, returns false, leaves "*result" in an undefined state.
+extern bool ParseInternalKey(const Slice& internal_key,
+ ParsedInternalKey* result);
+
+// Returns the user key portion of an internal key.
+inline Slice ExtractUserKey(const Slice& internal_key) {
+ assert(internal_key.size() >= 8);
+ return Slice(internal_key.data(), internal_key.size() - 8);
+}
+
+inline ValueType ExtractValueType(const Slice& internal_key) {
+ assert(internal_key.size() >= 8);
+ const size_t n = internal_key.size();
+ uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
+ unsigned char c = num & 0xff;
+ return static_cast<ValueType>(c);
+}
+
+// A comparator for internal keys that uses a specified comparator for
+// the user key portion and breaks ties by decreasing sequence number.
+class InternalKeyComparator : public Comparator {
+ private:
+ const Comparator* user_comparator_;
+ public:
+ explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) { }
+ virtual const char* Name() const;
+ virtual int Compare(const Slice& a, const Slice& b) const;
+ virtual void FindShortestSeparator(
+ std::string* start,
+ const Slice& limit) const;
+ virtual void FindShortSuccessor(std::string* key) const;
+
+ const Comparator* user_comparator() const { return user_comparator_; }
+
+ int Compare(const InternalKey& a, const InternalKey& b) const;
+};
+
+// Filter policy wrapper that converts from internal keys to user keys
+class InternalFilterPolicy : public FilterPolicy {
+ private:
+ const FilterPolicy* const user_policy_;
+ public:
+ explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) { }
+ virtual const char* Name() const;
+ virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const;
+ virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const;
+};
+
+// Modules in this directory should keep internal keys wrapped inside
+// the following class instead of plain strings so that we do not
+// incorrectly use string comparisons instead of an InternalKeyComparator.
+class InternalKey {
+ private:
+ std::string rep_;
+ public:
+ InternalKey() { } // Leave rep_ as empty to indicate it is invalid
+ InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) {
+ AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t));
+ }
+
+ void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }
+ Slice Encode() const {
+ assert(!rep_.empty());
+ return rep_;
+ }
+
+ Slice user_key() const { return ExtractUserKey(rep_); }
+
+ void SetFrom(const ParsedInternalKey& p) {
+ rep_.clear();
+ AppendInternalKey(&rep_, p);
+ }
+
+ void Clear() { rep_.clear(); }
+
+ std::string DebugString() const;
+};
+
+inline int InternalKeyComparator::Compare(
+ const InternalKey& a, const InternalKey& b) const {
+ return Compare(a.Encode(), b.Encode());
+}
+
+inline bool ParseInternalKey(const Slice& internal_key,
+ ParsedInternalKey* result) {
+ const size_t n = internal_key.size();
+ if (n < 8) return false;
+ uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
+ unsigned char c = num & 0xff;
+ result->sequence = num >> 8;
+ result->type = static_cast<ValueType>(c);
+ result->user_key = Slice(internal_key.data(), n - 8);
+ return (c <= static_cast<unsigned char>(kTypeValue));
+}
+
+// A helper class useful for DBImpl::Get()
+class LookupKey {
+ public:
+ // Initialize *this for looking up user_key at a snapshot with
+ // the specified sequence number.
+ LookupKey(const Slice& user_key, SequenceNumber sequence);
+
+ ~LookupKey();
+
+ // Return a key suitable for lookup in a MemTable.
+ Slice memtable_key() const { return Slice(start_, end_ - start_); }
+
+ // Return an internal key (suitable for passing to an internal iterator)
+ Slice internal_key() const { return Slice(kstart_, end_ - kstart_); }
+
+ // Return the user key
+ Slice user_key() const { return Slice(kstart_, end_ - kstart_ - 8); }
+
+ private:
+ // We construct a char array of the form:
+ // klength varint32 <-- start_
+ // userkey char[klength] <-- kstart_
+ // tag uint64
+ // <-- end_
+ // The array is a suitable MemTable key.
+ // The suffix starting with "userkey" can be used as an InternalKey.
+ const char* start_;
+ const char* kstart_;
+ const char* end_;
+ char space_[200]; // Avoid allocation for short keys
+
+ // No copying allowed
+ LookupKey(const LookupKey&);
+ void operator=(const LookupKey&);
+};
+
+inline LookupKey::~LookupKey() {
+ if (start_ != space_) delete[] start_;
+}
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_DB_FORMAT_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/db/skiplist.h b/src/third_party/wiredtiger/api/leveldb/leveldb/db/skiplist.h
new file mode 100644
index 00000000000..af85be6d016
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/db/skiplist.h
@@ -0,0 +1,379 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Thread safety
+// -------------
+//
+// Writes require external synchronization, most likely a mutex.
+// Reads require a guarantee that the SkipList will not be destroyed
+// while the read is in progress. Apart from that, reads progress
+// without any internal locking or synchronization.
+//
+// Invariants:
+//
+// (1) Allocated nodes are never deleted until the SkipList is
+// destroyed. This is trivially guaranteed by the code since we
+// never delete any skip list nodes.
+//
+// (2) The contents of a Node except for the next/prev pointers are
+// immutable after the Node has been linked into the SkipList.
+// Only Insert() modifies the list, and it is careful to initialize
+// a node and use release-stores to publish the nodes in one or
+// more lists.
+//
+// ... prev vs. next pointer ordering ...
+
+#include <assert.h>
+#include <stdlib.h>
+#include "port/port.h"
+#include "util/arena.h"
+#include "util/random.h"
+
+namespace leveldb {
+
+class Arena;
+
+template<typename Key, class Comparator>
+class SkipList {
+ private:
+ struct Node;
+
+ public:
+ // Create a new SkipList object that will use "cmp" for comparing keys,
+ // and will allocate memory using "*arena". Objects allocated in the arena
+ // must remain allocated for the lifetime of the skiplist object.
+ explicit SkipList(Comparator cmp, Arena* arena);
+
+ // Insert key into the list.
+ // REQUIRES: nothing that compares equal to key is currently in the list.
+ void Insert(const Key& key);
+
+ // Returns true iff an entry that compares equal to key is in the list.
+ bool Contains(const Key& key) const;
+
+ // Iteration over the contents of a skip list
+ class Iterator {
+ public:
+ // Initialize an iterator over the specified list.
+ // The returned iterator is not valid.
+ explicit Iterator(const SkipList* list);
+
+ // Returns true iff the iterator is positioned at a valid node.
+ bool Valid() const;
+
+ // Returns the key at the current position.
+ // REQUIRES: Valid()
+ const Key& key() const;
+
+ // Advances to the next position.
+ // REQUIRES: Valid()
+ void Next();
+
+ // Advances to the previous position.
+ // REQUIRES: Valid()
+ void Prev();
+
+ // Advance to the first entry with a key >= target
+ void Seek(const Key& target);
+
+ // Position at the first entry in list.
+ // Final state of iterator is Valid() iff list is not empty.
+ void SeekToFirst();
+
+ // Position at the last entry in list.
+ // Final state of iterator is Valid() iff list is not empty.
+ void SeekToLast();
+
+ private:
+ const SkipList* list_;
+ Node* node_;
+ // Intentionally copyable
+ };
+
+ private:
+ enum { kMaxHeight = 12 };
+
+ // Immutable after construction
+ Comparator const compare_;
+ Arena* const arena_; // Arena used for allocations of nodes
+
+ Node* const head_;
+
+ // Modified only by Insert(). Read racily by readers, but stale
+ // values are ok.
+ port::AtomicPointer max_height_; // Height of the entire list
+
+ inline int GetMaxHeight() const {
+ return static_cast<int>(
+ reinterpret_cast<intptr_t>(max_height_.NoBarrier_Load()));
+ }
+
+ // Read/written only by Insert().
+ Random rnd_;
+
+ Node* NewNode(const Key& key, int height);
+ int RandomHeight();
+ bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }
+
+ // Return true if key is greater than the data stored in "n"
+ bool KeyIsAfterNode(const Key& key, Node* n) const;
+
+ // Return the earliest node that comes at or after key.
+ // Return NULL if there is no such node.
+ //
+ // If prev is non-NULL, fills prev[level] with pointer to previous
+ // node at "level" for every level in [0..max_height_-1].
+ Node* FindGreaterOrEqual(const Key& key, Node** prev) const;
+
+ // Return the latest node with a key < key.
+ // Return head_ if there is no such node.
+ Node* FindLessThan(const Key& key) const;
+
+ // Return the last node in the list.
+ // Return head_ if list is empty.
+ Node* FindLast() const;
+
+ // No copying allowed
+ SkipList(const SkipList&);
+ void operator=(const SkipList&);
+};
+
+// Implementation details follow
+template<typename Key, class Comparator>
+struct SkipList<Key,Comparator>::Node {
+ explicit Node(const Key& k) : key(k) { }
+
+ Key const key;
+
+ // Accessors/mutators for links. Wrapped in methods so we can
+ // add the appropriate barriers as necessary.
+ Node* Next(int n) {
+ assert(n >= 0);
+ // Use an 'acquire load' so that we observe a fully initialized
+ // version of the returned Node.
+ return reinterpret_cast<Node*>(next_[n].Acquire_Load());
+ }
+ void SetNext(int n, Node* x) {
+ assert(n >= 0);
+ // Use a 'release store' so that anybody who reads through this
+ // pointer observes a fully initialized version of the inserted node.
+ next_[n].Release_Store(x);
+ }
+
+ // No-barrier variants that can be safely used in a few locations.
+ Node* NoBarrier_Next(int n) {
+ assert(n >= 0);
+ return reinterpret_cast<Node*>(next_[n].NoBarrier_Load());
+ }
+ void NoBarrier_SetNext(int n, Node* x) {
+ assert(n >= 0);
+ next_[n].NoBarrier_Store(x);
+ }
+
+ private:
+ // Array of length equal to the node height. next_[0] is lowest level link.
+ port::AtomicPointer next_[1];
+};
+
+template<typename Key, class Comparator>
+typename SkipList<Key,Comparator>::Node*
+SkipList<Key,Comparator>::NewNode(const Key& key, int height) {
+ char* mem = arena_->AllocateAligned(
+ sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1));
+ return new (mem) Node(key);
+}
+
+template<typename Key, class Comparator>
+inline SkipList<Key,Comparator>::Iterator::Iterator(const SkipList* list) {
+ list_ = list;
+ node_ = NULL;
+}
+
+template<typename Key, class Comparator>
+inline bool SkipList<Key,Comparator>::Iterator::Valid() const {
+ return node_ != NULL;
+}
+
+template<typename Key, class Comparator>
+inline const Key& SkipList<Key,Comparator>::Iterator::key() const {
+ assert(Valid());
+ return node_->key;
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::Next() {
+ assert(Valid());
+ node_ = node_->Next(0);
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::Prev() {
+ // Instead of using explicit "prev" links, we just search for the
+ // last node that falls before key.
+ assert(Valid());
+ node_ = list_->FindLessThan(node_->key);
+ if (node_ == list_->head_) {
+ node_ = NULL;
+ }
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::Seek(const Key& target) {
+ node_ = list_->FindGreaterOrEqual(target, NULL);
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::SeekToFirst() {
+ node_ = list_->head_->Next(0);
+}
+
+template<typename Key, class Comparator>
+inline void SkipList<Key,Comparator>::Iterator::SeekToLast() {
+ node_ = list_->FindLast();
+ if (node_ == list_->head_) {
+ node_ = NULL;
+ }
+}
+
+template<typename Key, class Comparator>
+int SkipList<Key,Comparator>::RandomHeight() {
+ // Increase height with probability 1 in kBranching
+ static const unsigned int kBranching = 4;
+ int height = 1;
+ while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) {
+ height++;
+ }
+ assert(height > 0);
+ assert(height <= kMaxHeight);
+ return height;
+}
+
+template<typename Key, class Comparator>
+bool SkipList<Key,Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
+ // NULL n is considered infinite
+ return (n != NULL) && (compare_(n->key, key) < 0);
+}
+
+template<typename Key, class Comparator>
+typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOrEqual(const Key& key, Node** prev)
+ const {
+ Node* x = head_;
+ int level = GetMaxHeight() - 1;
+ while (true) {
+ Node* next = x->Next(level);
+ if (KeyIsAfterNode(key, next)) {
+ // Keep searching in this list
+ x = next;
+ } else {
+ if (prev != NULL) prev[level] = x;
+ if (level == 0) {
+ return next;
+ } else {
+ // Switch to next list
+ level--;
+ }
+ }
+ }
+}
+
+template<typename Key, class Comparator>
+typename SkipList<Key,Comparator>::Node*
+SkipList<Key,Comparator>::FindLessThan(const Key& key) const {
+ Node* x = head_;
+ int level = GetMaxHeight() - 1;
+ while (true) {
+ assert(x == head_ || compare_(x->key, key) < 0);
+ Node* next = x->Next(level);
+ if (next == NULL || compare_(next->key, key) >= 0) {
+ if (level == 0) {
+ return x;
+ } else {
+ // Switch to next list
+ level--;
+ }
+ } else {
+ x = next;
+ }
+ }
+}
+
+template<typename Key, class Comparator>
+typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast()
+ const {
+ Node* x = head_;
+ int level = GetMaxHeight() - 1;
+ while (true) {
+ Node* next = x->Next(level);
+ if (next == NULL) {
+ if (level == 0) {
+ return x;
+ } else {
+ // Switch to next list
+ level--;
+ }
+ } else {
+ x = next;
+ }
+ }
+}
+
+template<typename Key, class Comparator>
+SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena)
+ : compare_(cmp),
+ arena_(arena),
+ head_(NewNode(0 /* any key will do */, kMaxHeight)),
+ max_height_(reinterpret_cast<void*>(1)),
+ rnd_(0xdeadbeef) {
+ for (int i = 0; i < kMaxHeight; i++) {
+ head_->SetNext(i, NULL);
+ }
+}
+
+template<typename Key, class Comparator>
+void SkipList<Key,Comparator>::Insert(const Key& key) {
+ // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual()
+ // here since Insert() is externally synchronized.
+ Node* prev[kMaxHeight];
+ Node* x = FindGreaterOrEqual(key, prev);
+
+ // Our data structure does not allow duplicate insertion
+ assert(x == NULL || !Equal(key, x->key));
+
+ int height = RandomHeight();
+ if (height > GetMaxHeight()) {
+ for (int i = GetMaxHeight(); i < height; i++) {
+ prev[i] = head_;
+ }
+ //fprintf(stderr, "Change height from %d to %d\n", max_height_, height);
+
+ // It is ok to mutate max_height_ without any synchronization
+ // with concurrent readers. A concurrent reader that observes
+ // the new value of max_height_ will see either the old value of
+ // new level pointers from head_ (NULL), or a new value set in
+ // the loop below. In the former case the reader will
+ // immediately drop to the next level since NULL sorts after all
+ // keys. In the latter case the reader will use the new node.
+ max_height_.NoBarrier_Store(reinterpret_cast<void*>(height));
+ }
+
+ x = NewNode(key, height);
+ for (int i = 0; i < height; i++) {
+ // NoBarrier_SetNext() suffices since we will add a barrier when
+ // we publish a pointer to "x" in prev[i].
+ x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i));
+ prev[i]->SetNext(i, x);
+ }
+}
+
+template<typename Key, class Comparator>
+bool SkipList<Key,Comparator>::Contains(const Key& key) const {
+ Node* x = FindGreaterOrEqual(key, NULL);
+ if (x != NULL && Equal(key, x->key)) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+} // namespace leveldb
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/db/write_batch.cc b/src/third_party/wiredtiger/api/leveldb/leveldb/db/write_batch.cc
new file mode 100644
index 00000000000..0a11cb10f33
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/db/write_batch.cc
@@ -0,0 +1,110 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// WriteBatch::rep_ :=
+// sequence: fixed64
+// count: fixed32
+// data: record[count]
+// record :=
+// kTypeValue varstring varstring |
+// kTypeDeletion varstring
+// varstring :=
+// len: varint32
+// data: uint8[len]
+
+#include "leveldb_wt.h"
+
+#include "db/write_batch_internal.h"
+
+namespace leveldb {
+
+// WriteBatch header has an 8-byte sequence number followed by a 4-byte count.
+static const size_t kHeader = 12;
+
+WriteBatch::WriteBatch() {
+ Clear();
+}
+
+WriteBatch::~WriteBatch() { }
+
+WriteBatch::Handler::~Handler() { }
+
+void WriteBatch::Clear() {
+ rep_.clear();
+ rep_.resize(kHeader);
+}
+
+Status WriteBatch::Iterate(Handler* handler) const {
+ Slice input(rep_);
+ if (input.size() < kHeader) {
+ return Status::Corruption("malformed WriteBatch (too small)");
+ }
+
+ input.remove_prefix(kHeader);
+ Slice key, value;
+ int found = 0;
+ while (!input.empty()) {
+ found++;
+ char tag = input[0];
+ input.remove_prefix(1);
+ switch (tag) {
+ case kTypeValue:
+ if (GetLengthPrefixedSlice(&input, &key) &&
+ GetLengthPrefixedSlice(&input, &value)) {
+ handler->Put(key, value);
+ } else {
+ return Status::Corruption("bad WriteBatch Put");
+ }
+ break;
+ case kTypeDeletion:
+ if (GetLengthPrefixedSlice(&input, &key)) {
+ handler->Delete(key);
+ } else {
+ return Status::Corruption("bad WriteBatch Delete");
+ }
+ break;
+ default:
+ return Status::Corruption("unknown WriteBatch tag");
+ }
+ }
+ if (found != WriteBatchInternal::Count(this)) {
+ return Status::Corruption("WriteBatch has wrong count");
+ } else {
+ return Status::OK();
+ }
+}
+
+int WriteBatchInternal::Count(const WriteBatch* b) {
+ return DecodeFixed32(b->rep_.data() + 8);
+}
+
+void WriteBatchInternal::SetCount(WriteBatch* b, int n) {
+ EncodeFixed32(&b->rep_[8], n);
+}
+
+void WriteBatch::Put(const Slice& key, const Slice& value) {
+ WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
+ rep_.push_back(static_cast<char>(kTypeValue));
+ PutLengthPrefixedSlice(&rep_, key);
+ PutLengthPrefixedSlice(&rep_, value);
+}
+
+void WriteBatch::Delete(const Slice& key) {
+ WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1);
+ rep_.push_back(static_cast<char>(kTypeDeletion));
+ PutLengthPrefixedSlice(&rep_, key);
+}
+
+void WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) {
+ assert(contents.size() >= kHeader);
+ b->rep_.assign(contents.data(), contents.size());
+}
+
+void WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src) {
+ SetCount(dst, Count(dst) + Count(src));
+ assert(src->rep_.size() >= kHeader);
+ dst->rep_.append(src->rep_.data() + kHeader, src->rep_.size() - kHeader);
+}
+
+} // namespace leveldb
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/db/write_batch_internal.h b/src/third_party/wiredtiger/api/leveldb/leveldb/db/write_batch_internal.h
new file mode 100644
index 00000000000..c8421cce124
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/db/write_batch_internal.h
@@ -0,0 +1,53 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
+#define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
+
+#include "leveldb_wt.h"
+#include "db/dbformat.h"
+
+namespace leveldb {
+
+// WriteBatchInternal provides static methods for manipulating a
+// WriteBatch that we don't want in the public WriteBatch interface.
+class WriteBatchInternal {
+ public:
+#ifdef HAVE_ROCKSDB
+ // WriteBatch methods with column_family_id instead of ColumnFamilyHandle*
+ static void Put(WriteBatch* batch, uint32_t column_family_id,
+ const Slice& key, const Slice& value);
+
+ static void Put(WriteBatch* batch, uint32_t column_family_id,
+ const SliceParts& key, const SliceParts& value);
+
+ static void Delete(WriteBatch* batch, uint32_t column_family_id,
+ const Slice& key);
+
+ static void Merge(WriteBatch* batch, uint32_t column_family_id,
+ const Slice& key, const Slice& value);
+#endif
+ // Return the number of entries in the batch.
+ static int Count(const WriteBatch* batch);
+
+ // Set the count for the number of entries in the batch.
+ static void SetCount(WriteBatch* batch, int n);
+
+ static Slice Contents(const WriteBatch* batch) {
+ return Slice(batch->rep_);
+ }
+
+ static size_t ByteSize(const WriteBatch* batch) {
+ return batch->rep_.size();
+ }
+
+ static void SetContents(WriteBatch* batch, const Slice& contents);
+
+ static void Append(WriteBatch* dst, const WriteBatch* src);
+};
+
+} // namespace leveldb
+
+
+#endif // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/cache.h b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/cache.h
new file mode 100644
index 00000000000..94be8e919a8
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/cache.h
@@ -0,0 +1,110 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A Cache is an interface that maps keys to values. It has internal
+// synchronization and may be safely accessed concurrently from
+// multiple threads. It may automatically evict entries to make room
+// for new entries. Values have a specified charge against the cache
+// capacity. For example, a cache where the values are variable
+// length strings, may use the length of the string as the charge for
+// the string.
+//
+// A builtin cache implementation with a least-recently-used eviction
+// policy is provided. Clients may use their own implementations if
+// they want something more sophisticated (like scan-resistance, a
+// custom eviction policy, variable cache sizing, etc.)
+
+#ifndef STORAGE_LEVELDB_INCLUDE_CACHE_H_
+#define STORAGE_LEVELDB_INCLUDE_CACHE_H_
+
+#include "leveldb_wt_config.h"
+#if defined(HAVE_ROCKSDB) && !defined(leveldb)
+#define leveldb rocksdb
+#endif
+
+#include <memory>
+#include <stdint.h>
+#include "slice.h"
+
+namespace leveldb {
+
+class Cache;
+
+// Create a new cache with a fixed size capacity. This implementation
+// of Cache uses a least-recently-used eviction policy.
+extern Cache* NewLRUCache(size_t capacity);
+#ifdef HAVE_ROCKSDB
+extern Cache* NewLRUCache(size_t capacity, int numSharedBits);
+extern Cache* NewLRUCache(size_t capacity, int numSharedBits,
+ int removeScanCountLimit);
+#endif
+
+class Cache {
+ public:
+ Cache() { }
+
+ // Destroys all existing entries by calling the "deleter"
+ // function that was passed to the constructor.
+ virtual ~Cache();
+
+ // Opaque handle to an entry stored in the cache.
+ struct Handle { };
+
+ // Insert a mapping from key->value into the cache and assign it
+ // the specified charge against the total cache capacity.
+ //
+ // Returns a handle that corresponds to the mapping. The caller
+ // must call this->Release(handle) when the returned mapping is no
+ // longer needed.
+ //
+ // When the inserted entry is no longer needed, the key and
+ // value will be passed to "deleter".
+ virtual Handle* Insert(const Slice& key, void* value, size_t charge,
+ void (*deleter)(const Slice& key, void* value)) = 0;
+
+ // If the cache has no mapping for "key", returns NULL.
+ //
+ // Else return a handle that corresponds to the mapping. The caller
+ // must call this->Release(handle) when the returned mapping is no
+ // longer needed.
+ virtual Handle* Lookup(const Slice& key) = 0;
+
+ // Release a mapping returned by a previous Lookup().
+ // REQUIRES: handle must not have been released yet.
+ // REQUIRES: handle must have been returned by a method on *this.
+ virtual void Release(Handle* handle) = 0;
+
+ // Return the value encapsulated in a handle returned by a
+ // successful Lookup().
+ // REQUIRES: handle must not have been released yet.
+ // REQUIRES: handle must have been returned by a method on *this.
+ virtual void* Value(Handle* handle) = 0;
+
+ // If the cache contains entry for key, erase it. Note that the
+ // underlying entry will be kept around until all existing handles
+ // to it have been released.
+ virtual void Erase(const Slice& key) = 0;
+
+ // Return a new numeric id. May be used by multiple clients who are
+ // sharing the same cache to partition the key space. Typically the
+ // client will allocate a new id at startup and prepend the id to
+ // its cache keys.
+ virtual uint64_t NewId() = 0;
+
+ private:
+ void LRU_Remove(Handle* e);
+ void LRU_Append(Handle* e);
+ void Unref(Handle* e);
+
+ struct Rep;
+ Rep* rep_;
+
+ // No copying allowed
+ Cache(const Cache&);
+ void operator=(const Cache&);
+};
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_UTIL_CACHE_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/comparator.h b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/comparator.h
new file mode 100644
index 00000000000..78d83a4d08e
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/comparator.h
@@ -0,0 +1,74 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_
+#define STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_
+
+#include "leveldb_wt_config.h"
+#if defined(HAVE_ROCKSDB) && !defined(leveldb)
+#define leveldb rocksdb
+#endif
+
+#include <stdint.h>
+#include <string>
+
+namespace leveldb {
+
+class Slice;
+
+// A Comparator object provides a total order across slices that are
+// used as keys in an sstable or a database. A Comparator implementation
+// must be thread-safe since leveldb may invoke its methods concurrently
+// from multiple threads.
+class Comparator {
+ public:
+ virtual ~Comparator();
+
+ // Three-way comparison. Returns value:
+ // < 0 iff "a" < "b",
+ // == 0 iff "a" == "b",
+ // > 0 iff "a" > "b"
+ virtual int Compare(const Slice& a, const Slice& b) const = 0;
+
+ // The name of the comparator. Used to check for comparator
+ // mismatches (i.e., a DB created with one comparator is
+ // accessed using a different comparator.
+ //
+ // The client of this package should switch to a new name whenever
+ // the comparator implementation changes in a way that will cause
+ // the relative ordering of any two keys to change.
+ //
+ // Names starting with "leveldb." are reserved and should not be used
+ // by any clients of this package.
+ virtual const char* Name() const = 0;
+
+ // Advanced functions: these are used to reduce the space requirements
+ // for internal data structures like index blocks.
+
+ // If *start < limit, changes *start to a short string in [start,limit).
+ // Simple comparator implementations may return with *start unchanged,
+ // i.e., an implementation of this method that does nothing is correct.
+ virtual void FindShortestSeparator(
+ std::string* start,
+ const Slice& limit) const = 0;
+
+ // Changes *key to a short string >= *key.
+ // Simple comparator implementations may return with *key unchanged,
+ // i.e., an implementation of this method that does nothing is correct.
+ virtual void FindShortSuccessor(std::string* key) const = 0;
+
+#ifdef HAVE_HYPERLEVELDB
+ // If unsure, return 0;
+ virtual uint64_t KeyNum(const Slice& key) const;
+#endif
+};
+
+// Return a builtin comparator that uses lexicographic byte-wise
+// ordering. The result remains the property of this module and
+// must not be deleted.
+extern const Comparator* BytewiseComparator();
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/db.h b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/db.h
new file mode 100644
index 00000000000..df8fcbbe9f8
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/db.h
@@ -0,0 +1,350 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_INCLUDE_DB_H_
+#define STORAGE_LEVELDB_INCLUDE_DB_H_
+
+#include "leveldb_wt_config.h"
+#if defined(HAVE_ROCKSDB) && !defined(leveldb)
+#define leveldb rocksdb
+#endif
+
+#include <memory>
+#include <stdint.h>
+#include <stdio.h>
+#include <vector>
+#include "iterator.h"
+#include "options.h"
+#include "write_batch.h"
+#ifdef HAVE_HYPERLEVELDB
+#include "replay_iterator.h"
+#endif
+
+namespace leveldb {
+
+// Update Makefile if you change these
+static const int kMajorVersion = 1;
+static const int kMinorVersion = 17;
+
+struct ReadOptions;
+struct WriteOptions;
+class WriteBatch;
+
+#ifdef HAVE_ROCKSDB
+struct FlushOptions;
+class ColumnFamilyHandle {
+ public:
+ virtual ~ColumnFamilyHandle() {}
+};
+extern const std::string kDefaultColumnFamilyName;
+
+struct ColumnFamilyDescriptor {
+ std::string name;
+ ColumnFamilyOptions options;
+ ColumnFamilyDescriptor()
+ : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
+ ColumnFamilyDescriptor(const std::string& _name,
+ const ColumnFamilyOptions& _options)
+ : name(_name), options(_options) {}
+};
+#endif
+
+// Abstract handle to particular state of a DB.
+// A Snapshot is an immutable object and can therefore be safely
+// accessed from multiple threads without any external synchronization.
+class Snapshot {
+ protected:
+ virtual ~Snapshot();
+};
+
+// A range of keys
+struct Range {
+ Slice start; // Included in the range
+ Slice limit; // Not included in the range
+
+ Range() { }
+ Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
+};
+
+#if HAVE_BASHOLEVELDB
+// Abstract holder for a DB value.
+// This allows callers to manage their own value buffers and have
+// DB values copied directly into those buffers.
+class Value {
+ public:
+ virtual Value& assign(const char* data, size_t size) = 0;
+
+ protected:
+ virtual ~Value();
+};
+#endif
+
+// A DB is a persistent ordered map from keys to values.
+// A DB is safe for concurrent access from multiple threads without
+// any external synchronization.
+class DB {
+ public:
+ // Open the database with the specified "name".
+ // Stores a pointer to a heap-allocated database in *dbptr and returns
+ // OK on success.
+ // Stores NULL in *dbptr and returns a non-OK status on error.
+ // Caller should delete *dbptr when it is no longer needed.
+ static Status Open(const Options& options,
+ const std::string& name,
+ DB** dbptr);
+
+#ifdef HAVE_ROCKSDB
+ // Open DB with column families.
+ // db_options specify database specific options
+ // column_families is the vector of all column families in the databse,
+ // containing column family name and options. You need to open ALL column
+ // families in the database. To get the list of column families, you can use
+ // ListColumnFamilies(). Also, you can open only a subset of column families
+ // for read-only access.
+ // The default column family name is 'default' and it's stored
+ // in rocksdb::kDefaultColumnFamilyName.
+ // If everything is OK, handles will on return be the same size
+ // as column_families --- handles[i] will be a handle that you
+ // will use to operate on column family column_family[i]
+ static Status Open(const Options& db_options, const std::string& name,
+ const std::vector<ColumnFamilyDescriptor>& column_families,
+ std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
+
+ // ListColumnFamilies will open the DB specified by argument name
+ // and return the list of all column families in that DB
+ // through column_families argument. The ordering of
+ // column families in column_families is unspecified.
+ static Status ListColumnFamilies(const Options& db_options,
+ const std::string& name,
+ std::vector<std::string>* column_families);
+
+ // Create a column_family and return the handle of column family
+ // through the argument handle.
+ virtual Status CreateColumnFamily(const Options& options,
+ const std::string& column_family_name,
+ ColumnFamilyHandle** handle) = 0;
+
+ // Drop a column family specified by column_family handle. This call
+ // only records a drop record in the manifest and prevents the column
+ // family from flushing and compacting.
+ virtual Status DropColumnFamily(ColumnFamilyHandle* column_family) = 0;
+
+ // Set the database entry for "key" to "value".
+ // Returns OK on success, and a non-OK status on error.
+ // Note: consider setting options.sync = true.
+ virtual Status Put(const WriteOptions& options,
+ ColumnFamilyHandle* column_family, const Slice& key,
+ const Slice& value) = 0;
+
+ // Remove the database entry (if any) for "key". Returns OK on
+ // success, and a non-OK status on error. It is not an error if "key"
+ // did not exist in the database.
+ // Note: consider setting options.sync = true.
+ virtual Status Delete(const WriteOptions& options,
+ ColumnFamilyHandle* column_family,
+ const Slice& key) = 0;
+
+ // Merge the database entry for "key" with "value". Returns OK on success,
+ // and a non-OK status on error. The semantics of this operation is
+ // determined by the user provided merge_operator when opening DB.
+ // Note: consider setting options.sync = true.
+ virtual Status Merge(const WriteOptions& options,
+ ColumnFamilyHandle* column_family, const Slice& key,
+ const Slice& value) = 0;
+
+ // May return some other Status on an error.
+ virtual Status Get(const ReadOptions& options,
+ ColumnFamilyHandle* column_family, const Slice& key,
+ std::string* value) = 0;
+
+ // If keys[i] does not exist in the database, then the i'th returned
+ // status will be one for which Status::IsNotFound() is true, and
+ // (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
+ // the i'th returned status will have Status::ok() true, and (*values)[i]
+ // will store the value associated with keys[i].
+ //
+ // (*values) will always be resized to be the same size as (keys).
+ // Similarly, the number of returned statuses will be the number of keys.
+ // Note: keys will not be "de-duplicated". Duplicate keys will return
+ // duplicate values in order.
+ virtual std::vector<Status> MultiGet(
+ const ReadOptions& options,
+ const std::vector<ColumnFamilyHandle*>& column_family,
+ const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
+
+ // If the key definitely does not exist in the database, then this method
+ // returns false, else true. If the caller wants to obtain value when the key
+ // is found in memory, a bool for 'value_found' must be passed. 'value_found'
+ // will be true on return if value has been set properly.
+ // This check is potentially lighter-weight than invoking DB::Get(). One way
+ // to make this lighter weight is to avoid doing any IOs.
+ // Default implementation here returns true and sets 'value_found' to false
+ virtual bool KeyMayExist(const ReadOptions&,
+ ColumnFamilyHandle*, const Slice&,
+ std::string*, bool* value_found = NULL) {
+ if (value_found != NULL) {
+ *value_found = false;
+ }
+ return true;
+ }
+
+ virtual Iterator* NewIterator(const ReadOptions& options,
+ ColumnFamilyHandle* column_family) = 0;
+
+ virtual bool GetProperty(ColumnFamilyHandle* column_family,
+ const Slice& property, std::string* value) = 0;
+
+ // Flush all mem-table data.
+ virtual Status Flush(const FlushOptions& options,
+ ColumnFamilyHandle* column_family) = 0;
+#endif
+
+ DB() { }
+ virtual ~DB();
+
+ // Set the database entry for "key" to "value". Returns OK on success,
+ // and a non-OK status on error.
+ // Note: consider setting options.sync = true.
+ virtual Status Put(const WriteOptions& options,
+ const Slice& key,
+ const Slice& value) = 0;
+
+ // Remove the database entry (if any) for "key". Returns OK on
+ // success, and a non-OK status on error. It is not an error if "key"
+ // did not exist in the database.
+ // Note: consider setting options.sync = true.
+ virtual Status Delete(const WriteOptions& options, const Slice& key) = 0;
+
+ // Apply the specified updates to the database.
+ // Returns OK on success, non-OK on failure.
+ // Note: consider setting options.sync = true.
+ virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;
+
+ // If the database contains an entry for "key" store the
+ // corresponding value in *value and return OK.
+ //
+ // If there is no entry for "key" leave *value unchanged and return
+ // a status for which Status::IsNotFound() returns true.
+ //
+ // May return some other Status on an error.
+ virtual Status Get(const ReadOptions& options,
+ const Slice& key, std::string* value) = 0;
+#if HAVE_BASHOLEVELDB
+ virtual Status Get(const ReadOptions& options,
+ const Slice& key, Value* value) = 0;
+#endif
+
+ // Return a heap-allocated iterator over the contents of the database.
+ // The result of NewIterator() is initially invalid (caller must
+ // call one of the Seek methods on the iterator before using it).
+ //
+ // Caller should delete the iterator when it is no longer needed.
+ // The returned iterator should be deleted before this db is deleted.
+ virtual Iterator* NewIterator(const ReadOptions& options) = 0;
+
+ // Return a handle to the current DB state. Iterators created with
+ // this handle will all observe a stable snapshot of the current DB
+ // state. The caller must call ReleaseSnapshot(result) when the
+ // snapshot is no longer needed.
+ virtual const Snapshot* GetSnapshot() = 0;
+
+ // Release a previously acquired snapshot. The caller must not
+ // use "snapshot" after this call.
+ virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;
+
+ // DB implementations can export properties about their state
+ // via this method. If "property" is a valid property understood by this
+ // DB implementation, fills "*value" with its current value and returns
+ // true. Otherwise returns false.
+ //
+ //
+ // Valid property names include:
+ //
+ // "leveldb.num-files-at-level<N>" - return the number of files at level <N>,
+ // where <N> is an ASCII representation of a level number (e.g. "0").
+ // "leveldb.stats" - returns a multi-line string that describes statistics
+ // about the internal operation of the DB.
+ // "leveldb.sstables" - returns a multi-line string that describes all
+ // of the sstables that make up the db contents.
+ virtual bool GetProperty(const Slice& property, std::string* value) = 0;
+
+ // For each i in [0,n-1], store in "sizes[i]", the approximate
+ // file system space used by keys in "[range[i].start .. range[i].limit)".
+ //
+ // Note that the returned sizes measure file system space usage, so
+ // if the user data compresses by a factor of ten, the returned
+ // sizes will be one-tenth the size of the corresponding user data size.
+ //
+ // The results may not include the sizes of recently written data.
+ virtual void GetApproximateSizes(const Range* range, int n,
+ uint64_t* sizes) = 0;
+
+ // Compact the underlying storage for the key range [*begin,*end].
+ // In particular, deleted and overwritten versions are discarded,
+ // and the data is rearranged to reduce the cost of operations
+ // needed to access the data. This operation should typically only
+ // be invoked by users who understand the underlying implementation.
+ //
+ // begin==NULL is treated as a key before all keys in the database.
+ // end==NULL is treated as a key after all keys in the database.
+ // Therefore the following call will compact the entire database:
+ // db->CompactRange(NULL, NULL);
+ virtual void CompactRange(const Slice* begin, const Slice* end) = 0;
+
+ // Suspends the background compaction thread. This methods
+ // returns once suspended.
+ virtual void SuspendCompactions() = 0;
+ // Resumes a suspended background compation thread.
+ virtual void ResumeCompactions() = 0;
+
+#ifdef HAVE_HYPERLEVELDB
+ // Create a live backup of a live LevelDB instance.
+ // The backup is stored in a directory named "backup-<name>" under the top
+ // level of the open LevelDB database. The implementation is permitted, and
+ // even encouraged, to improve the performance of this call through
+ // hard-links.
+ virtual Status LiveBackup(const Slice& name) = 0;
+
+ // Return an opaque timestamp that identifies the current point in time of the
+ // database. This timestamp may be subsequently presented to the
+ // NewReplayIterator method to create a ReplayIterator.
+ virtual void GetReplayTimestamp(std::string* timestamp) = 0;
+
+ // Set the lower bound for manual garbage collection. This method only takes
+ // effect when Options.manual_garbage_collection is true.
+ virtual void AllowGarbageCollectBeforeTimestamp(const std::string& timestamp) = 0;
+
+ // Validate the timestamp
+ virtual bool ValidateTimestamp(const std::string& timestamp) = 0;
+
+ // Compare two timestamps and return -1, 0, 1 for lt, eq, gt
+ virtual int CompareTimestamps(const std::string& lhs, const std::string& rhs) = 0;
+
+ // Return a ReplayIterator that returns every write operation performed after
+ // the timestamp.
+ virtual Status GetReplayIterator(const std::string& timestamp,
+ ReplayIterator** iter) = 0;
+
+ // Release a previously allocated replay iterator.
+ virtual void ReleaseReplayIterator(ReplayIterator* iter) = 0;
+#endif
+ private:
+ // No copying allowed
+ DB(const DB&);
+ void operator=(const DB&);
+};
+
+// Destroy the contents of the specified database.
+// Be very careful using this method.
+Status DestroyDB(const std::string& name, const Options& options);
+
+// If a DB cannot be opened, you may attempt to call this method to
+// resurrect as much of the contents of the database as possible.
+// Some data may be lost, so be careful when calling this function
+// on a database that contains important information.
+Status RepairDB(const std::string& dbname, const Options& options);
+
+}; // namespace leveldb
+
+#endif // STORAGE_LEVELDB_INCLUDE_DB_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/env.h b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/env.h
new file mode 100644
index 00000000000..4ad67d36fea
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/env.h
@@ -0,0 +1,349 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// An Env is an interface used by the leveldb implementation to access
+// operating system functionality like the filesystem etc. Callers
+// may wish to provide a custom Env object when opening a database to
+// get fine gain control; e.g., to rate limit file system operations.
+//
+// All Env implementations are safe for concurrent access from
+// multiple threads without any external synchronization.
+
+#ifndef STORAGE_LEVELDB_INCLUDE_ENV_H_
+#define STORAGE_LEVELDB_INCLUDE_ENV_H_
+
+#include "leveldb_wt_config.h"
+#if defined(HAVE_ROCKSDB) && !defined(leveldb)
+#define leveldb rocksdb
+#endif
+
+#include <string>
+#include <vector>
+#include <stdarg.h>
+#include <stdint.h>
+#if HAVE_BASHOLEVELDB
+#include "perf_count.h"
+#endif
+#include "status.h"
+
+namespace leveldb {
+
+class FileLock;
+class Logger;
+class RandomAccessFile;
+class SequentialFile;
+class Slice;
+class WritableFile;
+
+class Env {
+ public:
+ Env() { }
+ virtual ~Env();
+
+ // Return a default environment suitable for the current operating
+ // system. Sophisticated users may wish to provide their own Env
+ // implementation instead of relying on this default environment.
+ //
+ // The result of Default() belongs to leveldb and must never be deleted.
+ static Env* Default();
+
+ // Create a brand new sequentially-readable file with the specified name.
+ // On success, stores a pointer to the new file in *result and returns OK.
+ // On failure stores NULL in *result and returns non-OK. If the file does
+ // not exist, returns a non-OK status.
+ //
+ // The returned file will only be accessed by one thread at a time.
+ virtual Status NewSequentialFile(const std::string& fname,
+ SequentialFile** result) = 0;
+
+ // Create a brand new random access read-only file with the
+ // specified name. On success, stores a pointer to the new file in
+ // *result and returns OK. On failure stores NULL in *result and
+ // returns non-OK. If the file does not exist, returns a non-OK
+ // status.
+ //
+ // The returned file may be concurrently accessed by multiple threads.
+ virtual Status NewRandomAccessFile(const std::string& fname,
+ RandomAccessFile** result) = 0;
+
+ // Create an object that writes to a new file with the specified
+ // name. Deletes any existing file with the same name and creates a
+ // new file. On success, stores a pointer to the new file in
+ // *result and returns OK. On failure stores NULL in *result and
+ // returns non-OK.
+ //
+ // The returned file will only be accessed by one thread at a time.
+ virtual Status NewWritableFile(const std::string& fname,
+ WritableFile** result) = 0;
+
+ // Returns true iff the named file exists.
+ virtual bool FileExists(const std::string& fname) = 0;
+
+ // Store in *result the names of the children of the specified directory.
+ // The names are relative to "dir".
+ // Original contents of *results are dropped.
+ virtual Status GetChildren(const std::string& dir,
+ std::vector<std::string>* result) = 0;
+
+ // Delete the named file.
+ virtual Status DeleteFile(const std::string& fname) = 0;
+
+ // Create the specified directory.
+ virtual Status CreateDir(const std::string& dirname) = 0;
+
+ // Delete the specified directory.
+ virtual Status DeleteDir(const std::string& dirname) = 0;
+
+ // Store the size of fname in *file_size.
+ virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) = 0;
+
+ // Rename file src to target.
+ virtual Status RenameFile(const std::string& src,
+ const std::string& target) = 0;
+
+ // Lock the specified file. Used to prevent concurrent access to
+ // the same db by multiple processes. On failure, stores NULL in
+ // *lock and returns non-OK.
+ //
+ // On success, stores a pointer to the object that represents the
+ // acquired lock in *lock and returns OK. The caller should call
+ // UnlockFile(*lock) to release the lock. If the process exits,
+ // the lock will be automatically released.
+ //
+ // If somebody else already holds the lock, finishes immediately
+ // with a failure. I.e., this call does not wait for existing locks
+ // to go away.
+ //
+ // May create the named file if it does not already exist.
+ virtual Status LockFile(const std::string& fname, FileLock** lock) = 0;
+
+ // Release the lock acquired by a previous successful call to LockFile.
+ // REQUIRES: lock was returned by a successful LockFile() call
+ // REQUIRES: lock has not already been unlocked.
+ virtual Status UnlockFile(FileLock* lock) = 0;
+
+ // Arrange to run "(*function)(arg)" once in a background thread.
+ //
+ // "function" may run in an unspecified thread. Multiple functions
+ // added to the same Env may run concurrently in different threads.
+ // I.e., the caller may not assume that background work items are
+ // serialized.
+ virtual void Schedule(
+ void (*function)(void* arg),
+ void* arg) = 0;
+
+ // Start a new thread, invoking "function(arg)" within the new thread.
+ // When "function(arg)" returns, the thread will be destroyed.
+ virtual void StartThread(void (*function)(void* arg), void* arg) = 0;
+
+ // *path is set to a temporary directory that can be used for testing. It may
+ // or many not have just been created. The directory may or may not differ
+ // between runs of the same process, but subsequent calls will return the
+ // same directory.
+ virtual Status GetTestDirectory(std::string* path) = 0;
+
+ // Create and return a log file for storing informational messages.
+ virtual Status NewLogger(const std::string& fname, Logger** result) = 0;
+
+ // Returns the number of micro-seconds since some fixed point in time. Only
+ // useful for computing deltas of time.
+ virtual uint64_t NowMicros() = 0;
+
+ // Sleep/delay the thread for the perscribed number of micro-seconds.
+ virtual void SleepForMicroseconds(int micros) = 0;
+
+#if HAVE_BASHOLEVELDB
+ // Riak specific: Where supported, give count of background jobs pending.
+ virtual int GetBackgroundBacklog() const {return(0);};
+
+ // Riak specific: Get object that is tracking various software counters
+ virtual PerformanceCounters * GetPerformanceCounters() {return(gPerfCounters);}
+#endif
+
+ private:
+ // No copying allowed
+ Env(const Env&);
+ void operator=(const Env&);
+};
+
+// A file abstraction for reading sequentially through a file
+class SequentialFile {
+ public:
+ SequentialFile() { }
+ virtual ~SequentialFile();
+
+ // Read up to "n" bytes from the file. "scratch[0..n-1]" may be
+ // written by this routine. Sets "*result" to the data that was
+ // read (including if fewer than "n" bytes were successfully read).
+ // May set "*result" to point at data in "scratch[0..n-1]", so
+ // "scratch[0..n-1]" must be live when "*result" is used.
+ // If an error was encountered, returns a non-OK status.
+ //
+ // REQUIRES: External synchronization
+ virtual Status Read(size_t n, Slice* result, char* scratch) = 0;
+
+ // Skip "n" bytes from the file. This is guaranteed to be no
+ // slower that reading the same data, but may be faster.
+ //
+ // If end of file is reached, skipping will stop at the end of the
+ // file, and Skip will return OK.
+ //
+ // REQUIRES: External synchronization
+ virtual Status Skip(uint64_t n) = 0;
+
+ private:
+ // No copying allowed
+ SequentialFile(const SequentialFile&);
+ void operator=(const SequentialFile&);
+};
+
+// A file abstraction for randomly reading the contents of a file.
+class RandomAccessFile {
+ public:
+ RandomAccessFile() { }
+ virtual ~RandomAccessFile();
+
+ // Read up to "n" bytes from the file starting at "offset".
+ // "scratch[0..n-1]" may be written by this routine. Sets "*result"
+ // to the data that was read (including if fewer than "n" bytes were
+ // successfully read). May set "*result" to point at data in
+ // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
+ // "*result" is used. If an error was encountered, returns a non-OK
+ // status.
+ //
+ // Safe for concurrent use by multiple threads.
+ virtual Status Read(uint64_t offset, size_t n, Slice* result,
+ char* scratch) const = 0;
+
+ private:
+ // No copying allowed
+ RandomAccessFile(const RandomAccessFile&);
+ void operator=(const RandomAccessFile&);
+};
+
+// A file abstraction for sequential writing. The implementation
+// must provide buffering since callers may append small fragments
+// at a time to the file.
+class WritableFile {
+ public:
+ WritableFile() { }
+ virtual ~WritableFile();
+
+ virtual Status Append(const Slice& data) = 0;
+ virtual Status Close() = 0;
+ virtual Status Flush() = 0;
+ virtual Status Sync() = 0;
+
+ private:
+ // No copying allowed
+ WritableFile(const WritableFile&);
+ void operator=(const WritableFile&);
+};
+
+// An interface for writing log messages.
+class Logger {
+ public:
+ Logger() { }
+ virtual ~Logger();
+
+ // Write an entry to the log file with the specified format.
+ virtual void Logv(const char* format, va_list ap) = 0;
+
+ private:
+ // No copying allowed
+ Logger(const Logger&);
+ void operator=(const Logger&);
+};
+
+
+// Identifies a locked file.
+class FileLock {
+ public:
+ FileLock() { }
+ virtual ~FileLock();
+ private:
+ // No copying allowed
+ FileLock(const FileLock&);
+ void operator=(const FileLock&);
+};
+
+// Log the specified data to *info_log if info_log is non-NULL.
+extern void Log(Logger* info_log, const char* format, ...)
+# if defined(__GNUC__) || defined(__clang__)
+ __attribute__((__format__ (__printf__, 2, 3)))
+# endif
+ ;
+
+// A utility routine: write "data" to the named file.
+extern Status WriteStringToFile(Env* env, const Slice& data,
+ const std::string& fname);
+
+// A utility routine: read contents of named file into *data
+extern Status ReadFileToString(Env* env, const std::string& fname,
+ std::string* data);
+
+// An implementation of Env that forwards all calls to another Env.
+// May be useful to clients who wish to override just part of the
+// functionality of another Env.
+class EnvWrapper : public Env {
+ public:
+ // Initialize an EnvWrapper that delegates all calls to *t
+ explicit EnvWrapper(Env* t) : target_(t) { }
+ virtual ~EnvWrapper();
+
+ // Return the target to which this Env forwards all calls
+ Env* target() const { return target_; }
+
+ // The following text is boilerplate that forwards all methods to target()
+ Status NewSequentialFile(const std::string& f, SequentialFile** r) {
+ return target_->NewSequentialFile(f, r);
+ }
+ Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) {
+ return target_->NewRandomAccessFile(f, r);
+ }
+ Status NewWritableFile(const std::string& f, WritableFile** r) {
+ return target_->NewWritableFile(f, r);
+ }
+ bool FileExists(const std::string& f) { return target_->FileExists(f); }
+ Status GetChildren(const std::string& dir, std::vector<std::string>* r) {
+ return target_->GetChildren(dir, r);
+ }
+ Status DeleteFile(const std::string& f) { return target_->DeleteFile(f); }
+ Status CreateDir(const std::string& d) { return target_->CreateDir(d); }
+ Status DeleteDir(const std::string& d) { return target_->DeleteDir(d); }
+ Status GetFileSize(const std::string& f, uint64_t* s) {
+ return target_->GetFileSize(f, s);
+ }
+ Status RenameFile(const std::string& s, const std::string& t) {
+ return target_->RenameFile(s, t);
+ }
+ Status LockFile(const std::string& f, FileLock** l) {
+ return target_->LockFile(f, l);
+ }
+ Status UnlockFile(FileLock* l) { return target_->UnlockFile(l); }
+ void Schedule(void (*f)(void*), void* a) {
+ return target_->Schedule(f, a);
+ }
+ void StartThread(void (*f)(void*), void* a) {
+ return target_->StartThread(f, a);
+ }
+ virtual Status GetTestDirectory(std::string* path) {
+ return target_->GetTestDirectory(path);
+ }
+ virtual Status NewLogger(const std::string& fname, Logger** result) {
+ return target_->NewLogger(fname, result);
+ }
+ uint64_t NowMicros() {
+ return target_->NowMicros();
+ }
+ void SleepForMicroseconds(int micros) {
+ target_->SleepForMicroseconds(micros);
+ }
+ private:
+ Env* target_;
+};
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_INCLUDE_ENV_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/filter_policy.h b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/filter_policy.h
new file mode 100644
index 00000000000..e434ef4b241
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/filter_policy.h
@@ -0,0 +1,78 @@
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A database can be configured with a custom FilterPolicy object.
+// This object is responsible for creating a small filter from a set
+// of keys. These filters are stored in leveldb and are consulted
+// automatically by leveldb to decide whether or not to read some
+// information from disk. In many cases, a filter can cut down the
+// number of disk seeks form a handful to a single disk seek per
+// DB::Get() call.
+//
+// Most people will want to use the builtin bloom filter support (see
+// NewBloomFilterPolicy() below).
+
+#ifndef STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
+#define STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
+
+#include "leveldb_wt_config.h"
+#if defined(HAVE_ROCKSDB) && !defined(leveldb)
+#define leveldb rocksdb
+#endif
+
+#include <string>
+
+namespace leveldb {
+
+class Slice;
+
+class FilterPolicy {
+ public:
+ virtual ~FilterPolicy();
+
+ // Return the name of this policy. Note that if the filter encoding
+ // changes in an incompatible way, the name returned by this method
+ // must be changed. Otherwise, old incompatible filters may be
+ // passed to methods of this type.
+ virtual const char* Name() const = 0;
+
+ // keys[0,n-1] contains a list of keys (potentially with duplicates)
+ // that are ordered according to the user supplied comparator.
+ // Append a filter that summarizes keys[0,n-1] to *dst.
+ //
+ // Warning: do not change the initial contents of *dst. Instead,
+ // append the newly constructed filter to *dst.
+ virtual void CreateFilter(const Slice* keys, int n, std::string* dst)
+ const = 0;
+
+ // "filter" contains the data appended by a preceding call to
+ // CreateFilter() on this class. This method must return true if
+ // the key was in the list of keys passed to CreateFilter().
+ // This method may return true or false if the key was not on the
+ // list, but it should aim to return false with a high probability.
+ virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
+};
+
+// Return a new filter policy that uses a bloom filter with approximately
+// the specified number of bits per key. A good value for bits_per_key
+// is 10, which yields a filter with ~ 1% false positive rate.
+//
+// Callers must delete the result after any database that is using the
+// result has been closed.
+//
+// Note: if you are using a custom comparator that ignores some parts
+// of the keys being compared, you must not use NewBloomFilterPolicy()
+// and must provide your own FilterPolicy that also ignores the
+// corresponding parts of the keys. For example, if the comparator
+// ignores trailing spaces, it would be incorrect to use a
+// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
+// trailing spaces in keys.
+extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key);
+#if HAVE_BASHOLEVELDB
+extern const FilterPolicy* NewBloomFilterPolicy2(int bits_per_key);
+#endif
+
+}
+
+#endif // STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/iterator.h b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/iterator.h
new file mode 100644
index 00000000000..2d97d180b17
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/iterator.h
@@ -0,0 +1,105 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// An iterator yields a sequence of key/value pairs from a source.
+// The following class defines the interface. Multiple implementations
+// are provided by this library. In particular, iterators are provided
+// to access the contents of a Table or a DB.
+//
+// Multiple threads can invoke const methods on an Iterator without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same Iterator must use
+// external synchronization.
+
+#ifndef STORAGE_LEVELDB_INCLUDE_ITERATOR_H_
+#define STORAGE_LEVELDB_INCLUDE_ITERATOR_H_
+
+#include "leveldb_wt_config.h"
+#if defined(HAVE_ROCKSDB) && !defined(leveldb)
+#define leveldb rocksdb
+#endif
+
+#include "slice.h"
+#include "status.h"
+
+namespace leveldb {
+
+class Iterator {
+ public:
+ Iterator();
+ virtual ~Iterator();
+
+ // An iterator is either positioned at a key/value pair, or
+ // not valid. This method returns true iff the iterator is valid.
+ virtual bool Valid() const = 0;
+
+ // Position at the first key in the source. The iterator is Valid()
+ // after this call iff the source is not empty.
+ virtual void SeekToFirst() = 0;
+
+ // Position at the last key in the source. The iterator is
+ // Valid() after this call iff the source is not empty.
+ virtual void SeekToLast() = 0;
+
+ // Position at the first key in the source that at or past target
+ // The iterator is Valid() after this call iff the source contains
+ // an entry that comes at or past target.
+ virtual void Seek(const Slice& target) = 0;
+
+ // Moves to the next entry in the source. After this call, Valid() is
+ // true iff the iterator was not positioned at the last entry in the source.
+ // REQUIRES: Valid()
+ virtual void Next() = 0;
+
+ // Moves to the previous entry in the source. After this call, Valid() is
+ // true iff the iterator was not positioned at the first entry in source.
+ // REQUIRES: Valid()
+ virtual void Prev() = 0;
+
+ // Return the key for the current entry. The underlying storage for
+ // the returned slice is valid only until the next modification of
+ // the iterator.
+ // REQUIRES: Valid()
+ virtual Slice key() const = 0;
+
+ // Return the value for the current entry. The underlying storage for
+ // the returned slice is valid only until the next modification of
+ // the iterator.
+ // REQUIRES: !AtEnd() && !AtStart()
+ virtual Slice value() const = 0;
+
+ // If an error has occurred, return it. Else return an ok status.
+ virtual Status status() const = 0;
+
+ // Clients are allowed to register function/arg1/arg2 triples that
+ // will be invoked when this iterator is destroyed.
+ //
+ // Note that unlike all of the preceding methods, this method is
+ // not abstract and therefore clients should not override it.
+ typedef void (*CleanupFunction)(void* arg1, void* arg2);
+ void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2);
+
+ private:
+ struct Cleanup {
+ CleanupFunction function;
+ void* arg1;
+ void* arg2;
+ Cleanup* next;
+ };
+ Cleanup cleanup_;
+
+ // No copying allowed
+ Iterator(const Iterator&);
+ void operator=(const Iterator&);
+};
+
+// Return an empty iterator (yields nothing).
+extern Iterator* NewEmptyIterator();
+
+// Return an empty iterator with the specified status.
+extern Iterator* NewErrorIterator(const Status& status);
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_INCLUDE_ITERATOR_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/options.h b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/options.h
new file mode 100644
index 00000000000..9dcf73fc2a0
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/options.h
@@ -0,0 +1,258 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_INCLUDE_OPTIONS_H_
+#define STORAGE_LEVELDB_INCLUDE_OPTIONS_H_
+
+#include "leveldb_wt_config.h"
+#if defined(HAVE_ROCKSDB) && !defined(leveldb)
+#define leveldb rocksdb
+#endif
+
+#include <memory>
+#include <stddef.h>
+
+namespace leveldb {
+
+class Cache;
+class Comparator;
+class Env;
+class FilterPolicy;
+class Logger;
+class Snapshot;
+
+// DB contents are stored in a set of blocks, each of which holds a
+// sequence of key,value pairs. Each block may be compressed before
+// being stored in a file. The following enum describes which
+// compression method (if any) is used to compress a block.
+enum CompressionType {
+ // NOTE: do not change the values of existing entries, as these are
+ // part of the persistent format on disk.
+ kNoCompression = 0x0,
+ kSnappyCompression = 0x1
+#ifdef HAVE_ROCKSDB
+ , kZlibCompression = 0x2
+#endif
+};
+
+// Options to control the behavior of a database (passed to DB::Open)
+struct Options {
+ // -------------------
+ // Parameters that affect behavior
+
+ // Comparator used to define the order of keys in the table.
+ // Default: a comparator that uses lexicographic byte-wise ordering
+ //
+ // REQUIRES: The client must ensure that the comparator supplied
+ // here has the same name and orders keys *exactly* the same as the
+ // comparator provided to previous open calls on the same DB.
+ const Comparator* comparator;
+
+ // If true, the database will be created if it is missing.
+ // Default: false
+ bool create_if_missing;
+
+ // If true, an error is raised if the database already exists.
+ // Default: false
+ bool error_if_exists;
+
+ // If true, the implementation will do aggressive checking of the
+ // data it is processing and will stop early if it detects any
+ // errors. This may have unforeseen ramifications: for example, a
+ // corruption of one DB entry may cause a large number of entries to
+ // become unreadable or for the entire DB to become unopenable.
+ // Default: false
+ bool paranoid_checks;
+
+#ifdef HAVE_ROCKSDB
+ // By default, RocksDB uses only one background thread for flush and
+ // compaction. Calling this function will set it up such that total of
+ // `total_threads` is used. Good value for `total_threads` is the number of
+ // cores. You almost definitely want to call this function if your system is
+ // bottlenecked by RocksDB.
+ Options* IncreaseParallelism(int = 16) { return this; }
+ Options* OptimizeLevelStyleCompaction() { return this; }
+#endif
+
+#if HAVE_BASHOLEVELDB
+ // Riak specific: this variable replaces paranoid_checks at one
+ // one place in the code. This variable alone controls whether or not
+ // compaction read operations check CRC values. Riak needs
+ // the compaction CRC check, but not other paranoid_checks ... so
+ // this independent control.
+ // Default: true
+ bool verify_compactions;
+#endif
+
+ // Use the specified object to interact with the environment,
+ // e.g. to read/write files, schedule background work, etc.
+ // Default: Env::Default()
+ Env* env;
+
+ // Any internal progress/error information generated by the db will
+ // be written to info_log if it is non-NULL, or to a file stored
+ // in the same directory as the DB contents if info_log is NULL.
+ // Default: NULL
+ Logger* info_log;
+
+ // -------------------
+ // Parameters that affect performance
+
+ // Amount of data to build up in memory (backed by an unsorted log
+ // on disk) before converting to a sorted on-disk file.
+ //
+ // Larger values increase performance, especially during bulk loads.
+ // Up to two write buffers may be held in memory at the same time,
+ // so you may wish to adjust this parameter to control memory usage.
+ // Also, a larger write buffer will result in a longer recovery time
+ // the next time the database is opened.
+ //
+ // Default: 4MB
+ size_t write_buffer_size;
+
+ // Number of open files that can be used by the DB. You may need to
+ // increase this if your database has a large working set (budget
+ // one open file per 2MB of working set).
+ //
+ // Default: 1000
+ int max_open_files;
+
+ // Control over blocks (user data is stored in a set of blocks, and
+ // a block is the unit of reading from disk).
+
+ // If non-NULL, use the specified cache for blocks.
+ // If NULL, leveldb will automatically create and use an 8MB internal cache.
+ // Default: NULL
+ Cache* block_cache;
+
+ // Approximate size of user data packed per block. Note that the
+ // block size specified here corresponds to uncompressed data. The
+ // actual size of the unit read from disk may be smaller if
+ // compression is enabled. This parameter can be changed dynamically.
+ //
+ // Default: 4K
+ size_t block_size;
+
+ // Number of keys between restart points for delta encoding of keys.
+ // This parameter can be changed dynamically. Most clients should
+ // leave this parameter alone.
+ //
+ // Default: 16
+ int block_restart_interval;
+
+ // Compress blocks using the specified compression algorithm. This
+ // parameter can be changed dynamically.
+ //
+ // Default: kSnappyCompression, which gives lightweight but fast
+ // compression.
+ //
+ // Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz:
+ // ~200-500MB/s compression
+ // ~400-800MB/s decompression
+ // Note that these speeds are significantly faster than most
+ // persistent storage speeds, and therefore it is typically never
+ // worth switching to kNoCompression. Even if the input data is
+ // incompressible, the kSnappyCompression implementation will
+ // efficiently detect that and will switch to uncompressed mode.
+ CompressionType compression;
+
+ // If non-NULL, use the specified filter policy to reduce disk reads.
+ // Many applications will benefit from passing the result of
+ // NewBloomFilterPolicy() here.
+ //
+ // Default: NULL
+ const FilterPolicy* filter_policy;
+
+#ifdef HAVE_HYPERLEVELDB
+ // Is the database used with the Replay mechanism? If yes, the lower bound on
+ // values to compact is (somewhat) left up to the application; if no, then
+ // LevelDB functions as usual, and uses snapshots to determine the lower
+ // bound. HyperLevelDB will always maintain the integrity of snapshots, so
+ // the application merely has the option to hold data as if it's holding a
+ // snapshot. This just prevents compaction from grabbing data before the app
+ // can get a snapshot.
+ //
+ // Default: false/no.
+ bool manual_garbage_collection;
+#endif
+
+ // Create an Options object with default values for all fields.
+ Options();
+};
+
+#ifdef HAVE_ROCKSDB
+struct ColumnFamilyOptions : public Options {
+ ColumnFamilyOptions() : Options() {}
+};
+
+struct DBOptions : public Options {
+ DBOptions() : Options() {}
+};
+#endif
+
+// Options that control read operations
+struct ReadOptions {
+ // If true, all data read from underlying storage will be
+ // verified against corresponding checksums.
+ // Default: false
+ bool verify_checksums;
+
+ // Should the data read for this iteration be cached in memory?
+ // Callers may wish to set this field to false for bulk scans.
+ // Default: true
+ bool fill_cache;
+
+ // If "snapshot" is non-NULL, read as of the supplied snapshot
+ // (which must belong to the DB that is being read and which must
+ // not have been released). If "snapshot" is NULL, use an impliicit
+ // snapshot of the state at the beginning of this read operation.
+ // Default: NULL
+ const Snapshot* snapshot;
+
+ ReadOptions()
+ : verify_checksums(false),
+ fill_cache(true),
+ snapshot(NULL) {
+ }
+};
+
+// Options that control write operations
+struct WriteOptions {
+ // If true, the write will be flushed from the operating system
+ // buffer cache (by calling WritableFile::Sync()) before the write
+ // is considered complete. If this flag is true, writes will be
+ // slower.
+ //
+ // If this flag is false, and the machine crashes, some recent
+ // writes may be lost. Note that if it is just the process that
+ // crashes (i.e., the machine does not reboot), no writes will be
+ // lost even if sync==false.
+ //
+ // In other words, a DB write with sync==false has similar
+ // crash semantics as the "write()" system call. A DB write
+ // with sync==true has similar crash semantics to a "write()"
+ // system call followed by "fsync()".
+ //
+ // Default: false
+ bool sync;
+
+ WriteOptions()
+ : sync(false) {
+ }
+};
+
+#ifdef HAVE_ROCKSDB
+// Options that control flush operations
+struct FlushOptions {
+ // If true, the flush will wait until the flush is done.
+ // Default: true
+ bool wait;
+
+ FlushOptions() : wait(true) {}
+};
+#endif
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_INCLUDE_OPTIONS_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/slice.h b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/slice.h
new file mode 100644
index 00000000000..1eb66dd825f
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/slice.h
@@ -0,0 +1,127 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Slice is a simple structure containing a pointer into some external
+// storage and a size. The user of a Slice must ensure that the slice
+// is not used after the corresponding external storage has been
+// deallocated.
+//
+// Multiple threads can invoke const methods on a Slice without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same Slice must use
+// external synchronization.
+
+#ifndef STORAGE_LEVELDB_INCLUDE_SLICE_H_
+#define STORAGE_LEVELDB_INCLUDE_SLICE_H_
+
+#include "leveldb_wt_config.h"
+#if defined(HAVE_ROCKSDB) && !defined(leveldb)
+#define leveldb rocksdb
+#endif
+
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+#include <string>
+
+namespace leveldb {
+
+class Slice {
+ public:
+ // Create an empty slice.
+ Slice() : data_(""), size_(0) { }
+
+ // Create a slice that refers to d[0,n-1].
+ Slice(const char* d, size_t n) : data_(d), size_(n) { }
+
+ // Create a slice that refers to the contents of "s"
+ Slice(const std::string& s) : data_(s.data()), size_(s.size()) { }
+
+ // Create a slice that refers to s[0,strlen(s)-1]
+ Slice(const char* s) : data_(s), size_(strlen(s)) { }
+
+ // Return a pointer to the beginning of the referenced data
+ const char* data() const { return data_; }
+
+ // Return the length (in bytes) of the referenced data
+ size_t size() const { return size_; }
+
+ // Return true iff the length of the referenced data is zero
+ bool empty() const { return size_ == 0; }
+
+ // Return the ith byte in the referenced data.
+ // REQUIRES: n < size()
+ char operator[](size_t n) const {
+ assert(n < size());
+ return data_[n];
+ }
+
+ // Change this slice to refer to an empty array
+ void clear() { data_ = ""; size_ = 0; }
+
+ // Drop the first "n" bytes from this slice.
+ void remove_prefix(size_t n) {
+ assert(n <= size());
+ data_ += n;
+ size_ -= n;
+ }
+
+ // Return a string that contains the copy of the referenced data.
+ std::string ToString() const { return std::string(data_, size_); }
+
+ // Three-way comparison. Returns value:
+ // < 0 iff "*this" < "b",
+ // == 0 iff "*this" == "b",
+ // > 0 iff "*this" > "b"
+ int compare(const Slice& b) const;
+
+ // Return true iff "x" is a prefix of "*this"
+ bool starts_with(const Slice& x) const {
+ return ((size_ >= x.size_) &&
+ (memcmp(data_, x.data_, x.size_) == 0));
+ }
+
+// The LevelDB JNI layer peeks in here
+// private:
+ const char* data_;
+ size_t size_;
+
+ // Intentionally copyable
+};
+
+#ifdef HAVE_ROCKSDB
+// A set of Slices that are virtually concatenated together. 'parts' points
+// to an array of Slices. The number of elements in the array is 'num_parts'.
+struct SliceParts {
+ SliceParts(const Slice* _parts, int _num_parts) :
+ parts(_parts), num_parts(_num_parts) { }
+
+ const Slice* parts;
+ int num_parts;
+};
+#endif
+
+inline bool operator==(const Slice& x, const Slice& y) {
+ return ((x.size() == y.size()) &&
+ (memcmp(x.data(), y.data(), x.size()) == 0));
+}
+
+inline bool operator!=(const Slice& x, const Slice& y) {
+ return !(x == y);
+}
+
+inline int Slice::compare(const Slice& b) const {
+ const size_t min_len = (size_ < b.size_) ? size_ : b.size_;
+ int r = memcmp(data_, b.data_, min_len);
+ if (r == 0) {
+ if (size_ < b.size_) r = -1;
+ else if (size_ > b.size_) r = +1;
+ }
+ return r;
+}
+
+} // namespace leveldb
+
+
+#endif // STORAGE_LEVELDB_INCLUDE_SLICE_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/status.h b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/status.h
new file mode 100644
index 00000000000..3c21f64462b
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/status.h
@@ -0,0 +1,111 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A Status encapsulates the result of an operation. It may indicate success,
+// or it may indicate an error with an associated error message.
+//
+// Multiple threads can invoke const methods on a Status without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same Status must use
+// external synchronization.
+
+#ifndef STORAGE_LEVELDB_INCLUDE_STATUS_H_
+#define STORAGE_LEVELDB_INCLUDE_STATUS_H_
+
+#include "leveldb_wt_config.h"
+#if defined(HAVE_ROCKSDB) && !defined(leveldb)
+#define leveldb rocksdb
+#endif
+
+#include <string>
+#include "slice.h"
+
+namespace leveldb {
+
+class Status {
+ public:
+ // Create a success status.
+ Status() : state_(NULL) { }
+ ~Status() { delete[] state_; }
+
+ // Copy the specified status.
+ Status(const Status& s);
+ void operator=(const Status& s);
+
+ // Return a success status.
+ static Status OK() { return Status(); }
+
+ // Return error status of an appropriate type.
+ static Status NotFound(const Slice& msg, const Slice& msg2 = Slice()) {
+ return Status(kNotFound, msg, msg2);
+ }
+ static Status Corruption(const Slice& msg, const Slice& msg2 = Slice()) {
+ return Status(kCorruption, msg, msg2);
+ }
+ static Status NotSupported(const Slice& msg, const Slice& msg2 = Slice()) {
+ return Status(kNotSupported, msg, msg2);
+ }
+ static Status InvalidArgument(const Slice& msg, const Slice& msg2 = Slice()) {
+ return Status(kInvalidArgument, msg, msg2);
+ }
+ static Status IOError(const Slice& msg, const Slice& msg2 = Slice()) {
+ return Status(kIOError, msg, msg2);
+ }
+
+ // Returns true iff the status indicates success.
+ bool ok() const { return (state_ == NULL); }
+
+ // Returns true iff the status indicates a NotFound error.
+ bool IsNotFound() const { return code() == kNotFound; }
+
+ // Returns true iff the status indicates a Corruption error.
+ bool IsCorruption() const { return code() == kCorruption; }
+
+ // Returns true iff the status indicates an IOError.
+ bool IsIOError() const { return code() == kIOError; }
+
+ // Return a string representation of this status suitable for printing.
+ // Returns the string "OK" for success.
+ std::string ToString() const;
+
+ private:
+ // OK status has a NULL state_. Otherwise, state_ is a new[] array
+ // of the following form:
+ // state_[0..3] == length of message
+ // state_[4] == code
+ // state_[5..] == message
+ const char* state_;
+
+ enum Code {
+ kOk = 0,
+ kNotFound = 1,
+ kCorruption = 2,
+ kNotSupported = 3,
+ kInvalidArgument = 4,
+ kIOError = 5
+ };
+
+ Code code() const {
+ return (state_ == NULL) ? kOk : static_cast<Code>(state_[4]);
+ }
+
+ Status(Code code, const Slice& msg, const Slice& msg2);
+ static const char* CopyState(const char* s);
+};
+
+inline Status::Status(const Status& s) {
+ state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_);
+}
+inline void Status::operator=(const Status& s) {
+ // The following condition catches both aliasing (when this == &s),
+ // and the common case where both s and *this are ok.
+ if (state_ != s.state_) {
+ delete[] state_;
+ state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_);
+ }
+}
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_INCLUDE_STATUS_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/write_batch.h b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/write_batch.h
new file mode 100644
index 00000000000..293b41ad818
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/include/leveldb/write_batch.h
@@ -0,0 +1,142 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// WriteBatch holds a collection of updates to apply atomically to a DB.
+//
+// The updates are applied in the order in which they are added
+// to the WriteBatch. For example, the value of "key" will be "v3"
+// after the following batch is written:
+//
+// batch.Put("key", "v1");
+// batch.Delete("key");
+// batch.Put("key", "v2");
+// batch.Put("key", "v3");
+//
+// Multiple threads can invoke const methods on a WriteBatch without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same WriteBatch must use
+// external synchronization.
+
+#ifndef STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_
+#define STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_
+
+#include "leveldb_wt_config.h"
+#if defined(HAVE_ROCKSDB) && !defined(leveldb)
+#define leveldb rocksdb
+#endif
+
+#include <string>
+#include "status.h"
+
+namespace leveldb {
+
+class Slice;
+#if HAVE_ROCKSDB
+class ColumnFamilyHandle;
+struct SliceParts;
+#endif
+
+class WriteBatch {
+ public:
+#ifdef HAVE_ROCKSDB
+ explicit WriteBatch(size_t reserved_bytes = 0);
+#else
+ WriteBatch();
+#endif
+ ~WriteBatch();
+
+ // Store the mapping "key->value" in the database.
+ void Put(const Slice& key, const Slice& value);
+
+ // If the database contains a mapping for "key", erase it. Else do nothing.
+ void Delete(const Slice& key);
+
+ // Clear all updates buffered in this batch.
+ void Clear();
+
+#ifdef HAVE_ROCKSDB
+ void Put(ColumnFamilyHandle* column_family, const Slice& key,
+ const Slice& value);
+
+ // Variant of Put() that gathers output like writev(2). The key and value
+ // that will be written to the database are concatentations of arrays of
+ // slices.
+ void Put(ColumnFamilyHandle* column_family, const SliceParts& key,
+ const SliceParts& value);
+
+ void Delete(ColumnFamilyHandle* column_family, const Slice& key);
+#endif
+
+ // Support for iterating over the contents of a batch.
+ class Handler {
+ public:
+ virtual ~Handler();
+#ifdef HAVE_ROCKSDB
+ // default implementation will just call Put without column family for
+ // backwards compatibility. If the column family is not default,
+ // the function is noop
+ virtual Status PutCF(uint32_t column_family_id, const Slice& key,
+ const Slice& value) {
+ if (column_family_id == 0) {
+ // Put() historically doesn't return status. We didn't want to be
+ // backwards incompatible so we didn't change the return status
+ // (this is a public API). We do an ordinary get and return Status::OK()
+ Put(key, value);
+ return Status::OK();
+ }
+ return Status::InvalidArgument(
+ "non-default column family and PutCF not implemented");
+ }
+ // Merge and LogData are not pure virtual. Otherwise, we would break
+ // existing clients of Handler on a source code level. The default
+ // implementation of Merge simply throws a runtime exception.
+ virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
+ const Slice& value) {
+ if (column_family_id == 0) {
+ Merge(key, value);
+ return Status::OK();
+ }
+ return Status::InvalidArgument(
+ "non-default column family and MergeCF not implemented");
+ }
+ virtual void Merge(const Slice& key, const Slice& value);
+ // The default implementation of LogData does nothing.
+ virtual void LogData(const Slice& blob);
+ virtual Status DeleteCF(uint32_t column_family_id, const Slice& key) {
+ if (column_family_id == 0) {
+ Delete(key);
+ return Status::OK();
+ }
+ return Status::InvalidArgument(
+ "non-default column family and DeleteCF not implemented");
+ }
+ // Continue is called by WriteBatch::Iterate. If it returns false,
+ // iteration is halted. Otherwise, it continues iterating. The default
+ // implementation always returns true.
+ virtual bool Continue();
+#endif
+ virtual void Put(const Slice& key, const Slice& value) = 0;
+ virtual void Delete(const Slice& key) = 0;
+ };
+ Status Iterate(Handler* handler) const;
+
+#ifdef HAVE_ROCKSDB
+ // Retrieve data size of the batch.
+ size_t GetDataSize() const { return rep_.size(); }
+
+ // Returns the number of updates in the batch
+ int Count() const;
+#endif
+
+ private:
+ friend class WriteBatchInternal;
+
+ std::string rep_; // See comment in write_batch.cc for the format of rep_
+
+ // Intentionally copyable
+};
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/port/port.h b/src/third_party/wiredtiger/api/leveldb/leveldb/port/port.h
new file mode 100644
index 00000000000..1f83635a82c
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/port/port.h
@@ -0,0 +1,38 @@
+/*-
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _PORT_H_
+#define _PORT_H_ 1
+/* Stub portability header for imported LevelDB code. */
+
+#include "wiredtiger.h"
+
+namespace port {
+ const int kLittleEndian = 1;
+}
+
+#endif
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/util/arena.h b/src/third_party/wiredtiger/api/leveldb/leveldb/util/arena.h
new file mode 100644
index 00000000000..8f7dde226c4
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/util/arena.h
@@ -0,0 +1,68 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_UTIL_ARENA_H_
+#define STORAGE_LEVELDB_UTIL_ARENA_H_
+
+#include <cstddef>
+#include <vector>
+#include <assert.h>
+#include <stdint.h>
+
+namespace leveldb {
+
+class Arena {
+ public:
+ Arena();
+ ~Arena();
+
+ // Return a pointer to a newly allocated memory block of "bytes" bytes.
+ char* Allocate(size_t bytes);
+
+ // Allocate memory with the normal alignment guarantees provided by malloc
+ char* AllocateAligned(size_t bytes);
+
+ // Returns an estimate of the total memory usage of data allocated
+ // by the arena (including space allocated but not yet used for user
+ // allocations).
+ size_t MemoryUsage() const {
+ return blocks_memory_ + blocks_.capacity() * sizeof(char*);
+ }
+
+ private:
+ char* AllocateFallback(size_t bytes);
+ char* AllocateNewBlock(size_t block_bytes);
+
+ // Allocation state
+ char* alloc_ptr_;
+ size_t alloc_bytes_remaining_;
+
+ // Array of new[] allocated memory blocks
+ std::vector<char*> blocks_;
+
+ // Bytes of memory in blocks allocated so far
+ size_t blocks_memory_;
+
+ // No copying allowed
+ Arena(const Arena&);
+ void operator=(const Arena&);
+};
+
+inline char* Arena::Allocate(size_t bytes) {
+ // The semantics of what to return are a bit messy if we allow
+ // 0-byte allocations, so we disallow them here (we don't need
+ // them for our internal use).
+ assert(bytes > 0);
+ if (bytes <= alloc_bytes_remaining_) {
+ char* result = alloc_ptr_;
+ alloc_ptr_ += bytes;
+ alloc_bytes_remaining_ -= bytes;
+ return result;
+ }
+ return AllocateFallback(bytes);
+}
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_UTIL_ARENA_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/util/coding.cc b/src/third_party/wiredtiger/api/leveldb/leveldb/util/coding.cc
new file mode 100644
index 00000000000..ad1f457a16a
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/util/coding.cc
@@ -0,0 +1,163 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "util/coding.h"
+
+namespace leveldb {
+
+char* EncodeVarint32(char* dst, uint32_t v) {
+ // Operate on characters as unsigneds
+ unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
+ static const int B = 128;
+ if (v < (1<<7)) {
+ *(ptr++) = v;
+ } else if (v < (1<<14)) {
+ *(ptr++) = v | B;
+ *(ptr++) = v>>7;
+ } else if (v < (1<<21)) {
+ *(ptr++) = v | B;
+ *(ptr++) = (v>>7) | B;
+ *(ptr++) = v>>14;
+ } else if (v < (1<<28)) {
+ *(ptr++) = v | B;
+ *(ptr++) = (v>>7) | B;
+ *(ptr++) = (v>>14) | B;
+ *(ptr++) = v>>21;
+ } else {
+ *(ptr++) = v | B;
+ *(ptr++) = (v>>7) | B;
+ *(ptr++) = (v>>14) | B;
+ *(ptr++) = (v>>21) | B;
+ *(ptr++) = v>>28;
+ }
+ return reinterpret_cast<char*>(ptr);
+}
+
+const char* GetVarint32PtrFallback(const char* p,
+ const char* limit,
+ uint32_t* value) {
+ uint32_t result = 0;
+ for (uint32_t shift = 0; shift <= 28 && p < limit; shift += 7) {
+ uint32_t byte = *(reinterpret_cast<const unsigned char*>(p));
+ p++;
+ if (byte & 128) {
+ // More bytes are present
+ result |= ((byte & 127) << shift);
+ } else {
+ result |= (byte << shift);
+ *value = result;
+ return reinterpret_cast<const char*>(p);
+ }
+ }
+ return NULL;
+}
+
+const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* value) {
+ uint64_t result = 0;
+ for (uint32_t shift = 0; shift <= 63 && p < limit; shift += 7) {
+ uint64_t byte = *(reinterpret_cast<const unsigned char*>(p));
+ p++;
+ if (byte & 128) {
+ // More bytes are present
+ result |= ((byte & 127) << shift);
+ } else {
+ result |= (byte << shift);
+ *value = result;
+ return reinterpret_cast<const char*>(p);
+ }
+ }
+ return NULL;
+}
+
+#ifdef HAVE_ROCKSDB
+void BitStreamPutInt(char* dst, size_t dstlen, size_t offset,
+ uint32_t bits, uint64_t value) {
+ assert((offset + bits + 7)/8 <= dstlen);
+ assert(bits <= 64);
+
+ unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
+
+ size_t byteOffset = offset / 8;
+ size_t bitOffset = offset % 8;
+
+ // This prevents unused variable warnings when compiling.
+#ifndef NDEBUG
+ // Store truncated value.
+ uint64_t origValue = (bits < 64)?(value & (((uint64_t)1 << bits) - 1)):value;
+ uint32_t origBits = bits;
+#endif
+
+ while (bits > 0) {
+ size_t bitsToGet = std::min<size_t>(bits, 8 - bitOffset);
+ unsigned char mask = ((1 << bitsToGet) - 1);
+
+ ptr[byteOffset] = (ptr[byteOffset] & ~(mask << bitOffset)) +
+ ((value & mask) << bitOffset);
+
+ value >>= bitsToGet;
+ byteOffset += 1;
+ bitOffset = 0;
+ bits -= bitsToGet;
+ }
+
+ assert(origValue == BitStreamGetInt(dst, dstlen, offset, origBits));
+}
+
+uint64_t BitStreamGetInt(const char* src, size_t srclen, size_t offset,
+ uint32_t bits) {
+ assert((offset + bits + 7)/8 <= srclen);
+ assert(bits <= 64);
+
+ const unsigned char* ptr = reinterpret_cast<const unsigned char*>(src);
+
+ uint64_t result = 0;
+
+ size_t byteOffset = offset / 8;
+ size_t bitOffset = offset % 8;
+ size_t shift = 0;
+
+ while (bits > 0) {
+ size_t bitsToGet = std::min<size_t>(bits, 8 - bitOffset);
+ unsigned char mask = ((1 << bitsToGet) - 1);
+
+ result += (uint64_t)((ptr[byteOffset] >> bitOffset) & mask) << shift;
+
+ shift += bitsToGet;
+ byteOffset += 1;
+ bitOffset = 0;
+ bits -= bitsToGet;
+ }
+
+ return result;
+ }
+
+void BitStreamPutInt(std::string* dst, size_t offset, uint32_t bits,
+ uint64_t value) {
+ assert((offset + bits + 7)/8 <= dst->size());
+
+ const size_t kTmpBufLen = sizeof(value) + 1;
+ char tmpBuf[kTmpBufLen];
+
+ // Number of bytes of tmpBuf being used
+ const size_t kUsedBytes = (offset%8 + bits)/8;
+
+ // Copy relevant parts of dst to tmpBuf
+ for (size_t idx = 0; idx <= kUsedBytes; ++idx) {
+ tmpBuf[idx] = (*dst)[offset/8 + idx];
+ }
+
+ BitStreamPutInt(tmpBuf, kTmpBufLen, offset%8, bits, value);
+
+ // Copy tmpBuf back to dst
+ for (size_t idx = 0; idx <= kUsedBytes; ++idx) {
+ (*dst)[offset/8 + idx] = tmpBuf[idx];
+
+ // Do the check here too as we are working with a buffer.
+ assert(((bits < 64)?(value & (((uint64_t)1 << bits) - 1)):value) ==
+ BitStreamGetInt(dst, offset, bits));
+ }
+}
+#endif
+
+} // namespace leveldb
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/util/coding.h b/src/third_party/wiredtiger/api/leveldb/leveldb/util/coding.h
new file mode 100644
index 00000000000..ed56ef4ea2d
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/util/coding.h
@@ -0,0 +1,311 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Endian-neutral encoding:
+// * Fixed-length numbers are encoded with least-significant byte first
+// * In addition we support variable length "varint" encoding
+// * Strings are encoded prefixed by their length in varint format
+
+#ifndef STORAGE_LEVELDB_UTIL_CODING_H_
+#define STORAGE_LEVELDB_UTIL_CODING_H_
+
+#include <algorithm>
+#include <stdint.h>
+#include <string.h>
+#include <string>
+#include "leveldb_wt.h"
+#include "port/port.h"
+
+namespace leveldb {
+
+// The maximum length of a varint in bytes for 32 and 64 bits respectively.
+const unsigned int kMaxVarint32Length = 5;
+const unsigned int kMaxVarint64Length = 10;
+
+// Standard Put... routines append to a string
+extern void PutFixed32(std::string* dst, uint32_t value);
+extern void PutFixed64(std::string* dst, uint64_t value);
+extern void PutVarint32(std::string* dst, uint32_t value);
+extern void PutVarint64(std::string* dst, uint64_t value);
+extern void PutLengthPrefixedSlice(std::string* dst, const Slice& value);
+
+// Standard Get... routines parse a value from the beginning of a Slice
+// and advance the slice past the parsed value.
+extern bool GetVarint32(Slice* input, uint32_t* value);
+extern bool GetVarint64(Slice* input, uint64_t* value);
+extern bool GetLengthPrefixedSlice(Slice* input, Slice* result);
+
+#ifdef HAVE_ROCKSDB
+extern void PutLengthPrefixedSliceParts(std::string* dst,
+ const SliceParts& slice_parts);
+extern bool GetFixed64(Slice* input, uint64_t* value);
+// This function assumes data is well-formed.
+extern Slice GetLengthPrefixedSlice(const char* data);
+
+extern Slice GetSliceUntil(Slice* slice, char delimiter);
+#endif
+
+// Pointer-based variants of GetVarint... These either store a value
+// in *v and return a pointer just past the parsed value, or return
+// NULL on error. These routines only look at bytes in the range
+// [p..limit-1]
+extern const char* GetVarint32Ptr(const char* p,const char* limit, uint32_t* v);
+extern const char* GetVarint64Ptr(const char* p,const char* limit, uint64_t* v);
+
+// Returns the length of the varint32 or varint64 encoding of "v"
+extern int VarintLength(uint64_t v);
+
+// Lower-level versions of Put... that write directly into a character buffer
+// REQUIRES: dst has enough space for the value being written
+extern void EncodeFixed32(char* dst, uint32_t value);
+extern void EncodeFixed64(char* dst, uint64_t value);
+
+// Lower-level versions of Put... that write directly into a character buffer
+// and return a pointer just past the last byte written.
+// REQUIRES: dst has enough space for the value being written
+extern char* EncodeVarint32(char* dst, uint32_t value);
+extern char* EncodeVarint64(char* dst, uint64_t value);
+
+// Lower-level versions of Get... that read directly from a character buffer
+// without any bounds checking.
+
+inline uint32_t DecodeFixed32(const char* ptr) {
+ if (port::kLittleEndian) {
+ // Load the raw bytes
+ uint32_t result;
+ memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
+ return result;
+ } else {
+ return ((static_cast<uint32_t>(static_cast<unsigned char>(ptr[0])))
+ | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[1])) << 8)
+ | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[2])) << 16)
+ | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[3])) << 24));
+ }
+}
+
+inline uint64_t DecodeFixed64(const char* ptr) {
+ if (port::kLittleEndian) {
+ // Load the raw bytes
+ uint64_t result;
+ memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
+ return result;
+ } else {
+ uint64_t lo = DecodeFixed32(ptr);
+ uint64_t hi = DecodeFixed32(ptr + 4);
+ return (hi << 32) | lo;
+ }
+}
+
+// Internal routine for use by fallback path of GetVarint32Ptr
+extern const char* GetVarint32PtrFallback(const char* p,
+ const char* limit,
+ uint32_t* value);
+inline const char* GetVarint32Ptr(const char* p,
+ const char* limit,
+ uint32_t* value) {
+ if (p < limit) {
+ uint32_t result = *(reinterpret_cast<const unsigned char*>(p));
+ if ((result & 128) == 0) {
+ *value = result;
+ return p + 1;
+ }
+ }
+ return GetVarint32PtrFallback(p, limit, value);
+}
+
+// Writes an unsigned integer with bits number of bits with its least
+// significant bit at offset.
+// Bits are numbered from 0 to 7 in the first byte, 8 to 15 in the second and
+// so on.
+// value is truncated to the bits number of least significant bits.
+// REQUIRES: (offset+bits+7)/8 <= dstlen
+// REQUIRES: bits <= 64
+extern void BitStreamPutInt(char* dst, size_t dstlen, size_t offset,
+ uint32_t bits, uint64_t value);
+
+// Reads an unsigned integer with bits number of bits with its least
+// significant bit at offset.
+// Bits are numbered in the same way as ByteStreamPutInt().
+// REQUIRES: (offset+bits+7)/8 <= srclen
+// REQUIRES: bits <= 64
+extern uint64_t BitStreamGetInt(const char* src, size_t srclen, size_t offset,
+ uint32_t bits);
+
+// Convenience functions
+extern void BitStreamPutInt(std::string* dst, size_t offset, uint32_t bits,
+ uint64_t value);
+extern uint64_t BitStreamGetInt(const std::string* src, size_t offset,
+ uint32_t bits);
+extern uint64_t BitStreamGetInt(const Slice* src, size_t offset,
+ uint32_t bits);
+
+// -- Implementation of the functions declared above
+inline void EncodeFixed32(char* buf, uint32_t value) {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ memcpy(buf, &value, sizeof(value));
+#else
+ buf[0] = value & 0xff;
+ buf[1] = (value >> 8) & 0xff;
+ buf[2] = (value >> 16) & 0xff;
+ buf[3] = (value >> 24) & 0xff;
+#endif
+}
+
+inline void EncodeFixed64(char* buf, uint64_t value) {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ memcpy(buf, &value, sizeof(value));
+#else
+ buf[0] = value & 0xff;
+ buf[1] = (value >> 8) & 0xff;
+ buf[2] = (value >> 16) & 0xff;
+ buf[3] = (value >> 24) & 0xff;
+ buf[4] = (value >> 32) & 0xff;
+ buf[5] = (value >> 40) & 0xff;
+ buf[6] = (value >> 48) & 0xff;
+ buf[7] = (value >> 56) & 0xff;
+#endif
+}
+
+inline void PutFixed32(std::string* dst, uint32_t value) {
+ char buf[sizeof(value)];
+ EncodeFixed32(buf, value);
+ dst->append(buf, sizeof(buf));
+}
+
+inline void PutFixed64(std::string* dst, uint64_t value) {
+ char buf[sizeof(value)];
+ EncodeFixed64(buf, value);
+ dst->append(buf, sizeof(buf));
+}
+
+inline void PutVarint32(std::string* dst, uint32_t v) {
+ char buf[5];
+ char* ptr = EncodeVarint32(buf, v);
+ dst->append(buf, ptr - buf);
+}
+
+inline char* EncodeVarint64(char* dst, uint64_t v) {
+ static const unsigned int B = 128;
+ unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
+ while (v >= B) {
+ *(ptr++) = (v & (B - 1)) | B;
+ v >>= 7;
+ }
+ *(ptr++) = static_cast<unsigned char>(v);
+ return reinterpret_cast<char*>(ptr);
+}
+
+inline void PutVarint64(std::string* dst, uint64_t v) {
+ char buf[10];
+ char* ptr = EncodeVarint64(buf, v);
+ dst->append(buf, ptr - buf);
+}
+
+inline void PutLengthPrefixedSlice(std::string* dst, const Slice& value) {
+ PutVarint32(dst, value.size());
+ dst->append(value.data(), value.size());
+}
+
+#ifdef HAVE_ROCKSDB
+inline void PutLengthPrefixedSliceParts(std::string* dst,
+ const SliceParts& slice_parts) {
+ uint32_t total_bytes = 0;
+ for (int i = 0; i < slice_parts.num_parts; ++i) {
+ total_bytes += slice_parts.parts[i].size();
+ }
+ PutVarint32(dst, total_bytes);
+ for (int i = 0; i < slice_parts.num_parts; ++i) {
+ dst->append(slice_parts.parts[i].data(), slice_parts.parts[i].size());
+ }
+}
+#endif
+
+inline int VarintLength(uint64_t v) {
+ int len = 1;
+ while (v >= 128) {
+ v >>= 7;
+ len++;
+ }
+ return len;
+}
+
+#ifdef HAVE_ROCKSDB
+inline bool GetFixed64(Slice* input, uint64_t* value) {
+ if (input->size() < sizeof(uint64_t)) {
+ return false;
+ }
+ *value = DecodeFixed64(input->data());
+ input->remove_prefix(sizeof(uint64_t));
+ return true;
+}
+#endif
+
+inline bool GetVarint32(Slice* input, uint32_t* value) {
+ const char* p = input->data();
+ const char* limit = p + input->size();
+ const char* q = GetVarint32Ptr(p, limit, value);
+ if (q == NULL) {
+ return false;
+ } else {
+ *input = Slice(q, limit - q);
+ return true;
+ }
+}
+
+inline bool GetVarint64(Slice* input, uint64_t* value) {
+ const char* p = input->data();
+ const char* limit = p + input->size();
+ const char* q = GetVarint64Ptr(p, limit, value);
+ if (q == NULL) {
+ return false;
+ } else {
+ *input = Slice(q, limit - q);
+ return true;
+ }
+}
+
+inline bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
+ uint32_t len = 0;
+ if (GetVarint32(input, &len) && input->size() >= len) {
+ *result = Slice(input->data(), len);
+ input->remove_prefix(len);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+#ifdef HAVE_ROCKSDB
+inline Slice GetLengthPrefixedSlice(const char* data) {
+ uint32_t len = 0;
+ // +5: we assume "data" is not corrupted
+ const char *p = GetVarint32Ptr(data, data + 5 /* limit */, &len);
+ return Slice(p, len);
+}
+
+inline Slice GetSliceUntil(Slice* slice, char delimiter) {
+ uint32_t len = 0;
+ for (len = 0; len < slice->size() && slice->data()[len] != delimiter; ++len) {
+ // nothing
+ }
+
+ Slice ret(slice->data(), len);
+ slice->remove_prefix(len + ((len < slice->size()) ? 1 : 0));
+ return ret;
+}
+#endif
+
+inline uint64_t BitStreamGetInt(const std::string* src, size_t offset,
+ uint32_t bits) {
+ return BitStreamGetInt(src->data(), src->size(), offset, bits);
+}
+
+inline uint64_t BitStreamGetInt(const Slice* src, size_t offset,
+ uint32_t bits) {
+ return BitStreamGetInt(src->data(), src->size(), offset, bits);
+}
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_UTIL_CODING_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/util/comparator.cc b/src/third_party/wiredtiger/api/leveldb/leveldb/util/comparator.cc
new file mode 100644
index 00000000000..57c89628af9
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/util/comparator.cc
@@ -0,0 +1,80 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <algorithm>
+#include <stdint.h>
+#include "leveldb_wt.h"
+#include "port/port.h"
+#include "util/logging.h"
+
+namespace leveldb {
+
+Comparator::~Comparator() { }
+
+#ifdef HAVE_HYPERLEVELDB
+uint64_t Comparator::KeyNum(const Slice& key) const {
+ return 0;
+}
+#endif
+
+namespace {
+class BytewiseComparatorImpl : public Comparator {
+ public:
+ BytewiseComparatorImpl() { }
+
+ virtual const char* Name() const {
+ return "leveldb.BytewiseComparator";
+ }
+
+ virtual int Compare(const Slice& a, const Slice& b) const {
+ return a.compare(b);
+ }
+
+ virtual void FindShortestSeparator(
+ std::string* start,
+ const Slice& limit) const {
+ // Find length of common prefix
+ size_t min_length = std::min(start->size(), limit.size());
+ size_t diff_index = 0;
+ while ((diff_index < min_length) &&
+ ((*start)[diff_index] == limit[diff_index])) {
+ diff_index++;
+ }
+
+ if (diff_index >= min_length) {
+ // Do not shorten if one string is a prefix of the other
+ } else {
+ uint8_t diff_byte = static_cast<uint8_t>((*start)[diff_index]);
+ if (diff_byte < static_cast<uint8_t>(0xff) &&
+ diff_byte + 1 < static_cast<uint8_t>(limit[diff_index])) {
+ (*start)[diff_index]++;
+ start->resize(diff_index + 1);
+ assert(Compare(*start, limit) < 0);
+ }
+ }
+ }
+
+ virtual void FindShortSuccessor(std::string* key) const {
+ // Find first character that can be incremented
+ size_t n = key->size();
+ for (size_t i = 0; i < n; i++) {
+ const uint8_t byte = (*key)[i];
+ if (byte != static_cast<uint8_t>(0xff)) {
+ (*key)[i] = byte + 1;
+ key->resize(i+1);
+ return;
+ }
+ }
+ // *key is a run of 0xffs. Leave it alone.
+ }
+};
+} // namespace
+
+static const Comparator* bytewise = new BytewiseComparatorImpl;
+
+const Comparator* BytewiseComparator() {
+ return bytewise;
+}
+
+} // namespace leveldb
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/util/env.cc b/src/third_party/wiredtiger/api/leveldb/leveldb/util/env.cc
new file mode 100644
index 00000000000..00a04f0dc3e
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/util/env.cc
@@ -0,0 +1,96 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "leveldb_wt.h"
+
+namespace leveldb {
+
+Env::~Env() {
+}
+
+SequentialFile::~SequentialFile() {
+}
+
+RandomAccessFile::~RandomAccessFile() {
+}
+
+WritableFile::~WritableFile() {
+}
+
+Logger::~Logger() {
+}
+
+FileLock::~FileLock() {
+}
+
+void Log(Logger* info_log, const char* format, ...) {
+ if (info_log != NULL) {
+ va_list ap;
+ va_start(ap, format);
+ info_log->Logv(format, ap);
+ va_end(ap);
+ }
+}
+
+static Status DoWriteStringToFile(Env* env, const Slice& data,
+ const std::string& fname,
+ bool should_sync) {
+ WritableFile* file;
+ Status s = env->NewWritableFile(fname, &file);
+ if (!s.ok()) {
+ return s;
+ }
+ s = file->Append(data);
+ if (s.ok() && should_sync) {
+ s = file->Sync();
+ }
+ if (s.ok()) {
+ s = file->Close();
+ }
+ delete file; // Will auto-close if we did not close above
+ if (!s.ok()) {
+ env->DeleteFile(fname);
+ }
+ return s;
+}
+
+Status WriteStringToFile(Env* env, const Slice& data,
+ const std::string& fname) {
+ return DoWriteStringToFile(env, data, fname, false);
+}
+
+Status WriteStringToFileSync(Env* env, const Slice& data,
+ const std::string& fname) {
+ return DoWriteStringToFile(env, data, fname, true);
+}
+
+Status ReadFileToString(Env* env, const std::string& fname, std::string* data) {
+ data->clear();
+ SequentialFile* file;
+ Status s = env->NewSequentialFile(fname, &file);
+ if (!s.ok()) {
+ return s;
+ }
+ static const int kBufferSize = 8192;
+ char* space = new char[kBufferSize];
+ while (true) {
+ Slice fragment;
+ s = file->Read(kBufferSize, &fragment, space);
+ if (!s.ok()) {
+ break;
+ }
+ data->append(fragment.data(), fragment.size());
+ if (fragment.empty()) {
+ break;
+ }
+ }
+ delete[] space;
+ delete file;
+ return s;
+}
+
+EnvWrapper::~EnvWrapper() {
+}
+
+} // namespace leveldb
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/util/env_posix.cc b/src/third_party/wiredtiger/api/leveldb/leveldb/util/env_posix.cc
new file mode 100644
index 00000000000..084ae160807
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/util/env_posix.cc
@@ -0,0 +1,625 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <deque>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#if defined(LEVELDB_PLATFORM_ANDROID)
+#include <sys/stat.h>
+#endif
+#include "leveldb_wt.h"
+#include "port/port.h"
+#include "util/logging.h"
+#include "util/posix_logger.h"
+
+namespace leveldb {
+
+namespace {
+
+static Status IOError(const std::string& context, int err_number) {
+ return Status::IOError(context, strerror(err_number));
+}
+
+class PosixSequentialFile: public SequentialFile {
+ private:
+ std::string filename_;
+ FILE* file_;
+
+ public:
+ PosixSequentialFile(const std::string& fname, FILE* f)
+ : filename_(fname), file_(f) { }
+ virtual ~PosixSequentialFile() { fclose(file_); }
+
+ virtual Status Read(size_t n, Slice* result, char* scratch) {
+ Status s;
+#ifdef HAVE_FREAD_UNLOCKED
+ size_t r = fread_unlocked(scratch, 1, n, file_);
+#else
+ size_t r = fread(scratch, 1, n, file_);
+#endif
+ *result = Slice(scratch, r);
+ if (r < n) {
+ if (feof(file_)) {
+ // We leave status as ok if we hit the end of the file
+ } else {
+ // A partial read with an error: return a non-ok status
+ s = IOError(filename_, errno);
+ }
+ }
+ return s;
+ }
+
+ virtual Status Skip(uint64_t n) {
+ if (fseek(file_, n, SEEK_CUR)) {
+ return IOError(filename_, errno);
+ }
+ return Status::OK();
+ }
+};
+
+// pread() based random-access
+class PosixRandomAccessFile: public RandomAccessFile {
+ private:
+ std::string filename_;
+ int fd_;
+
+ public:
+ PosixRandomAccessFile(const std::string& fname, int fd)
+ : filename_(fname), fd_(fd) { }
+ virtual ~PosixRandomAccessFile() { close(fd_); }
+
+ virtual Status Read(uint64_t offset, size_t n, Slice* result,
+ char* scratch) const {
+ Status s;
+ ssize_t r = pread(fd_, scratch, n, static_cast<off_t>(offset));
+ *result = Slice(scratch, (r < 0) ? 0 : r);
+ if (r < 0) {
+ // An error: return a non-ok status
+ s = IOError(filename_, errno);
+ }
+ return s;
+ }
+};
+
+// mmap() based random-access
+class PosixMmapReadableFile: public RandomAccessFile {
+ private:
+ std::string filename_;
+ void* mmapped_region_;
+ size_t length_;
+
+ public:
+ // base[0,length-1] contains the mmapped contents of the file.
+ PosixMmapReadableFile(const std::string& fname, void* base, size_t length)
+ : filename_(fname), mmapped_region_(base), length_(length) { }
+ virtual ~PosixMmapReadableFile() { munmap(mmapped_region_, length_); }
+
+ virtual Status Read(uint64_t offset, size_t n, Slice* result,
+ char* scratch) const {
+ Status s;
+ if (offset + n > length_) {
+ *result = Slice();
+ s = IOError(filename_, EINVAL);
+ } else {
+ *result = Slice(reinterpret_cast<char*>(mmapped_region_) + offset, n);
+ }
+ return s;
+ }
+};
+
+// We preallocate up to an extra megabyte and use memcpy to append new
+// data to the file. This is safe since we either properly close the
+// file before reading from it, or for log files, the reading code
+// knows enough to skip zero suffixes.
+class PosixMmapFile : public WritableFile {
+ private:
+ std::string filename_;
+ int fd_;
+ size_t page_size_;
+ size_t map_size_; // How much extra memory to map at a time
+ char* base_; // The mapped region
+ char* limit_; // Limit of the mapped region
+ char* dst_; // Where to write next (in range [base_,limit_])
+ char* last_sync_; // Where have we synced up to
+ uint64_t file_offset_; // Offset of base_ in file
+
+ // Have we done an munmap of unsynced data?
+ bool pending_sync_;
+
+ // Roundup x to a multiple of y
+ static size_t Roundup(size_t x, size_t y) {
+ return ((x + y - 1) / y) * y;
+ }
+
+ size_t TruncateToPageBoundary(size_t s) {
+ s -= (s & (page_size_ - 1));
+ assert((s % page_size_) == 0);
+ return s;
+ }
+
+ bool UnmapCurrentRegion() {
+ bool result = true;
+ if (base_ != NULL) {
+ if (last_sync_ < limit_) {
+ // Defer syncing this data until next Sync() call, if any
+ pending_sync_ = true;
+ }
+ if (munmap(base_, limit_ - base_) != 0) {
+ result = false;
+ }
+ file_offset_ += limit_ - base_;
+ base_ = NULL;
+ limit_ = NULL;
+ last_sync_ = NULL;
+ dst_ = NULL;
+
+ // Increase the amount we map the next time, but capped at 1MB
+ if (map_size_ < (1<<20)) {
+ map_size_ *= 2;
+ }
+ }
+ return result;
+ }
+
+ bool MapNewRegion() {
+ assert(base_ == NULL);
+ if (ftruncate(fd_, file_offset_ + map_size_) < 0) {
+ return false;
+ }
+ void* ptr = mmap(NULL, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd_, file_offset_);
+ if (ptr == MAP_FAILED) {
+ return false;
+ }
+ base_ = reinterpret_cast<char*>(ptr);
+ limit_ = base_ + map_size_;
+ dst_ = base_;
+ last_sync_ = base_;
+ return true;
+ }
+
+ public:
+ PosixMmapFile(const std::string& fname, int fd, size_t page_size)
+ : filename_(fname),
+ fd_(fd),
+ page_size_(page_size),
+ map_size_(Roundup(65536, page_size)),
+ base_(NULL),
+ limit_(NULL),
+ dst_(NULL),
+ last_sync_(NULL),
+ file_offset_(0),
+ pending_sync_(false) {
+ assert((page_size & (page_size - 1)) == 0);
+ }
+
+
+ ~PosixMmapFile() {
+ if (fd_ >= 0) {
+ PosixMmapFile::Close();
+ }
+ }
+
+ virtual Status Append(const Slice& data) {
+ const char* src = data.data();
+ size_t left = data.size();
+ while (left > 0) {
+ assert(base_ <= dst_);
+ assert(dst_ <= limit_);
+ size_t avail = limit_ - dst_;
+ if (avail == 0) {
+ if (!UnmapCurrentRegion() ||
+ !MapNewRegion()) {
+ return IOError(filename_, errno);
+ }
+ }
+
+ size_t n = (left <= avail) ? left : avail;
+ memcpy(dst_, src, n);
+ dst_ += n;
+ src += n;
+ left -= n;
+ }
+ return Status::OK();
+ }
+
+ virtual Status Close() {
+ Status s;
+ size_t unused = limit_ - dst_;
+ if (!UnmapCurrentRegion()) {
+ s = IOError(filename_, errno);
+ } else if (unused > 0) {
+ // Trim the extra space at the end of the file
+ if (ftruncate(fd_, file_offset_ - unused) < 0) {
+ s = IOError(filename_, errno);
+ }
+ }
+
+ if (close(fd_) < 0) {
+ if (s.ok()) {
+ s = IOError(filename_, errno);
+ }
+ }
+
+ fd_ = -1;
+ base_ = NULL;
+ limit_ = NULL;
+ return s;
+ }
+
+ virtual Status Flush() {
+ return Status::OK();
+ }
+
+ virtual Status Sync() {
+ Status s;
+
+ if (pending_sync_) {
+ // Some unmapped data was not synced
+ pending_sync_ = false;
+#ifdef HAVE_FDATASYNC
+ if (fdatasync(fd_) < 0) {
+#else
+ if (fsync(fd_) < 0) {
+#endif
+ s = IOError(filename_, errno);
+ }
+ }
+
+ if (dst_ > last_sync_) {
+ // Find the beginnings of the pages that contain the first and last
+ // bytes to be synced.
+ size_t p1 = TruncateToPageBoundary(last_sync_ - base_);
+ size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1);
+ last_sync_ = dst_;
+ if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) {
+ s = IOError(filename_, errno);
+ }
+ }
+
+ return s;
+ }
+
+#ifdef HAVE_HYPERLEVELDB
+ virtual Status WriteAt(uint64_t offset, const Slice& data) { return Status::NotSupported("sorry!"); }
+#endif
+};
+
+static int LockOrUnlock(int fd, bool lock) {
+ errno = 0;
+ struct flock f;
+ memset(&f, 0, sizeof(f));
+ f.l_type = (lock ? F_WRLCK : F_UNLCK);
+ f.l_whence = SEEK_SET;
+ f.l_start = 0;
+ f.l_len = 0; // Lock/unlock entire file
+ return fcntl(fd, F_SETLK, &f);
+}
+
+class PosixFileLock : public FileLock {
+ public:
+ int fd_;
+};
+
+class PosixEnv : public Env {
+ public:
+ PosixEnv();
+ virtual ~PosixEnv() {
+ fprintf(stderr, "Destroying Env::Default()\n");
+ exit(1);
+ }
+
+ virtual Status NewSequentialFile(const std::string& fname,
+ SequentialFile** result) {
+ FILE* f = fopen(fname.c_str(), "r");
+ if (f == NULL) {
+ *result = NULL;
+ return IOError(fname, errno);
+ } else {
+ *result = new PosixSequentialFile(fname, f);
+ return Status::OK();
+ }
+ }
+
+ virtual Status NewRandomAccessFile(const std::string& fname,
+ RandomAccessFile** result) {
+ *result = NULL;
+ Status s;
+ int fd = open(fname.c_str(), O_RDONLY);
+ if (fd < 0) {
+ s = IOError(fname, errno);
+ } else if (sizeof(void*) >= 8) {
+ // Use mmap when virtual address-space is plentiful.
+ uint64_t size;
+ s = GetFileSize(fname, &size);
+ if (s.ok()) {
+ void* base = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
+ if (base != MAP_FAILED) {
+ *result = new PosixMmapReadableFile(fname, base, size);
+ } else {
+ s = IOError(fname, errno);
+ }
+ }
+ close(fd);
+ } else {
+ *result = new PosixRandomAccessFile(fname, fd);
+ }
+ return s;
+ }
+
+ virtual Status NewWritableFile(const std::string& fname,
+ WritableFile** result) {
+ Status s;
+ const int fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644);
+ if (fd < 0) {
+ *result = NULL;
+ s = IOError(fname, errno);
+ } else {
+ *result = new PosixMmapFile(fname, fd, page_size_);
+ }
+ return s;
+ }
+
+ virtual bool FileExists(const std::string& fname) {
+ return access(fname.c_str(), F_OK) == 0;
+ }
+
+ virtual Status GetChildren(const std::string& dir,
+ std::vector<std::string>* result) {
+ result->clear();
+ DIR* d = opendir(dir.c_str());
+ if (d == NULL) {
+ return IOError(dir, errno);
+ }
+ struct dirent* entry;
+ while ((entry = readdir(d)) != NULL) {
+ result->push_back(entry->d_name);
+ }
+ closedir(d);
+ return Status::OK();
+ }
+
+ virtual Status DeleteFile(const std::string& fname) {
+ Status result;
+ if (unlink(fname.c_str()) != 0) {
+ result = IOError(fname, errno);
+ }
+ return result;
+ };
+
+ virtual Status CreateDir(const std::string& name) {
+ Status result;
+ if (mkdir(name.c_str(), 0755) != 0) {
+ result = IOError(name, errno);
+ }
+ return result;
+ };
+
+ virtual Status DeleteDir(const std::string& name) {
+ Status result;
+ if (rmdir(name.c_str()) != 0) {
+ result = IOError(name, errno);
+ }
+ return result;
+ };
+
+ virtual Status GetFileSize(const std::string& fname, uint64_t* size) {
+ Status s;
+ struct stat sbuf;
+ if (stat(fname.c_str(), &sbuf) != 0) {
+ *size = 0;
+ s = IOError(fname, errno);
+ } else {
+ *size = sbuf.st_size;
+ }
+ return s;
+ }
+
+ virtual Status RenameFile(const std::string& src, const std::string& target) {
+ Status result;
+ if (rename(src.c_str(), target.c_str()) != 0) {
+ result = IOError(src, errno);
+ }
+ return result;
+ }
+
+ virtual Status LockFile(const std::string& fname, FileLock** lock) {
+ *lock = NULL;
+ Status result;
+ int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
+ if (fd < 0) {
+ result = IOError(fname, errno);
+ } else if (LockOrUnlock(fd, true) == -1) {
+ result = IOError("lock " + fname, errno);
+ close(fd);
+ } else {
+ PosixFileLock* my_lock = new PosixFileLock;
+ my_lock->fd_ = fd;
+ *lock = my_lock;
+ }
+ return result;
+ }
+
+ virtual Status UnlockFile(FileLock* lock) {
+ PosixFileLock* my_lock = reinterpret_cast<PosixFileLock*>(lock);
+ Status result;
+ if (LockOrUnlock(my_lock->fd_, false) == -1) {
+ result = IOError("unlock", errno);
+ }
+ close(my_lock->fd_);
+ delete my_lock;
+ return result;
+ }
+
+ virtual void Schedule(void (*function)(void*), void* arg);
+
+ virtual void StartThread(void (*function)(void* arg), void* arg);
+
+ virtual Status GetTestDirectory(std::string* result) {
+ const char* env = getenv("TEST_TMPDIR");
+ if (env && env[0] != '\0') {
+ *result = env;
+ } else {
+ char buf[100];
+ snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d", int(geteuid()));
+ *result = buf;
+ }
+ // Directory may already exist
+ CreateDir(*result);
+ return Status::OK();
+ }
+
+ static uint64_t gettid() {
+ pthread_t tid = pthread_self();
+ uint64_t thread_id = 0;
+ memcpy(&thread_id, &tid, std::min(sizeof(thread_id), sizeof(tid)));
+ return thread_id;
+ }
+
+ virtual Status NewLogger(const std::string& fname, Logger** result) {
+ FILE* f = fopen(fname.c_str(), "w");
+ if (f == NULL) {
+ *result = NULL;
+ return IOError(fname, errno);
+ } else {
+ *result = new PosixLogger(f, &PosixEnv::gettid);
+ return Status::OK();
+ }
+ }
+
+ virtual uint64_t NowMicros() {
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+ }
+
+ virtual void SleepForMicroseconds(int micros) {
+ usleep(micros);
+ }
+
+#ifdef HAVE_HYPERLEVELDB
+ virtual Status CopyFile(const std::string&, const std::string&) { return Status::NotSupported("sorry!"); }
+ virtual Status LinkFile(const std::string&, const std::string&) { return Status::NotSupported("sorry!"); }
+#endif
+
+ private:
+ void PthreadCall(const char* label, int result) {
+ if (result != 0) {
+ fprintf(stderr, "pthread %s: %s\n", label, strerror(result));
+ exit(1);
+ }
+ }
+
+ // BGThread() is the body of the background thread
+ void BGThread();
+ static void* BGThreadWrapper(void* arg) {
+ reinterpret_cast<PosixEnv*>(arg)->BGThread();
+ return NULL;
+ }
+
+ size_t page_size_;
+ pthread_mutex_t mu_;
+ pthread_cond_t bgsignal_;
+ pthread_t bgthread_;
+ bool started_bgthread_;
+
+ // Entry per Schedule() call
+ struct BGItem { void* arg; void (*function)(void*); };
+ typedef std::deque<BGItem> BGQueue;
+ BGQueue queue_;
+};
+
+PosixEnv::PosixEnv() : page_size_(getpagesize()),
+ started_bgthread_(false) {
+ PthreadCall("mutex_init", pthread_mutex_init(&mu_, NULL));
+ PthreadCall("cvar_init", pthread_cond_init(&bgsignal_, NULL));
+}
+
+void PosixEnv::Schedule(void (*function)(void*), void* arg) {
+ PthreadCall("lock", pthread_mutex_lock(&mu_));
+
+ // Start background thread if necessary
+ if (!started_bgthread_) {
+ started_bgthread_ = true;
+ PthreadCall(
+ "create thread",
+ pthread_create(&bgthread_, NULL, &PosixEnv::BGThreadWrapper, this));
+ }
+
+ // If the queue is currently empty, the background thread may currently be
+ // waiting.
+ if (queue_.empty()) {
+ PthreadCall("signal", pthread_cond_signal(&bgsignal_));
+ }
+
+ // Add to priority queue
+ queue_.push_back(BGItem());
+ queue_.back().function = function;
+ queue_.back().arg = arg;
+
+ PthreadCall("unlock", pthread_mutex_unlock(&mu_));
+}
+
+void PosixEnv::BGThread() {
+ while (true) {
+ // Wait until there is an item that is ready to run
+ PthreadCall("lock", pthread_mutex_lock(&mu_));
+ while (queue_.empty()) {
+ PthreadCall("wait", pthread_cond_wait(&bgsignal_, &mu_));
+ }
+
+ void (*function)(void*) = queue_.front().function;
+ void* arg = queue_.front().arg;
+ queue_.pop_front();
+
+ PthreadCall("unlock", pthread_mutex_unlock(&mu_));
+ (*function)(arg);
+ }
+}
+
+namespace {
+struct StartThreadState {
+ void (*user_function)(void*);
+ void* arg;
+};
+}
+static void* StartThreadWrapper(void* arg) {
+ StartThreadState* state = reinterpret_cast<StartThreadState*>(arg);
+ state->user_function(state->arg);
+ delete state;
+ return NULL;
+}
+
+void PosixEnv::StartThread(void (*function)(void* arg), void* arg) {
+ pthread_t t;
+ StartThreadState* state = new StartThreadState;
+ state->user_function = function;
+ state->arg = arg;
+ PthreadCall("start thread",
+ pthread_create(&t, NULL, &StartThreadWrapper, state));
+}
+
+} // namespace
+
+static pthread_once_t once = PTHREAD_ONCE_INIT;
+static Env* default_env;
+static void InitDefaultEnv() { default_env = new PosixEnv; }
+
+Env* Env::Default() {
+ pthread_once(&once, InitDefaultEnv);
+ return default_env;
+}
+
+} // namespace leveldb
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/util/logging.cc b/src/third_party/wiredtiger/api/leveldb/leveldb/util/logging.cc
new file mode 100644
index 00000000000..96526e76123
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/util/logging.cc
@@ -0,0 +1,80 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "util/logging.h"
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "leveldb_wt.h"
+
+namespace leveldb {
+
+void AppendNumberTo(std::string* str, uint64_t num) {
+ char buf[30];
+ snprintf(buf, sizeof(buf), "%llu", (unsigned long long) num);
+ str->append(buf);
+}
+
+void AppendEscapedStringTo(std::string* str, const Slice& value) {
+ for (size_t i = 0; i < value.size(); i++) {
+ char c = value[i];
+ if (c >= ' ' && c <= '~') {
+ str->push_back(c);
+ } else {
+ char buf[10];
+ snprintf(buf, sizeof(buf), "\\x%02x",
+ static_cast<unsigned int>(c) & 0xff);
+ str->append(buf);
+ }
+ }
+}
+
+std::string NumberToString(uint64_t num) {
+ std::string r;
+ AppendNumberTo(&r, num);
+ return r;
+}
+
+std::string EscapeString(const Slice& value) {
+ std::string r;
+ AppendEscapedStringTo(&r, value);
+ return r;
+}
+
+bool ConsumeChar(Slice* in, char c) {
+ if (!in->empty() && (*in)[0] == c) {
+ in->remove_prefix(1);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool ConsumeDecimalNumber(Slice* in, uint64_t* val) {
+ uint64_t v = 0;
+ int digits = 0;
+ while (!in->empty()) {
+ char c = (*in)[0];
+ if (c >= '0' && c <= '9') {
+ ++digits;
+ const int delta = (c - '0');
+ static const uint64_t kMaxUint64 = ~static_cast<uint64_t>(0);
+ if (v > kMaxUint64/10 ||
+ (v == kMaxUint64/10 && (uint64_t)delta > kMaxUint64%10)) {
+ // Overflow
+ return false;
+ }
+ v = (v * 10) + delta;
+ in->remove_prefix(1);
+ } else {
+ break;
+ }
+ }
+ *val = v;
+ return (digits > 0);
+}
+
+} // namespace leveldb
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/util/logging.h b/src/third_party/wiredtiger/api/leveldb/leveldb/util/logging.h
new file mode 100644
index 00000000000..b0c5da813e8
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/util/logging.h
@@ -0,0 +1,47 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Must not be included from any .h files to avoid polluting the namespace
+// with macros.
+
+#ifndef STORAGE_LEVELDB_UTIL_LOGGING_H_
+#define STORAGE_LEVELDB_UTIL_LOGGING_H_
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string>
+#include "port/port.h"
+
+namespace leveldb {
+
+class Slice;
+class WritableFile;
+
+// Append a human-readable printout of "num" to *str
+extern void AppendNumberTo(std::string* str, uint64_t num);
+
+// Append a human-readable printout of "value" to *str.
+// Escapes any non-printable characters found in "value".
+extern void AppendEscapedStringTo(std::string* str, const Slice& value);
+
+// Return a human-readable printout of "num"
+extern std::string NumberToString(uint64_t num);
+
+// Return a human-readable version of "value".
+// Escapes any non-printable characters found in "value".
+extern std::string EscapeString(const Slice& value);
+
+// If *in starts with "c", advances *in past the first character and
+// returns true. Otherwise, returns false.
+extern bool ConsumeChar(Slice* in, char c);
+
+// Parse a human-readable number from "*in" into *value. On success,
+// advances "*in" past the consumed number and sets "*val" to the
+// numeric value. Otherwise, returns false and leaves *in in an
+// unspecified state.
+extern bool ConsumeDecimalNumber(Slice* in, uint64_t* val);
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_UTIL_LOGGING_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/util/options.cc b/src/third_party/wiredtiger/api/leveldb/leveldb/util/options.cc
new file mode 100644
index 00000000000..a8c79233bb5
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/util/options.cc
@@ -0,0 +1,26 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "leveldb_wt.h"
+
+namespace leveldb {
+
+Options::Options()
+ : comparator(BytewiseComparator()),
+ create_if_missing(false),
+ error_if_exists(false),
+ paranoid_checks(false),
+ env(Env::Default()),
+ info_log(NULL),
+ write_buffer_size(4<<20),
+ max_open_files(1000),
+ block_cache(NULL),
+ block_size(4096),
+ block_restart_interval(16),
+ compression(kSnappyCompression),
+ filter_policy(NULL) {
+}
+
+
+} // namespace leveldb
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/util/posix_logger.h b/src/third_party/wiredtiger/api/leveldb/leveldb/util/posix_logger.h
new file mode 100644
index 00000000000..f15de45e05e
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/util/posix_logger.h
@@ -0,0 +1,98 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// Logger implementation that can be shared by all environments
+// where enough posix functionality is available.
+
+#ifndef STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_
+#define STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_
+
+#include <algorithm>
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include "leveldb_wt.h"
+
+namespace leveldb {
+
+class PosixLogger : public Logger {
+ private:
+ FILE* file_;
+ uint64_t (*gettid_)(); // Return the thread id for the current thread
+ public:
+ PosixLogger(FILE* f, uint64_t (*gettid)()) : file_(f), gettid_(gettid) { }
+ virtual ~PosixLogger() {
+ fclose(file_);
+ }
+ virtual void Logv(const char* format, va_list ap) {
+ const uint64_t thread_id = (*gettid_)();
+
+ // We try twice: the first time with a fixed-size stack allocated buffer,
+ // and the second time with a much larger dynamically allocated buffer.
+ char buffer[500];
+ for (int iter = 0; iter < 2; iter++) {
+ char* base;
+ int bufsize;
+ if (iter == 0) {
+ bufsize = sizeof(buffer);
+ base = buffer;
+ } else {
+ bufsize = 30000;
+ base = new char[bufsize];
+ }
+ char* p = base;
+ char* limit = base + bufsize;
+
+ struct timeval now_tv;
+ gettimeofday(&now_tv, NULL);
+ const time_t seconds = now_tv.tv_sec;
+ struct tm t;
+ localtime_r(&seconds, &t);
+ p += snprintf(p, limit - p,
+ "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ",
+ t.tm_year + 1900,
+ t.tm_mon + 1,
+ t.tm_mday,
+ t.tm_hour,
+ t.tm_min,
+ t.tm_sec,
+ static_cast<int>(now_tv.tv_usec),
+ static_cast<long long unsigned int>(thread_id));
+
+ // Print the message
+ if (p < limit) {
+ va_list backup_ap;
+ va_copy(backup_ap, ap);
+ p += vsnprintf(p, limit - p, format, backup_ap);
+ va_end(backup_ap);
+ }
+
+ // Truncate to available space if necessary
+ if (p >= limit) {
+ if (iter == 0) {
+ continue; // Try again with larger buffer
+ } else {
+ p = limit - 1;
+ }
+ }
+
+ // Add newline if necessary
+ if (p == base || p[-1] != '\n') {
+ *p++ = '\n';
+ }
+
+ assert(p <= limit);
+ fwrite(base, 1, p - base, file_);
+ fflush(file_);
+ if (base != buffer) {
+ delete[] base;
+ }
+ break;
+ }
+ }
+};
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/util/random.h b/src/third_party/wiredtiger/api/leveldb/leveldb/util/random.h
new file mode 100644
index 00000000000..66e0c94e7cb
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/util/random.h
@@ -0,0 +1,72 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#ifndef STORAGE_LEVELDB_UTIL_RANDOM_H_
+#define STORAGE_LEVELDB_UTIL_RANDOM_H_
+
+#include <stdint.h>
+
+namespace leveldb {
+
+// A very simple random number generator. Not especially good at
+// generating truly random bits, but good enough for our needs in this
+// package.
+class Random {
+ private:
+ uint32_t seed_;
+ public:
+ explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) { }
+ uint32_t Next() {
+ static const uint32_t M = 2147483647L; // 2^31-1
+ static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0
+ // We are computing
+ // seed_ = (seed_ * A) % M, where M = 2^31-1
+ //
+ // seed_ must not be zero or M, or else all subsequent computed values
+ // will be zero or M respectively. For all other values, seed_ will end
+ // up cycling through every number in [1,M-1]
+ uint64_t product = seed_ * A;
+
+ // Compute (product % M) using the fact that ((x << 31) % M) == x.
+ seed_ = static_cast<uint32_t>((product >> 31) + (product & M));
+ // The first reduction may overflow by 1 bit, so we may need to
+ // repeat. mod == M is not possible; using > allows the faster
+ // sign-bit-based test.
+ if (seed_ > M) {
+ seed_ -= M;
+ }
+ return seed_;
+ }
+ // Returns a uniformly distributed value in the range [0..n-1]
+ // REQUIRES: n > 0
+ uint32_t Uniform(int n) { return Next() % n; }
+
+ // Randomly returns true ~"1/n" of the time, and false otherwise.
+ // REQUIRES: n > 0
+ bool OneIn(int n) { return (Next() % n) == 0; }
+
+ // Skewed: pick "base" uniformly from range [0,max_log] and then
+ // return "base" random bits. The effect is to pick a number in the
+ // range [0,2^max_log-1] with exponential bias towards smaller numbers.
+ uint32_t Skewed(int max_log) {
+ return Uniform(1 << Uniform(max_log + 1));
+ }
+
+ // Shuffle the array into random order
+ void Shuffle(int *array, int n) {
+ if (n > 1) {
+ int i;
+ for (i=0; i<n-1; i++) {
+ int j = i + Next() / (2147483647 / (n-i) + 1);
+ int t = array[j];
+ array[j] = array[i];
+ array[i] = t;
+ }
+ }
+ }
+};
+
+} // namespace leveldb
+
+#endif // STORAGE_LEVELDB_UTIL_RANDOM_H_
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb/util/status.cc b/src/third_party/wiredtiger/api/leveldb/leveldb/util/status.cc
new file mode 100644
index 00000000000..e8edd9dbb11
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb/util/status.cc
@@ -0,0 +1,74 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <stdio.h>
+#include "leveldb_wt.h"
+
+namespace leveldb {
+
+const char* Status::CopyState(const char* state) {
+ uint32_t size;
+ memcpy(&size, state, sizeof(size));
+ char* result = new char[size + 5];
+ memcpy(result, state, size + 5);
+ return result;
+}
+
+Status::Status(Code code_arg, const Slice& msg, const Slice& msg2) {
+ assert(code_arg != kOk);
+ const uint32_t len1 = msg.size();
+ const uint32_t len2 = msg2.size();
+ const uint32_t size = len1 + (len2 ? (2 + len2) : 0);
+ char* result = new char[size + 5];
+ memcpy(result, &size, sizeof(size));
+ result[4] = static_cast<char>(code_arg);
+ memcpy(result + 5, msg.data(), len1);
+ if (len2) {
+ result[5 + len1] = ':';
+ result[6 + len1] = ' ';
+ memcpy(result + 7 + len1, msg2.data(), len2);
+ }
+ state_ = result;
+}
+
+std::string Status::ToString() const {
+ if (state_ == NULL) {
+ return "OK";
+ } else {
+ char tmp[30];
+ const char* type;
+ switch (code()) {
+ case kOk:
+ type = "OK";
+ break;
+ case kNotFound:
+ type = "NotFound: ";
+ break;
+ case kCorruption:
+ type = "Corruption: ";
+ break;
+ case kNotSupported:
+ type = "Not implemented: ";
+ break;
+ case kInvalidArgument:
+ type = "Invalid argument: ";
+ break;
+ case kIOError:
+ type = "IO error: ";
+ break;
+ default:
+ snprintf(tmp, sizeof(tmp), "Unknown code(%d): ",
+ static_cast<int>(code()));
+ type = tmp;
+ break;
+ }
+ std::string result(type);
+ uint32_t length;
+ memcpy(&length, state_, sizeof(length));
+ result.append(state_ + 5, length);
+ return result;
+ }
+}
+
+} // namespace leveldb
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb_test.cc b/src/third_party/wiredtiger/api/leveldb/leveldb_test.cc
new file mode 100644
index 00000000000..25cfe0c379e
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb_test.cc
@@ -0,0 +1,141 @@
+/*-
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <iostream>
+#include "leveldb_wt.h"
+
+using namespace std;
+
+extern "C" int main() {
+ leveldb::DB* db;
+ leveldb::Options options;
+ options.create_if_missing = true;
+ leveldb::Status s = leveldb::DB::Open(options, "WTLDB_HOME", &db);
+ assert(s.ok());
+
+ s = db->Put(leveldb::WriteOptions(), "key", "value");
+ s = db->Put(leveldb::WriteOptions(), "key2", "value2");
+ s = db->Put(leveldb::WriteOptions(), "key3", "value3");
+ s = db->Put(leveldb::WriteOptions(), "key4", "value4");
+ assert(s.ok());
+
+#ifdef HAVE_HYPERLEVELDB
+ leveldb::ReplayIterator* replay_start;
+ leveldb::ReplayIterator* replay_ts;
+ leveldb::ReplayIterator* replay_now;
+ leveldb::ReplayIterator* replay_last;
+ std::string timestamp;
+ std::string timestamp_last;
+
+ cout << "Perform Live Backup" << endl;
+ s = db->LiveBackup("test");
+
+ // Test out a bunch of the ReplayIterator methods.
+ db->GetReplayTimestamp(&timestamp);
+ cout << "timestamp 1 " << timestamp << endl << "Put key5" << endl;
+ s = db->Put(leveldb::WriteOptions(), "key5", "value5");
+ db->GetReplayTimestamp(&timestamp_last);
+ // Verify a bunch of timestamp comparisons
+ cout << "timestamp 2 " << timestamp_last << endl;
+ cout << "CompareTimestamps tests" << endl;
+ assert(db->CompareTimestamps(timestamp, timestamp_last) < 0);
+ assert(db->CompareTimestamps("all", timestamp_last) < 0);
+ assert(db->CompareTimestamps(timestamp, "now") < 0);
+ assert(db->CompareTimestamps("now", timestamp_last) == 0);
+ assert(db->CompareTimestamps(timestamp_last, "now") == 0);
+ assert(db->CompareTimestamps("now", timestamp) > 0);
+ assert(db->CompareTimestamps("now", "all") > 0);
+
+ s = db->GetReplayIterator("all", &replay_start);
+ assert(replay_start->Valid());
+ cout << "Replay at all(start):" << endl;
+ cout << replay_start->key().ToString() << ": " << replay_start->value().ToString() << endl;
+ s = db->GetReplayIterator(timestamp, &replay_ts);
+ assert(replay_ts->Valid());
+ cout << "Replay at timestamp " << timestamp << ":" << endl;
+ cout << replay_ts->key().ToString() << ": " << replay_ts->value().ToString() << endl;
+ s = db->GetReplayIterator("now", &replay_now);
+ assert(replay_now->Valid());
+ cout << "Replay at now(end):" << endl;
+ cout << replay_now->key().ToString() << ": " << replay_now->value().ToString() << endl;
+ s = db->GetReplayIterator(timestamp_last, &replay_last);
+ assert(replay_last->Valid());
+ cout << "Replay at last timestamp " << timestamp_last << ":" << endl;
+ cout << replay_last->key().ToString() << ": " << replay_last->value().ToString() << endl;
+ assert(replay_now->key().ToString() == replay_last->key().ToString());
+ cout << "Replay walk from all/start:" << endl;
+ while (replay_start->Valid()) {
+ cout << replay_start->key().ToString() << ": " << replay_start->value().ToString() << endl;
+ replay_start->Next();
+ }
+ // We reached the end of log, iterator should still not be valid.
+ // But if we write something, the iterator should find it and become
+ // valid again.
+ assert(!replay_start->Valid());
+ s = db->Put(leveldb::WriteOptions(), "key6", "value6");
+ assert(replay_start->Valid());
+ db->ReleaseReplayIterator(replay_start);
+ db->ReleaseReplayIterator(replay_ts);
+ db->ReleaseReplayIterator(replay_now);
+ db->ReleaseReplayIterator(replay_last);
+#endif
+
+ // Read through the main database
+ cout << "Read main database:" << endl;
+ leveldb::ReadOptions read_options;
+ read_options.snapshot = db->GetSnapshot();
+ leveldb::Iterator* iter = db->NewIterator(read_options);
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ cout << iter->key().ToString() << ": " << iter->value().ToString() << endl;
+ }
+
+ delete iter;
+ db->ReleaseSnapshot(read_options.snapshot);
+
+ delete db;
+
+#ifdef HAVE_HYPERLEVELDB
+ // Read through the backup database
+ leveldb::DB* db_bkup;
+ options.create_if_missing = false;
+ s = leveldb::DB::Open(options, "WTLDB_HOME/backup-test", &db_bkup);
+ read_options.snapshot = db_bkup->GetSnapshot();
+ iter = db_bkup->NewIterator(read_options);
+ cout << "Read Backup database:" << endl;
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+ cout << iter->key().ToString() << ": " << iter->value().ToString() << endl;
+ }
+
+ delete iter;
+ db_bkup->ReleaseSnapshot(read_options.snapshot);
+
+ delete db_bkup;
+#endif
+
+ return (0);
+}
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb_wt.cc b/src/third_party/wiredtiger/api/leveldb/leveldb_wt.cc
new file mode 100644
index 00000000000..8fc7d1ca092
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb_wt.cc
@@ -0,0 +1,810 @@
+/*-
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "leveldb_wt.h"
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sstream>
+
+#if HAVE_BASHOLEVELDB
+namespace leveldb {
+Value::~Value() {}
+
+class StringValue : public Value {
+ public:
+ explicit StringValue(std::string& val) : value_(val) {}
+ ~StringValue() {}
+
+ StringValue& assign(const char* data, size_t size) {
+ value_.assign(data, size);
+ return *this;
+ }
+
+ private:
+ std::string& value_;
+};
+}
+#endif
+
+Status leveldb::DestroyDB(const std::string& name, const Options& options) {
+ WT_CONNECTION *conn;
+ int ret, t_ret;
+ /* If the database doesn't exist, there is nothing to destroy. */
+ if (access((name + "/WiredTiger").c_str(), F_OK) != 0)
+ return Status::OK();
+ if ((ret = ::wiredtiger_open(name.c_str(), NULL, NULL, &conn)) != 0)
+ return WiredTigerErrorToStatus(ret, NULL);
+ WT_SESSION *session;
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ goto cleanup;
+ if ((ret = session->drop(session, WT_URI, "force")) != 0)
+ goto cleanup;
+
+cleanup:
+ if ((t_ret = conn->close(conn, NULL)) != 0 && ret == 0)
+ ret = t_ret;
+ return WiredTigerErrorToStatus(ret, NULL);
+}
+
+Status leveldb::RepairDB(const std::string& dbname, const Options& options) {
+ return Status::NotSupported("sorry!");
+}
+
+/* Destructors required for interfaces. */
+leveldb::DB::~DB() {}
+Snapshot::~Snapshot() {}
+
+Status WiredTigerErrorToStatus(int wiredTigerError, const char *msg) {
+ if (wiredTigerError == 0)
+ return Status::OK();
+
+ if (msg == NULL)
+ msg = wiredtiger_strerror(wiredTigerError);
+
+ if (wiredTigerError != WT_NOTFOUND)
+ printf("Failing status: %d -> %s\n", wiredTigerError, msg);
+
+ if (wiredTigerError == WT_NOTFOUND)
+ return Status::NotFound(Slice(msg));
+ else if (wiredTigerError == WT_ERROR || wiredTigerError == WT_PANIC)
+ return Status::Corruption(Slice(msg));
+ else if (wiredTigerError == ENOTSUP)
+ return Status::NotSupported(Slice(msg));
+ else if (wiredTigerError == EINVAL)
+ return Status::InvalidArgument(Slice(msg));
+ else if (wiredTigerError == EPERM || wiredTigerError == ENOENT ||
+ wiredTigerError == EIO || wiredTigerError == EBADF ||
+ wiredTigerError == EEXIST || wiredTigerError == ENOSPC)
+ return Status::IOError(Slice(msg));
+ else if (wiredTigerError == WT_ROLLBACK)
+ return Status::IOError("ROLLBACK"); // TODO: Is this the best translation?
+ else
+ return Status::Corruption(Slice(msg));
+}
+
+/* Iterators, from leveldb/table/iterator.cc */
+Iterator::Iterator() {
+ cleanup_.function = NULL;
+ cleanup_.next = NULL;
+}
+
+Iterator::~Iterator() {
+ if (cleanup_.function != NULL) {
+ (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2);
+ for (Cleanup* c = cleanup_.next; c != NULL; ) {
+ (*c->function)(c->arg1, c->arg2);
+ Cleanup* next = c->next;
+ delete c;
+ c = next;
+ }
+ }
+}
+
+void Iterator::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) {
+ assert(func != NULL);
+ Cleanup* c;
+ if (cleanup_.function == NULL) {
+ c = &cleanup_;
+ } else {
+ c = new Cleanup;
+ c->next = cleanup_.next;
+ cleanup_.next = c;
+ }
+ c->function = func;
+ c->arg1 = arg1;
+ c->arg2 = arg2;
+}
+
+namespace {
+class EmptyIterator : public Iterator {
+ public:
+ EmptyIterator(const Status& s) : status_(s) { }
+ virtual bool Valid() const { return false; }
+ virtual void Seek(const Slice& target) { }
+ virtual void SeekToFirst() { }
+ virtual void SeekToLast() { }
+ virtual void Next() { assert(false); }
+ virtual void Prev() { assert(false); }
+ Slice key() const { assert(false); return Slice(); }
+ Slice value() const { assert(false); return Slice(); }
+ virtual Status status() const { return status_; }
+ private:
+ Status status_;
+};
+} // namespace
+
+Iterator* NewEmptyIterator() {
+ return new EmptyIterator(Status::OK());
+}
+
+Iterator* NewErrorIterator(const Status& status) {
+ return new EmptyIterator(status);
+}
+
+namespace {
+class FilterPolicyImpl : public FilterPolicy {
+public:
+ FilterPolicyImpl(int bits_per_key) : bits_per_key_(bits_per_key) {}
+ ~FilterPolicyImpl() {}
+ virtual const char *Name() const { return "FilterPolicyImpl"; }
+ virtual void CreateFilter(const Slice *keys, int n, std::string *dst) const {}
+ virtual bool KeyMayMatch(const Slice &key, const Slice &filter) const { return true; }
+
+ int bits_per_key_;
+};
+};
+
+namespace leveldb {
+FilterPolicy::~FilterPolicy() {}
+
+const FilterPolicy *NewBloomFilterPolicy(int bits_per_key) {
+ return new FilterPolicyImpl(bits_per_key);
+}
+#if HAVE_BASHOLEVELDB
+const FilterPolicy *NewBloomFilterPolicy2(int bits_per_key) {
+ return NewBloomFilterPolicy(bits_per_key);
+}
+#endif
+
+Cache::~Cache() {}
+Cache *NewLRUCache(size_t capacity) {
+ return new CacheImpl(capacity);
+}
+}
+
+int
+wtleveldb_create(
+ WT_CONNECTION *conn, const Options &options, std::string const &uri)
+{
+ int ret;
+ std::stringstream s_table;
+ s_table << WT_TABLE_CONFIG;
+ s_table << "internal_page_max=" << options.block_size << ",";
+ s_table << "leaf_page_max=" << options.block_size << ",";
+ s_table << "leaf_item_max=" << options.block_size / 4 << ",";
+ if (options.compression == leveldb::kSnappyCompression)
+ s_table << "block_compressor=snappy,";
+#ifdef HAVE_ROCKSDB
+ if (options.compression == leveldb::kZlibCompression)
+ s_table << "block_compressor=zlib,";
+#endif
+ s_table << "lsm=(";
+ s_table << "chunk_size=" << options.write_buffer_size << ",";
+ if (options.filter_policy) {
+ int bits = ((FilterPolicyImpl *)options.filter_policy)->bits_per_key_;
+ s_table << "bloom_bit_count=" << bits << ",";
+ // Approximate the optimal number of hashes
+ s_table << "bloom_hash_count=" << (int)(0.6 * bits) << ",";
+ }
+ s_table << "),";
+ WT_SESSION *session;
+ std::string table_config = s_table.str();
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ return (ret);
+ if ((ret = session->create(session, uri.c_str(), table_config.c_str())) != 0)
+ return (ret);
+ if ((ret = session->close(session, NULL)) != 0)
+ return (ret);
+
+ return (0);
+}
+
+Status
+leveldb::DB::Open(const Options &options, const std::string &name, leveldb::DB **dbptr)
+{
+ // Build the wiredtiger_open config.
+ std::stringstream s_conn;
+ s_conn << WT_CONN_CONFIG;
+ if (options.create_if_missing) {
+ (void)mkdir(name.c_str(), 0777);
+ s_conn << "create,";
+ }
+ if (options.error_if_exists)
+ s_conn << "exclusive,";
+#ifndef HAVE_BUILTIN_EXTENSION_SNAPPY
+ if (options.compression == kSnappyCompression)
+ s_conn << "extensions=[libwiredtiger_snappy.so],";
+#endif
+#ifdef HAVE_ROCKSDB
+#ifndef HAVE_BUILTIN_ZLIB
+ if (options.compression == kZlibCompression)
+ s_conn << "extensions=[libwiredtiger_zlib.so],";
+#endif
+#endif
+ size_t cache_size = 2 * options.write_buffer_size;
+ cache_size += (size_t)options.max_open_files * (4 << 20);
+ if (options.block_cache)
+ cache_size += ((CacheImpl *)options.block_cache)->capacity_;
+ else
+ cache_size += 100 << 20;
+ s_conn << "cache_size=" << cache_size << ",";
+ std::string conn_config = s_conn.str();
+
+ WT_CONNECTION *conn;
+ printf("Open: home %s config %s\r\n",name.c_str(),conn_config.c_str());
+ int ret = ::wiredtiger_open(name.c_str(), NULL, conn_config.c_str(), &conn);
+ if (ret == ENOENT)
+ return Status::NotFound(Slice("Database does not exist."));
+ else if (ret == EEXIST)
+ return Status::NotFound(Slice("Database already exists."));
+ else if (ret != 0)
+ return WiredTigerErrorToStatus(ret, NULL);
+
+ if (options.create_if_missing)
+ ret = wtleveldb_create(conn, options, WT_URI);
+
+ if (ret != 0) {
+ conn->close(conn, NULL);
+ return WiredTigerErrorToStatus(ret, NULL);
+ }
+ *dbptr = new DbImpl(conn);
+ return Status::OK();
+}
+
+// Set the database entry for "key" to "value". Returns OK on success,
+// and a non-OK status on error.
+// Note: consider setting options.sync = true.
+Status
+DbImpl::Put(const WriteOptions& options,
+ const Slice& key, const Slice& value)
+{
+ WT_CURSOR *cursor = GetContext()->GetCursor();
+ WT_ITEM item;
+
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ item.data = value.data();
+ item.size = value.size();
+ cursor->set_value(cursor, &item);
+ int ret = cursor->insert(cursor);
+ return WiredTigerErrorToStatus(ret, NULL);
+}
+
+// Remove the database entry (if any) for "key". Returns OK on
+// success, and a non-OK status on error. It is not an error if "key"
+// did not exist in the database.
+// Note: consider setting options.sync = true.
+Status
+DbImpl::Delete(const WriteOptions& options, const Slice& key)
+{
+ WT_CURSOR *cursor = GetContext()->GetCursor();
+ WT_ITEM item;
+
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ int ret = cursor->remove(cursor);
+ // Reset the WiredTiger cursor so it doesn't keep any pages pinned. Track
+ // failures in debug builds since we don't expect failure, but don't pass
+ // failures on - it's not necessary for correct operation.
+ if (ret == 0) {
+ int t_ret = cursor->reset(cursor);
+ assert(t_ret == 0);
+ } else if (ret == WT_NOTFOUND)
+ ret = 0;
+ return WiredTigerErrorToStatus(ret, NULL);
+}
+
+void
+WriteBatchHandler::Put(const Slice& key, const Slice& value) {
+ WT_CURSOR *cursor = context_->GetCursor();
+ WT_ITEM item;
+
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ item.data = value.data();
+ item.size = value.size();
+ cursor->set_value(cursor, &item);
+ int ret = cursor->insert(cursor);
+ if (ret != 0 && status_ == 0)
+ status_ = ret;
+}
+
+void WriteBatchHandler::Delete(const Slice& key) {
+ WT_CURSOR *cursor = context_->GetCursor();
+ WT_ITEM item;
+
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ int ret = cursor->remove(cursor);
+ if (ret != 0 && ret != WT_NOTFOUND && status_ == 0)
+ status_ = ret;
+}
+
+// Apply the specified updates to the database.
+// Returns OK on success, non-OK on failure.
+// Note: consider setting options.sync = true.
+Status
+DbImpl::Write(const WriteOptions& options, WriteBatch* updates)
+{
+ const char *errmsg = NULL;
+ Status status = Status::OK();
+ OperationContext *context = GetContext();
+ WT_SESSION *session = context->GetSession();
+ int ret = 0, t_ret;
+
+#ifdef HAVE_ROCKSDB
+ int need_txn = (updates->Count() > 1);
+#else
+ int need_txn = 1;
+#endif
+
+ for (;;) {
+ if (need_txn && (ret = session->begin_transaction(session, NULL)) != 0) {
+ errmsg = "Begin transaction failed in Write batch";
+ goto err;
+ }
+
+ WriteBatchHandler handler(this, context);
+#if 0
+ status = updates->Iterate(&handler);
+#else
+ try {
+ status = updates->Iterate(&handler);
+ } catch(...) {
+ if (need_txn)
+ (void)session->rollback_transaction(session, NULL);
+ throw;
+ }
+#endif
+ if (!status.ok() || (ret = handler.GetWiredTigerStatus()) != WT_ROLLBACK)
+ break;
+ // Roll back the transaction on deadlock so we can try again
+ if (need_txn && (ret = session->rollback_transaction(session, NULL)) != 0) {
+ errmsg = "Rollback transaction failed in Write batch";
+ goto err;
+ }
+ }
+
+ if (need_txn && status.ok() && ret == 0) {
+ ret = session->commit_transaction(session, NULL);
+ } else if (need_txn) {
+ t_ret = session->rollback_transaction(session, NULL);
+ if (ret == 0)
+ ret = t_ret;
+ }
+
+err:
+ if (status.ok() && ret != 0)
+ status = WiredTigerErrorToStatus(ret, errmsg);
+ return status;
+}
+
+// If the database contains an entry for "key" store the
+// corresponding value in *value and return OK.
+//
+// If there is no entry for "key" leave *value unchanged and return
+// a status for which Status::IsNotFound() returns true.
+//
+// May return some other Status on an error.
+Status
+DbImpl::Get(const ReadOptions& options,
+ const Slice& key, std::string* value)
+{
+ WT_CURSOR *cursor = GetContext(options)->GetCursor();
+ const char *errmsg = NULL;
+
+ WT_ITEM item;
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ int ret = cursor->search(cursor);
+ if (ret == 0) {
+ ret = cursor->get_value(cursor, &item);
+ if (ret == 0) {
+ // Make a copy of the value to return, then the cursor can be reset
+ *value = std::string((const char *)item.data, item.size);
+ ret = cursor->reset(cursor);
+ }
+ } else if (ret == WT_NOTFOUND)
+ errmsg = "DB::Get key not found";
+ return WiredTigerErrorToStatus(ret, errmsg);
+}
+
+#if HAVE_BASHOLEVELDB
+// If the database contains an entry for "key" store the
+// corresponding value in *value and return OK.
+//
+// If there is no entry for "key" leave *value unchanged and return
+// a status for which Status::IsNotFound() returns true.
+//
+// May return some other Status on an error.
+Status
+DbImpl::Get(const ReadOptions& options,
+ const Slice& key, Value* value)
+{
+ const char *errmsg = NULL;
+
+ WT_CURSOR *cursor = GetContext(options)->GetCursor();
+ WT_ITEM item;
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ int ret = cursor->search(cursor);
+ if (ret == 0) {
+ ret = cursor->get_value(cursor, &item);
+ if (ret == 0) {
+ // This call makes a copy, reset the cursor afterwards.
+ value->assign((const char *)item.data, item.size);
+ ret = cursor->reset(cursor);
+ }
+ } else if (ret == WT_NOTFOUND)
+ errmsg = "DB::Get key not found";
+err:
+ return WiredTigerErrorToStatus(ret, errmsg);
+}
+#endif
+
+// Return a heap-allocated iterator over the contents of the database.
+// The result of NewIterator() is initially invalid (caller must
+// call one of the Seek methods on the iterator before using it).
+//
+// Caller should delete the iterator when it is no longer needed.
+// The returned iterator should be deleted before this db is deleted.
+Iterator *
+DbImpl::NewIterator(const ReadOptions& options)
+{
+ /* Iterators own the cursor until they are closed. */
+ OperationContext *context = GetContext(options);
+ WT_CURSOR *c = context->GetCursor();
+ context->SetCursor(NULL);
+ return new IteratorImpl(this, c);
+}
+
+SnapshotImpl::SnapshotImpl(DbImpl *db) :
+ Snapshot(), db_(db), context_(db->NewContext()), status_(Status::OK())
+{
+}
+
+// Return a handle to the current DB state. Iterators created with
+// this handle will all observe a stable snapshot of the current DB
+// state. The caller must call ReleaseSnapshot(result) when the
+// snapshot is no longer needed.
+const Snapshot *
+DbImpl::GetSnapshot()
+{
+ SnapshotImpl *si = new SnapshotImpl(this);
+ WT_SESSION *session = si->GetContext()->GetSession();
+ int ret = session->begin_transaction(session, NULL);
+ assert(ret == 0);
+ return si;
+}
+
+// Release a previously acquired snapshot. The caller must not
+// use "snapshot" after this call.
+void
+DbImpl::ReleaseSnapshot(const Snapshot* snapshot)
+{
+ SnapshotImpl *si =
+ static_cast<SnapshotImpl *>(const_cast<Snapshot *>(snapshot));
+ if (si != NULL) {
+ // We started a transaction: we could commit it here, but it will be rolled
+ // back automatically by closing the session, which we have to do anyway.
+ int ret = si->GetContext()->Close();
+ assert(ret == 0);
+ delete si;
+ }
+}
+
+// DB implementations can export properties about their state
+// via this method. If "property" is a valid property understood by this
+// DB implementation, fills "*value" with its current value and returns
+// true. Otherwise returns false.
+//
+//
+// Valid property names include:
+//
+// "leveldb.num-files-at-level<N>" - return the number of files at level <N>,
+// where <N> is an ASCII representation of a level number (e.g. "0").
+// "leveldb.stats" - returns a multi-line string that describes statistics
+// about the internal operation of the DB.
+// "leveldb.sstables" - returns a multi-line string that describes all
+// of the sstables that make up the db contents.
+bool
+DbImpl::GetProperty(const Slice& property, std::string* value)
+{
+ /* Not supported */
+ return false;
+}
+
+// For each i in [0,n-1], store in "sizes[i]", the approximate
+// file system space used by keys in "[range[i].start .. range[i].limit)".
+//
+// Note that the returned sizes measure file system space usage, so
+// if the user data compresses by a factor of ten, the returned
+// sizes will be one-tenth the size of the corresponding user data size.
+//
+// The results may not include the sizes of recently written data.
+void
+DbImpl::GetApproximateSizes(const Range* range, int n,
+ uint64_t* sizes)
+{
+ int i;
+
+ /* XXX Not supported */
+ for (i = 0; i < n; i++)
+ sizes[i] = 1;
+}
+
+// Compact the underlying storage for the key range [*begin,*end].
+// In particular, deleted and overwritten versions are discarded,
+// and the data is rearranged to reduce the cost of operations
+// needed to access the data. This operation should typically only
+// be invoked by users who understand the underlying implementation.
+//
+// begin==NULL is treated as a key before all keys in the database.
+// end==NULL is treated as a key after all keys in the database.
+// Therefore the following call will compact the entire database:
+// db->CompactRange(NULL, NULL);
+void
+DbImpl::CompactRange(const Slice* begin, const Slice* end)
+{
+ // The compact doesn't need a cursor, but the context always opens a
+ // cursor when opening the session - so grab that, and use the session.
+ WT_CURSOR *cursor = GetContext()->GetCursor();
+ WT_SESSION *session = cursor->session;
+ int ret = session->compact(session, WT_URI, NULL);
+ assert(ret == 0);
+}
+
+// Suspends the background compaction thread. This methods
+// returns once suspended.
+void
+DbImpl::SuspendCompactions()
+{
+ /* Not supported */
+}
+
+// Resumes a suspended background compation thread.
+void
+DbImpl::ResumeCompactions()
+{
+ /* Not supported */
+}
+
+IteratorImpl::~IteratorImpl()
+{
+ if (cursor_ != NULL) {
+ OperationContext *context = db_->GetContext();
+ /*
+ * If we are in the same thread where the iterator was opened, and there is
+ * no cursor stashed there, return it.
+ */
+ if (cursor_->session == context->GetSession()) {
+#ifdef HAVE_ROCKSDB
+ if (context->GetCursor(id_) == NULL) {
+ context->SetCursor(id_, cursor_);
+ cursor_ = NULL;
+ }
+#else
+ if (context->GetCursor() == NULL) {
+ context->SetCursor(cursor_);
+ cursor_ = NULL;
+ }
+#endif
+ }
+ if (cursor_ != NULL) {
+ int ret = cursor_->close(cursor_);
+ assert(ret == 0);
+ }
+ }
+}
+
+// Position at the first key in the source. The iterator is Valid()
+// after this call iff the source is not empty.
+void
+IteratorImpl::SeekToFirst()
+{
+ int ret;
+ WT_ITEM item;
+
+ if (!Status().ok())
+ return;
+
+ if ((ret = cursor_->reset(cursor_)) != 0) {
+ SetError(ret);
+ return;
+ }
+ ret = cursor_->next(cursor_);
+ if (ret == WT_NOTFOUND) {
+ valid_ = false;
+ return;
+ } else if (ret != 0) {
+ SetError(ret);
+ return;
+ }
+ if ((ret = cursor_->get_key(cursor_, &item)) != 0) {
+ SetError(ret);
+ return;
+ }
+ key_ = Slice((const char *)item.data, item.size);
+ if ((ret = cursor_->get_value(cursor_, &item)) != 0) {
+ SetError(ret);
+ return;
+ }
+ value_ = Slice((const char *)item.data, item.size);
+ valid_ = true;
+}
+
+// Position at the last key in the source. The iterator is
+// Valid() after this call iff the source is not empty.
+void
+IteratorImpl::SeekToLast()
+{
+ int ret;
+ WT_ITEM item;
+
+ if (!Status().ok())
+ return;
+
+ if ((ret = cursor_->reset(cursor_)) != 0) {
+ SetError(ret);
+ return;
+ }
+ ret = cursor_->prev(cursor_);
+ if (ret == WT_NOTFOUND) {
+ valid_ = false;
+ return;
+ } else if (ret != 0) {
+ SetError(ret);
+ return;
+ }
+ if ((ret = cursor_->get_key(cursor_, &item)) != 0) {
+ SetError(ret);
+ return;
+ }
+ key_ = Slice((const char *)item.data, item.size);
+ if ((ret = cursor_->get_value(cursor_, &item)) != 0) {
+ SetError(ret);
+ return;
+ }
+ value_ = Slice((const char *)item.data, item.size);
+ valid_ = true;
+}
+
+// Position at the first key in the source that at or past target
+// The iterator is Valid() after this call iff the source contains
+// an entry that comes at or past target.
+void
+IteratorImpl::Seek(const Slice& target)
+{
+ WT_ITEM item;
+
+ if (!Status().ok())
+ return;
+
+ item.data = target.data();
+ item.size = target.size();
+ cursor_->set_key(cursor_, &item);
+ int cmp, ret = cursor_->search_near(cursor_, &cmp);
+ if (ret == 0 && cmp < 0)
+ ret = cursor_->next(cursor_);
+ if (ret != 0) {
+ if (ret != WT_NOTFOUND)
+ SetError(ret);
+ valid_ = false;
+ return;
+ }
+ if ((ret = cursor_->get_key(cursor_, &item)) != 0) {
+ SetError(ret);
+ return;
+ }
+ key_ = Slice((const char *)item.data, item.size);
+ if ((ret = cursor_->get_value(cursor_, &item)) != 0) {
+ SetError(ret);
+ return;
+ }
+ value_ = Slice((const char *)item.data, item.size);
+ valid_ = true;
+}
+
+// Moves to the next entry in the source. After this call, Valid() is
+// true iff the iterator was not positioned at the last entry in the source.
+// REQUIRES: Valid()
+void
+IteratorImpl::Next()
+{
+ int ret;
+ WT_ITEM item;
+
+ if (!Status().ok() || !valid_)
+ return;
+
+ ret = cursor_->next(cursor_);
+ if (ret != 0) {
+ if (ret != WT_NOTFOUND)
+ SetError(ret);
+ valid_ = false;
+ return;
+ }
+ if ((ret = cursor_->get_key(cursor_, &item)) != 0) {
+ SetError(ret);
+ return;
+ }
+ key_ = Slice((const char *)item.data, item.size);
+ if ((ret = cursor_->get_value(cursor_, &item)) != 0) {
+ SetError(ret);
+ return;
+ }
+ value_ = Slice((const char *)item.data, item.size);
+ valid_ = true;
+}
+
+// Moves to the previous entry in the source. After this call, Valid() is
+// true iff the iterator was not positioned at the first entry in source.
+// REQUIRES: Valid()
+void
+IteratorImpl::Prev()
+{
+ WT_ITEM item;
+
+ if (!Status().ok() || !valid_)
+ return;
+
+ int ret = cursor_->prev(cursor_);
+ if (ret != 0) {
+ if (ret != WT_NOTFOUND)
+ SetError(ret);
+ valid_ = false;
+ return;
+ }
+ if ((ret = cursor_->get_key(cursor_, &item)) != 0) {
+ SetError(ret);
+ return;
+ }
+ key_ = Slice((const char *)item.data, item.size);
+ if ((ret = cursor_->get_value(cursor_, &item)) != 0) {
+ SetError(ret);
+ return;
+ }
+ value_ = Slice((const char *)item.data, item.size);
+ valid_ = true;
+}
diff --git a/src/third_party/wiredtiger/api/leveldb/leveldb_wt.h b/src/third_party/wiredtiger/api/leveldb/leveldb_wt.h
new file mode 100644
index 00000000000..dc185183034
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/leveldb_wt.h
@@ -0,0 +1,461 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _INCLUDE_LEVELDB_WT_H
+#define _INCLUDE_LEVELDB_WT_H 1
+
+#include "leveldb_wt_config.h"
+
+#include "leveldb/cache.h"
+#include "leveldb/comparator.h"
+#include "leveldb/db.h"
+#include "leveldb/env.h"
+#include "leveldb/filter_policy.h"
+#include "leveldb/options.h"
+#include "leveldb/slice.h"
+#include "leveldb/status.h"
+#include "leveldb/write_batch.h"
+#if HAVE_BASHO_LEVELDB
+#include "basho/perf_count.h"
+#endif
+
+#include "wiredtiger.h"
+
+#define WT_URI "table:data"
+#define WT_CONN_CONFIG \
+ "log=(enabled),checkpoint=(wait=180),checkpoint_sync=false," \
+ "session_max=8192,mmap=false," \
+ "transaction_sync=(enabled=true,method=none),"
+// Note: LSM doesn't split, build full pages from the start
+#define WT_TABLE_CONFIG "type=lsm,split_pct=100,leaf_item_max=1KB," \
+ "lsm=(chunk_size=100MB,bloom_config=(leaf_page_max=8MB)),"
+#define WT_TIMESTAMP_FORMAT "%d.%llu"
+// We're also only interested in operations to the user file. Skip over
+// any changes to the metadata.
+// !!! Currently WT guarantees that the metadata file is always at
+// fileid 0 and the implementation here only uses one table. This will
+// breakdown if either of those assumptions changes.
+#define WT_VALID_OPERATION(fileid, optype) \
+ ((fileid) != 0 && \
+ ((optype) == WT_LOGOP_COL_PUT || \
+ (optype) == WT_LOGOP_COL_REMOVE || \
+ (optype) == WT_LOGOP_ROW_PUT || \
+ (optype) == WT_LOGOP_ROW_REMOVE))
+
+using leveldb::Cache;
+using leveldb::FilterPolicy;
+using leveldb::Iterator;
+using leveldb::Options;
+using leveldb::ReadOptions;
+using leveldb::WriteBatch;
+using leveldb::WriteOptions;
+using leveldb::Range;
+using leveldb::Slice;
+using leveldb::Snapshot;
+using leveldb::Status;
+#if HAVE_BASHOLEVELDB
+using leveldb::Value;
+#endif
+#if HAVE_ROCKSDB
+using leveldb::FlushOptions;
+using leveldb::ColumnFamilyHandle;
+#endif
+
+extern Status WiredTigerErrorToStatus(int wiredTigerError, const char *msg = "");
+
+/* POSIX thread-local storage */
+template <class T>
+class ThreadLocal {
+public:
+ static void cleanup(void *val) {
+ delete (T *)val;
+ }
+
+ ThreadLocal() {
+ int ret = pthread_key_create(&key_, cleanup);
+ assert(ret == 0);
+ }
+
+ ~ThreadLocal() {
+ int ret = pthread_key_delete(key_);
+ assert(ret == 0);
+ }
+
+ T *Get() {
+ return (T *)(pthread_getspecific(key_));
+ }
+
+ void Set(T *value) {
+ int ret = pthread_setspecific(key_, value);
+ assert(ret == 0);
+ }
+
+private:
+ pthread_key_t key_;
+};
+
+/* WiredTiger implementations. */
+class DbImpl;
+
+/* Context for operations (including snapshots, write batches, transactions) */
+class OperationContext {
+public:
+ OperationContext(WT_CONNECTION *conn) {
+ int ret = conn->open_session(conn, NULL, "isolation=snapshot", &session_);
+ assert(ret == 0);
+ ret = session_->open_cursor(
+ session_, WT_URI, NULL, NULL, &cursor_);
+ assert(ret == 0);
+ }
+
+ ~OperationContext() {
+#ifdef WANT_SHUTDOWN_RACES
+ int ret = Close();
+ assert(ret == 0);
+#endif
+ }
+
+ int Close() {
+ int ret = 0;
+ if (session_ != NULL)
+ ret = session_->close(session_, NULL);
+ session_ = NULL;
+ return (ret);
+ }
+
+ WT_CURSOR *GetCursor() { return cursor_; }
+ void SetCursor(WT_CURSOR *c) { cursor_ = c; }
+#ifdef HAVE_ROCKSDB
+ WT_CURSOR *GetCursor(u_int i) {
+ return (i < cursors_.size()) ? cursors_[i] : NULL;
+ }
+ void SetCursor(u_int i, WT_CURSOR *c) {
+ if (i >= cursors_.size())
+ cursors_.resize(i + 1);
+ cursors_[i] = c;
+ }
+#endif
+ WT_SESSION *GetSession() { return session_; }
+
+private:
+ WT_SESSION *session_;
+ WT_CURSOR *cursor_;
+#ifdef HAVE_ROCKSDB
+ std::vector<WT_CURSOR *> cursors_;
+#endif
+};
+
+class CacheImpl : public Cache {
+public:
+ CacheImpl(size_t capacity) : Cache(), capacity_(capacity) {}
+ virtual ~CacheImpl() {}
+
+ virtual Handle* Insert(const Slice&, void*, size_t,
+ void (*)(const Slice&, void*)) { return 0; }
+ virtual Handle* Lookup(const Slice&) { return 0; }
+ virtual void Release(Handle*) {}
+ virtual void* Value(Handle*) { return 0; }
+ virtual void Erase(const Slice&) {}
+ virtual uint64_t NewId() { return 0; }
+
+ size_t capacity_;
+};
+
+#ifdef HAVE_ROCKSDB
+// ColumnFamilyHandleImpl is the class that clients use to access different
+// column families. It has non-trivial destructor, which gets called when client
+// is done using the column family
+class ColumnFamilyHandleImpl : public ColumnFamilyHandle {
+ public:
+ ColumnFamilyHandleImpl(DbImpl* db, std::string const &name, uint32_t id) : db_(db), id_(id), name_(name) {}
+ ColumnFamilyHandleImpl(const ColumnFamilyHandleImpl &copyfrom) : db_(copyfrom.db_), id_(copyfrom.id_), name_(copyfrom.name_) {}
+ virtual ~ColumnFamilyHandleImpl() {}
+ virtual uint32_t GetID() const { return id_; }
+
+ std::string const &GetName() const { return name_; }
+ std::string const GetURI() const { return "table:" + name_; }
+
+ private:
+ DbImpl* db_;
+ uint32_t id_;
+ std::string const name_;
+};
+#endif
+
+class IteratorImpl : public Iterator {
+public:
+ IteratorImpl(DbImpl *db, WT_CURSOR *cursor, uint32_t id=0) : db_(db), cursor_(cursor), id_(id) {}
+ virtual ~IteratorImpl();
+
+ // An iterator is either positioned at a key/value pair, or
+ // not valid. This method returns true iff the iterator is valid.
+ virtual bool Valid() const { return valid_; }
+
+ virtual void SeekToFirst();
+
+ virtual void SeekToLast();
+
+ virtual void Seek(const Slice& target);
+
+ virtual void Next();
+
+ virtual void Prev();
+
+ virtual Slice key() const {
+ return key_;
+ }
+
+ virtual Slice value() const {
+ return value_;
+ }
+
+ virtual Status status() const {
+ return status_;
+ }
+
+private:
+ DbImpl *db_;
+ WT_CURSOR *cursor_;
+ Slice key_, value_;
+ Status status_;
+ bool valid_;
+ uint32_t id_;
+
+ void SetError(int wiredTigerError) {
+ valid_ = false;
+ status_ = WiredTigerErrorToStatus(wiredTigerError, NULL);
+ }
+
+ // No copying allowed
+ IteratorImpl(const IteratorImpl&);
+ void operator=(const IteratorImpl&);
+};
+
+class SnapshotImpl : public Snapshot {
+friend class DbImpl;
+friend class IteratorImpl;
+public:
+ SnapshotImpl(DbImpl *db);
+ virtual ~SnapshotImpl() { delete context_; }
+protected:
+ OperationContext *GetContext() const { return context_; }
+ Status GetStatus() const { return status_; }
+ Status SetupTransaction();
+private:
+ DbImpl *db_;
+ OperationContext *context_;
+ Status status_;
+};
+
+class DbImpl : public leveldb::DB {
+friend class IteratorImpl;
+friend class SnapshotImpl;
+public:
+ DbImpl(WT_CONNECTION *conn) :
+ DB(), conn_(conn), context_(new ThreadLocal<OperationContext>) {}
+ virtual ~DbImpl() {
+ delete context_;
+ int ret = conn_->close(conn_, NULL);
+ assert(ret == 0);
+ }
+
+ virtual Status Put(const WriteOptions& options,
+ const Slice& key,
+ const Slice& value);
+
+ virtual Status Delete(const WriteOptions& options, const Slice& key);
+
+ virtual Status Write(const WriteOptions& options, WriteBatch* updates);
+
+ virtual Status Get(const ReadOptions& options,
+ const Slice& key, std::string* value);
+
+#if HAVE_BASHOLEVELDB
+ virtual Status Get(const ReadOptions& options,
+ const Slice& key, Value* value);
+#endif
+
+#ifdef HAVE_HYPERLEVELDB
+ virtual Status LiveBackup(const Slice& name);
+ virtual void GetReplayTimestamp(std::string* timestamp);
+ virtual void AllowGarbageCollectBeforeTimestamp(const std::string& timestamp);
+ virtual bool ValidateTimestamp(const std::string& timestamp);
+ virtual int CompareTimestamps(const std::string& lhs, const std::string& rhs);
+ virtual Status GetReplayIterator(const std::string& timestamp,
+ leveldb::ReplayIterator** iter);
+ virtual void ReleaseReplayIterator(leveldb::ReplayIterator* iter);
+#endif
+
+#ifdef HAVE_ROCKSDB
+ virtual Status CreateColumnFamily(const Options& options,
+ const std::string& column_family_name,
+ ColumnFamilyHandle** handle);
+
+ // Drop a column family specified by column_family handle. This call
+ // only records a drop record in the manifest and prevents the column
+ // family from flushing and compacting.
+ virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
+
+ // Set the database entry for "key" to "value".
+ // Returns OK on success, and a non-OK status on error.
+ // Note: consider setting options.sync = true.
+ virtual Status Put(const WriteOptions& options,
+ ColumnFamilyHandle* column_family, const Slice& key,
+ const Slice& value);
+
+ // Remove the database entry (if any) for "key". Returns OK on
+ // success, and a non-OK status on error. It is not an error if "key"
+ // did not exist in the database.
+ // Note: consider setting options.sync = true.
+ virtual Status Delete(const WriteOptions& options,
+ ColumnFamilyHandle* column_family,
+ const Slice& key);
+
+ // Merge the database entry for "key" with "value". Returns OK on success,
+ // and a non-OK status on error. The semantics of this operation is
+ // determined by the user provided merge_operator when opening DB.
+ // Note: consider setting options.sync = true.
+ virtual Status Merge(const WriteOptions& options,
+ ColumnFamilyHandle* column_family, const Slice& key,
+ const Slice& value);
+
+ // May return some other Status on an error.
+ virtual Status Get(const ReadOptions& options,
+ ColumnFamilyHandle* column_family, const Slice& key,
+ std::string* value);
+
+ // If keys[i] does not exist in the database, then the i'th returned
+ // status will be one for which Status::IsNotFound() is true, and
+ // (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
+ // the i'th returned status will have Status::ok() true, and (*values)[i]
+ // will store the value associated with keys[i].
+ //
+ // (*values) will always be resized to be the same size as (keys).
+ // Similarly, the number of returned statuses will be the number of keys.
+ // Note: keys will not be "de-duplicated". Duplicate keys will return
+ // duplicate values in order.
+ virtual std::vector<Status> MultiGet(
+ const ReadOptions& options,
+ const std::vector<ColumnFamilyHandle*>& column_family,
+ const std::vector<Slice>& keys, std::vector<std::string>* values);
+
+ virtual Iterator* NewIterator(const ReadOptions& options,
+ ColumnFamilyHandle* column_family);
+
+ virtual bool GetProperty(ColumnFamilyHandle* column_family,
+ const Slice& property, std::string* value);
+
+ // Flush all mem-table data.
+ virtual Status Flush(const FlushOptions& options,
+ ColumnFamilyHandle* column_family);
+
+ ColumnFamilyHandleImpl *GetCF(uint32_t id) {
+ return (id < columns_.size()) ? static_cast<ColumnFamilyHandleImpl *>(columns_[id]) : NULL;
+ }
+ void SetColumns(std::vector<ColumnFamilyHandle *> &cols) {
+ columns_ = cols;
+ }
+#endif
+
+ virtual Iterator* NewIterator(const ReadOptions& options);
+
+ virtual const Snapshot* GetSnapshot();
+
+ virtual void ReleaseSnapshot(const Snapshot* snapshot);
+
+ virtual bool GetProperty(const Slice& property, std::string* value);
+
+ virtual void GetApproximateSizes(const Range* range, int n,
+ uint64_t* sizes);
+
+ virtual void CompactRange(const Slice* begin, const Slice* end);
+
+ virtual void SuspendCompactions();
+
+ virtual void ResumeCompactions();
+
+ OperationContext *GetContext() {
+ OperationContext *ctx = context_->Get();
+ if (ctx == NULL) {
+ ctx = NewContext();
+ context_->Set(ctx);
+ }
+ return (ctx);
+ }
+
+private:
+ WT_CONNECTION *conn_;
+ ThreadLocal<OperationContext> *context_;
+#ifdef HAVE_ROCKSDB
+ std::vector<ColumnFamilyHandle*> columns_;
+#endif
+
+ OperationContext *NewContext() {
+ return new OperationContext(conn_);
+ }
+
+ OperationContext *GetContext(const ReadOptions &options) {
+ if (options.snapshot == NULL)
+ return GetContext();
+ else {
+ const SnapshotImpl *si =
+ static_cast<const SnapshotImpl *>(options.snapshot);
+ assert(si->GetStatus().ok());
+ return si->GetContext();
+ }
+ }
+
+ // No copying allowed
+ DbImpl(const DbImpl&);
+ void operator=(const DbImpl&);
+};
+
+// Implemention of WriteBatch::Handler
+class WriteBatchHandler : public WriteBatch::Handler {
+public:
+ WriteBatchHandler(DbImpl *db, OperationContext *context) : db_(db), context_(context), status_(0) {}
+ virtual ~WriteBatchHandler() {}
+ int GetWiredTigerStatus() { return status_; }
+
+ virtual void Put(const Slice& key, const Slice& value);
+
+ virtual void Delete(const Slice& key);
+
+#ifdef HAVE_ROCKSDB
+ // Implementations are in rocksdb_wt.cc
+ virtual Status PutCF(uint32_t column_family_id, const Slice& key,
+ const Slice& value);
+ virtual Status DeleteCF(uint32_t column_family_id, const Slice& key);
+#endif
+
+private:
+ DbImpl *db_;
+ OperationContext *context_;
+ int status_;
+};
+
+#endif
diff --git a/src/third_party/wiredtiger/api/leveldb/rocks_wt.cc b/src/third_party/wiredtiger/api/leveldb/rocks_wt.cc
new file mode 100644
index 00000000000..6ccab2c1e78
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/rocks_wt.cc
@@ -0,0 +1,315 @@
+/*-
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "leveldb_wt.h"
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sstream>
+
+using leveldb::Cache;
+using leveldb::DB;
+using leveldb::FlushOptions;
+using leveldb::FilterPolicy;
+using leveldb::Iterator;
+using leveldb::Options;
+using leveldb::ReadOptions;
+using leveldb::WriteBatch;
+using leveldb::WriteOptions;
+using leveldb::Range;
+using leveldb::Slice;
+using leveldb::Snapshot;
+using leveldb::Status;
+
+static int
+wtrocks_get_cursor(OperationContext *context, ColumnFamilyHandle *cfhp, WT_CURSOR **cursorp, int acquire=0)
+{
+ ColumnFamilyHandleImpl *cf =
+ static_cast<ColumnFamilyHandleImpl *>(cfhp);
+ if (cf == NULL) {
+ fprintf(stderr, "Missing column!\n");
+ assert(0);
+ }
+ WT_CURSOR *c = context->GetCursor(cf->GetID());
+ if (c == NULL) {
+ WT_SESSION *session = context->GetSession();
+ int ret;
+ if ((ret = session->open_cursor(
+ session, cf->GetURI().c_str(), NULL, NULL, &c)) != 0) {
+ fprintf(stderr, "Failed to open cursor on %s: %s\n", cf->GetURI().c_str(), wiredtiger_strerror(ret));
+ return (ret);
+ }
+ if (!acquire)
+ context->SetCursor(cf->GetID(), c);
+ } else if (acquire)
+ context->SetCursor(cf->GetID(), NULL);
+ *cursorp = c;
+ return (0);
+}
+
+Status
+DB::ListColumnFamilies(
+ Options const &options, std::string const &name,
+ std::vector<std::string> *column_families)
+{
+ std::vector<std::string> cf;
+ DB *dbptr;
+ Status status = DB::Open(options, name, &dbptr);
+ if (!status.ok())
+ return status;
+ DbImpl *db = static_cast<DbImpl *>(dbptr);
+ OperationContext *context = db->GetContext();
+ WT_SESSION *session = context->GetSession();
+ WT_CURSOR *c;
+ int ret = session->open_cursor(session, "metadata:", NULL, NULL, &c);
+ if (ret != 0)
+ goto err;
+ c->set_key(c, "table:");
+ /* Position on the first table entry */
+ int cmp;
+ ret = c->search_near(c, &cmp);
+ if (ret != 0 || (cmp < 0 && (ret = c->next(c)) != 0))
+ goto err;
+ /* Add entries while we are getting "table" URIs. */
+ for (; ret == 0; ret = c->next(c)) {
+ const char *key;
+ if ((ret = c->get_key(c, &key)) != 0)
+ goto err;
+ if (strncmp(key, "table:", strlen("table:")) != 0)
+ break;
+ printf("List column families: [%d] = %s\n", (int)cf.size(), key);
+ cf.push_back(std::string(key + strlen("table:")));
+ }
+
+err: delete db;
+ /*
+ * WT_NOTFOUND is not an error: it just means we got to the end of the
+ * list of tables.
+ */
+ if (ret == 0 || ret == WT_NOTFOUND) {
+ *column_families = cf;
+ ret = 0;
+ }
+ return WiredTigerErrorToStatus(ret);
+}
+
+Status
+DB::Open(Options const &options, std::string const &name, const std::vector<ColumnFamilyDescriptor> &column_families, std::vector<ColumnFamilyHandle*> *handles, DB**dbptr)
+{
+ Status status = Open(options, name, dbptr);
+ if (!status.ok())
+ return status;
+ DbImpl *db = static_cast<DbImpl *>(*dbptr);
+ std::vector<ColumnFamilyHandle*> cfhandles(
+ column_families.size());
+ for (size_t i = 0; i < column_families.size(); i++) {
+ printf("Open column families: [%d] = %s\n", (int)i, column_families[i].name.c_str());
+ cfhandles[i] = new ColumnFamilyHandleImpl(
+ db, column_families[i].name, (int)i);
+ }
+ db->SetColumns(*handles = cfhandles);
+ return Status::OK();
+}
+
+void
+WriteBatch::Handler::Merge(const Slice& key, const Slice& value)
+{
+}
+
+void
+WriteBatch::Handler::LogData(const Slice& blob)
+{
+}
+
+Status
+WriteBatchHandler::PutCF(
+ uint32_t column_family_id, const Slice& key, const Slice& value)
+{
+ WT_CURSOR *cursor;
+ int ret = wtrocks_get_cursor(context_, db_->GetCF(column_family_id), &cursor);
+ if (ret != 0)
+ return WiredTigerErrorToStatus(ret);
+ WT_ITEM item;
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ item.data = value.data();
+ item.size = value.size();
+ cursor->set_value(cursor, &item);
+ ret = cursor->insert(cursor);
+ return WiredTigerErrorToStatus(ret);
+}
+
+Status
+WriteBatchHandler::DeleteCF(uint32_t column_family_id, const Slice& key)
+{
+ WT_CURSOR *cursor;
+ int ret = wtrocks_get_cursor(context_, db_->GetCF(column_family_id), &cursor);
+ if (ret != 0)
+ return WiredTigerErrorToStatus(ret);
+ WT_ITEM item;
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ ret = cursor->remove(cursor);
+ if (ret == 0) {
+ int t_ret = cursor->reset(cursor);
+ assert(t_ret == 0);
+ } else if (ret == WT_NOTFOUND)
+ ret = 0;
+ return WiredTigerErrorToStatus(ret);
+}
+
+Status
+DbImpl::Merge(WriteOptions const&, ColumnFamilyHandle*, Slice const&, Slice const&)
+{
+ return WiredTigerErrorToStatus(ENOTSUP);
+}
+
+Status
+DbImpl::CreateColumnFamily(Options const &options, std::string const &name, ColumnFamilyHandle **cfhp)
+{
+ extern int wtleveldb_create(WT_CONNECTION *,
+ const Options &, std::string const &uri);
+ int ret = wtleveldb_create(conn_, options, "table:" + name);
+ if (ret != 0)
+ return WiredTigerErrorToStatus(ret);
+ int id = (int)columns_.size();
+ *cfhp = new ColumnFamilyHandleImpl(this, name, id);
+ printf("Create column family: [%d] = %s\n", id, name.c_str());
+ columns_.push_back(*cfhp);
+ return Status::OK();
+}
+
+Status
+DbImpl::DropColumnFamily(ColumnFamilyHandle *cfhp)
+{
+ ColumnFamilyHandleImpl *cf =
+ static_cast<ColumnFamilyHandleImpl *>(cfhp);
+ WT_SESSION *session = GetContext()->GetSession();
+ int ret = session->drop(session, cf->GetURI().c_str(), NULL);
+ return WiredTigerErrorToStatus(ret);
+}
+
+Status
+DbImpl::Delete(WriteOptions const &write_options, ColumnFamilyHandle *cfhp, Slice const &key)
+{
+ WT_CURSOR *cursor;
+ int ret = wtrocks_get_cursor(GetContext(), cfhp, &cursor);
+ if (ret != 0)
+ return WiredTigerErrorToStatus(ret);
+ WT_ITEM item;
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ ret = cursor->remove(cursor);
+ // Reset the WiredTiger cursor so it doesn't keep any pages pinned.
+ // Track failures in debug builds since we don't expect failure, but
+ // don't pass failures on - it's not necessary for correct operation.
+ int t_ret = cursor->reset(cursor);
+ assert(t_ret == 0);
+ return WiredTigerErrorToStatus(ret);
+}
+
+Status
+DbImpl::Flush(FlushOptions const&, ColumnFamilyHandle* cfhp)
+{
+ ColumnFamilyHandleImpl *cf =
+ static_cast<ColumnFamilyHandleImpl *>(cfhp);
+ WT_SESSION *session = GetContext()->GetSession();
+ return WiredTigerErrorToStatus(session->checkpoint(session, ("target=(\"" + cf->GetURI() + "\")").c_str()));
+}
+
+Status
+DbImpl::Get(ReadOptions const &options, ColumnFamilyHandle *cfhp, Slice const &key, std::string *value)
+{
+ const char *errmsg = NULL;
+ OperationContext *context = GetContext(options);
+
+ WT_CURSOR *cursor;
+ int ret = wtrocks_get_cursor(context, cfhp, &cursor);
+ if (ret != 0)
+ return WiredTigerErrorToStatus(ret);
+
+ WT_ITEM item;
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ if ((ret = cursor->search(cursor)) == 0 &&
+ (ret = cursor->get_value(cursor, &item)) == 0) {
+ *value = std::string((const char *)item.data, item.size);
+ ret = cursor->reset(cursor);
+ }
+ if (ret == WT_NOTFOUND)
+ errmsg = "DB::Get key not found";
+ return WiredTigerErrorToStatus(ret, errmsg);
+}
+
+bool
+DbImpl::GetProperty(ColumnFamilyHandle*, Slice const&, std::string*)
+{
+ return false;
+}
+
+std::vector<Status>
+DbImpl::MultiGet(ReadOptions const&, std::vector<ColumnFamilyHandle*> const&, std::vector<Slice> const&, std::vector<std::string, std::allocator<std::string> >*)
+{
+ std::vector<Status> ret;
+ ret.push_back(WiredTigerErrorToStatus(ENOTSUP));
+ return ret;
+}
+
+Iterator *
+DbImpl::NewIterator(ReadOptions const &options, ColumnFamilyHandle *cfhp)
+{
+ OperationContext *context = GetContext(options);
+
+ WT_CURSOR *c;
+ int ret = wtrocks_get_cursor(context, cfhp, &c, 1);
+ assert(ret == 0);
+ return new IteratorImpl(this, c,
+ static_cast<ColumnFamilyHandleImpl *>(cfhp)->GetID());
+}
+
+Status
+DbImpl::Put(WriteOptions const &options, ColumnFamilyHandle *cfhp, Slice const &key, Slice const &value)
+{
+ WT_CURSOR *cursor;
+ int ret = wtrocks_get_cursor(GetContext(), cfhp, &cursor);
+ if (ret != 0)
+ return WiredTigerErrorToStatus(ret);
+
+ WT_ITEM item;
+ item.data = key.data();
+ item.size = key.size();
+ cursor->set_key(cursor, &item);
+ item.data = value.data();
+ item.size = value.size();
+ cursor->set_value(cursor, &item);
+ ret = cursor->insert(cursor);
+ return WiredTigerErrorToStatus(ret, NULL);
+}
diff --git a/src/third_party/wiredtiger/api/leveldb/rocksdb/LICENSE b/src/third_party/wiredtiger/api/leveldb/rocksdb/LICENSE
new file mode 100644
index 00000000000..b1329018690
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/rocksdb/LICENSE
@@ -0,0 +1,35 @@
+BSD License
+
+For rocksdb software
+
+Copyright (c) 2014, Facebook, Inc.
+All rights reserved.
+---------------------------------------------------------------------
+
+Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/third_party/wiredtiger/api/leveldb/rocksdb/PATENTS b/src/third_party/wiredtiger/api/leveldb/rocksdb/PATENTS
new file mode 100644
index 00000000000..6bafb4a342f
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/rocksdb/PATENTS
@@ -0,0 +1,23 @@
+Additional Grant of Patent Rights
+
+"Software" means the rocksdb software distributed by Facebook, Inc.
+
+Facebook hereby grants you a perpetual, worldwide, royalty-free,
+non-exclusive, irrevocable (subject to the termination provision below)
+license under any rights in any patent claims owned by Facebook, to make,
+have made, use, sell, offer to sell, import, and otherwise transfer the
+Software. For avoidance of doubt, no license is granted under Facebook's
+rights in any patent claims that are infringed by (i) modifications to the
+Software made by you or a third party, or (ii) the Software in combination
+with any software or other technology provided by you or a third party.
+
+The license granted hereunder will terminate, automatically and without
+notice, for anyone that makes any claim (including by filing any lawsuit,
+assertion or other action) alleging (a) direct, indirect, or contributory
+infringement or inducement to infringe any patent: (i) by Facebook or any
+of its subsidiaries or affiliates, whether or not such claim is related
+to the Software, (ii) by any party if such claim arises in whole or in
+part from any software, product or service of Facebook or any of its
+subsidiaries or affiliates, whether or not such claim is related to the
+Software, or (iii) by any party relating to the Software; or (b) that
+any right in any patent claim of Facebook is invalid or unenforceable.
diff --git a/src/third_party/wiredtiger/api/leveldb/rocksdb/write_batch.cc b/src/third_party/wiredtiger/api/leveldb/rocksdb/write_batch.cc
new file mode 100644
index 00000000000..aa7a3d239f9
--- /dev/null
+++ b/src/third_party/wiredtiger/api/leveldb/rocksdb/write_batch.cc
@@ -0,0 +1,275 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// WriteBatch::rep_ :=
+// sequence: fixed64
+// count: fixed32
+// data: record[count]
+// record :=
+// kTypeValue varstring varstring
+// kTypeMerge varstring varstring
+// kTypeDeletion varstring
+// kTypeColumnFamilyValue varint32 varstring varstring
+// kTypeColumnFamilyMerge varint32 varstring varstring
+// kTypeColumnFamilyDeletion varint32 varstring varstring
+// varstring :=
+// len: varint32
+// data: uint8[len]
+
+#include "leveldb_wt.h"
+
+#include "db/write_batch_internal.h"
+
+#include <stdexcept>
+
+namespace rocksdb {
+
+// WriteBatch header has an 8-byte sequence number followed by a 4-byte count.
+static const size_t kHeader = 12;
+
+WriteBatch::WriteBatch(size_t reserved_bytes) {
+ rep_.reserve((reserved_bytes > kHeader) ? reserved_bytes : kHeader);
+ Clear();
+}
+
+WriteBatch::~WriteBatch() { }
+
+WriteBatch::Handler::~Handler() { }
+
+void WriteBatch::Handler::Put(const Slice& key, const Slice& value) {
+ // you need to either implement Put or PutCF
+ throw std::runtime_error("Handler::Put not implemented!");
+}
+
+#ifdef NOT_YET
+void WriteBatch::Handler::Merge(const Slice& key, const Slice& value) {
+ throw std::runtime_error("Handler::Merge not implemented!");
+}
+#endif
+
+void WriteBatch::Handler::Delete(const Slice& key) {
+ // you need to either implement Delete or DeleteCF
+ throw std::runtime_error("Handler::Delete not implemented!");
+}
+
+#ifdef NOT_YET
+void WriteBatch::Handler::LogData(const Slice& blob) {
+ // If the user has not specified something to do with blobs, then we ignore
+ // them.
+}
+#endif
+
+bool WriteBatch::Handler::Continue() {
+ return true;
+}
+
+void WriteBatch::Clear() {
+ rep_.clear();
+ rep_.resize(kHeader);
+}
+
+int WriteBatch::Count() const {
+ return WriteBatchInternal::Count(this);
+}
+
+Status WriteBatch::Iterate(Handler* handler) const {
+ Slice input(rep_);
+ if (input.size() < kHeader) {
+ return Status::Corruption("malformed WriteBatch (too small)");
+ }
+
+ input.remove_prefix(kHeader);
+ Slice key, value, blob;
+ int found = 0;
+ Status s;
+ while (s.ok() && !input.empty() && handler->Continue()) {
+ char tag = input[0];
+ input.remove_prefix(1);
+ uint32_t column_family = 0; // default
+ switch (tag) {
+ case kTypeColumnFamilyValue:
+ if (!GetVarint32(&input, &column_family)) {
+ return Status::Corruption("bad WriteBatch Put");
+ }
+ // intentional fallthrough
+ case kTypeValue:
+ if (GetLengthPrefixedSlice(&input, &key) &&
+ GetLengthPrefixedSlice(&input, &value)) {
+ s = handler->PutCF(column_family, key, value);
+ found++;
+ } else {
+ return Status::Corruption("bad WriteBatch Put");
+ }
+ break;
+ case kTypeColumnFamilyDeletion:
+ if (!GetVarint32(&input, &column_family)) {
+ return Status::Corruption("bad WriteBatch Delete");
+ }
+ // intentional fallthrough
+ case kTypeDeletion:
+ if (GetLengthPrefixedSlice(&input, &key)) {
+ s = handler->DeleteCF(column_family, key);
+ found++;
+ } else {
+ return Status::Corruption("bad WriteBatch Delete");
+ }
+ break;
+ case kTypeColumnFamilyMerge:
+ if (!GetVarint32(&input, &column_family)) {
+ return Status::Corruption("bad WriteBatch Merge");
+ }
+ // intentional fallthrough
+ case kTypeMerge:
+ if (GetLengthPrefixedSlice(&input, &key) &&
+ GetLengthPrefixedSlice(&input, &value)) {
+ s = handler->MergeCF(column_family, key, value);
+ found++;
+ } else {
+ return Status::Corruption("bad WriteBatch Merge");
+ }
+ break;
+ case kTypeLogData:
+ if (GetLengthPrefixedSlice(&input, &blob)) {
+ handler->LogData(blob);
+ } else {
+ return Status::Corruption("bad WriteBatch Blob");
+ }
+ break;
+ default:
+ return Status::Corruption("unknown WriteBatch tag");
+ }
+ }
+ if (!s.ok()) {
+ return s;
+ }
+ if (found != WriteBatchInternal::Count(this)) {
+ return Status::Corruption("WriteBatch has wrong count");
+ } else {
+ return Status::OK();
+ }
+}
+
+int WriteBatchInternal::Count(const WriteBatch* b) {
+ return DecodeFixed32(b->rep_.data() + 8);
+}
+
+void WriteBatchInternal::SetCount(WriteBatch* b, int n) {
+ EncodeFixed32(&b->rep_[8], n);
+}
+
+#ifdef NOT_YET
+SequenceNumber WriteBatchInternal::Sequence(const WriteBatch* b) {
+ return SequenceNumber(DecodeFixed64(b->rep_.data()));
+}
+
+void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) {
+ EncodeFixed64(&b->rep_[0], seq);
+}
+#endif
+
+void WriteBatchInternal::Put(WriteBatch* b, uint32_t column_family_id,
+ const Slice& key, const Slice& value) {
+ WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
+ if (column_family_id == 0) {
+ b->rep_.push_back(static_cast<char>(kTypeValue));
+ } else {
+ b->rep_.push_back(static_cast<char>(kTypeColumnFamilyValue));
+ PutVarint32(&b->rep_, column_family_id);
+ }
+ PutLengthPrefixedSlice(&b->rep_, key);
+ PutLengthPrefixedSlice(&b->rep_, value);
+}
+
+namespace {
+inline uint32_t GetColumnFamilyID(ColumnFamilyHandle* column_family) {
+ uint32_t column_family_id = 0;
+ if (column_family != NULL) {
+ ColumnFamilyHandleImpl *cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
+ column_family_id = cfh->GetID();
+ }
+ return column_family_id;
+}
+} // namespace
+
+void WriteBatch::Put(ColumnFamilyHandle* column_family, const Slice& key,
+ const Slice& value) {
+ WriteBatchInternal::Put(this, GetColumnFamilyID(column_family), key, value);
+}
+
+void WriteBatchInternal::Put(WriteBatch* b, uint32_t column_family_id,
+ const SliceParts& key, const SliceParts& value) {
+ WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
+ if (column_family_id == 0) {
+ b->rep_.push_back(static_cast<char>(kTypeValue));
+ } else {
+ b->rep_.push_back(static_cast<char>(kTypeColumnFamilyValue));
+ PutVarint32(&b->rep_, column_family_id);
+ }
+ PutLengthPrefixedSliceParts(&b->rep_, key);
+ PutLengthPrefixedSliceParts(&b->rep_, value);
+}
+
+void WriteBatch::Put(ColumnFamilyHandle* column_family, const SliceParts& key,
+ const SliceParts& value) {
+ WriteBatchInternal::Put(this, GetColumnFamilyID(column_family), key, value);
+}
+
+void WriteBatchInternal::Delete(WriteBatch* b, uint32_t column_family_id,
+ const Slice& key) {
+ WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
+ if (column_family_id == 0) {
+ b->rep_.push_back(static_cast<char>(kTypeDeletion));
+ } else {
+ b->rep_.push_back(static_cast<char>(kTypeColumnFamilyDeletion));
+ PutVarint32(&b->rep_, column_family_id);
+ }
+ PutLengthPrefixedSlice(&b->rep_, key);
+}
+
+void WriteBatch::Delete(ColumnFamilyHandle* column_family, const Slice& key) {
+ WriteBatchInternal::Delete(this, GetColumnFamilyID(column_family), key);
+}
+
+#ifdef NOT_YET
+void WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id,
+ const Slice& key, const Slice& value) {
+ WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
+ if (column_family_id == 0) {
+ b->rep_.push_back(static_cast<char>(kTypeMerge));
+ } else {
+ b->rep_.push_back(static_cast<char>(kTypeColumnFamilyMerge));
+ PutVarint32(&b->rep_, column_family_id);
+ }
+ PutLengthPrefixedSlice(&b->rep_, key);
+ PutLengthPrefixedSlice(&b->rep_, value);
+}
+
+void WriteBatch::Merge(ColumnFamilyHandle* column_family, const Slice& key,
+ const Slice& value) {
+ WriteBatchInternal::Merge(this, GetColumnFamilyID(column_family), key, value);
+}
+
+void WriteBatch::PutLogData(const Slice& blob) {
+ rep_.push_back(static_cast<char>(kTypeLogData));
+ PutLengthPrefixedSlice(&rep_, blob);
+}
+#endif
+
+void WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) {
+ assert(contents.size() >= kHeader);
+ b->rep_.assign(contents.data(), contents.size());
+}
+
+void WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src) {
+ SetCount(dst, Count(dst) + Count(src));
+ assert(src->rep_.size() >= kHeader);
+ dst->rep_.append(src->rep_.data() + kHeader, src->rep_.size() - kHeader);
+}
+
+} // namespace rocksdb