summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlex Gorrod <alexander.gorrod@mongodb.com>2017-06-19 20:58:06 +0000
committerAlex Gorrod <alexander.gorrod@mongodb.com>2017-06-19 20:58:06 +0000
commit0513806e66fca4715c20ae5ea64e172a2bf1ee74 (patch)
tree4337ea4650af08f0343ad5a73e9cda6d88f6e535
parenta4bee00dcaf814493818c1a55f8d060e9195ea9e (diff)
downloadmongo-0513806e66fca4715c20ae5ea64e172a2bf1ee74.tar.gz
Import wiredtiger: d687cc19830b5b479d7474724ed609c416bfb1d2 from branch mongodb-3.0
ref: b1aab8db7d..d687cc1983 for: 3.0.16 WT-2321 Fix a race between eviction and worker threads on the eviction queue WT-3000 Missing log records in recovery when crashing after a log file switch WT-3008 Run wtperf stress testing against all maintained branches WT-3010 shared-cache-stress can hang on mongodb-3.0 branch WT-3038 WiredTigers mongodb-3.0 branch crashes and hangs when run against test format WT-3049 Backport test/format to mongodb-3.0 branch WT-3158 Fix structure layout on Windows. WT-3277 [v3.0] Eviction of pages belonging to tables that have been force dropped WT-3286 [v3.0] Improve search if an index hint is wrong
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/stress/btree-split-stress.wtperf10
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/stress/shared-cache-stress.wtperf (renamed from src/third_party/wiredtiger/bench/wtperf/runners/shared-cache-stress.wtperf)0
-rw-r--r--src/third_party/wiredtiger/build_posix/Make.subdirs9
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok19
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_style10
-rw-r--r--src/third_party/wiredtiger/import.data6
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_discard.c1
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c9
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_walk.c18
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c22
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c3
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h4
-rw-r--r--src/third_party/wiredtiger/src/include/cache.i7
-rw-r--r--src/third_party/wiredtiger/src/log/log.c265
-rw-r--r--src/third_party/wiredtiger/test/format/Makefile.am27
-rw-r--r--src/third_party/wiredtiger/test/format/backup.c139
-rw-r--r--src/third_party/wiredtiger/test/format/bdb.c54
-rw-r--r--src/third_party/wiredtiger/test/format/bulk.c93
-rw-r--r--src/third_party/wiredtiger/test/format/compact.c12
-rw-r--r--src/third_party/wiredtiger/test/format/config.c263
-rw-r--r--src/third_party/wiredtiger/test/format/config.h68
-rw-r--r--src/third_party/wiredtiger/test/format/format.h137
-rw-r--r--src/third_party/wiredtiger/test/format/lrt.c170
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c990
-rw-r--r--src/third_party/wiredtiger/test/format/salvage.c41
-rwxr-xr-xsrc/third_party/wiredtiger/test/format/smoke.sh12
-rw-r--r--src/third_party/wiredtiger/test/format/t.c165
-rw-r--r--src/third_party/wiredtiger/test/format/util.c319
-rw-r--r--src/third_party/wiredtiger/test/format/wts.c273
-rw-r--r--src/third_party/wiredtiger/test/recovery/Makefile.am18
-rw-r--r--src/third_party/wiredtiger/test/recovery/random-abort.c428
-rwxr-xr-xsrc/third_party/wiredtiger/test/recovery/smoke.sh9
-rw-r--r--src/third_party/wiredtiger/test/recovery/truncated-log.c398
-rw-r--r--src/third_party/wiredtiger/test/utility/Makefile.am4
-rw-r--r--src/third_party/wiredtiger/test/utility/misc.c236
-rw-r--r--src/third_party/wiredtiger/test/utility/parse_opts.c130
-rw-r--r--src/third_party/wiredtiger/test/utility/test_util.h194
-rw-r--r--src/third_party/wiredtiger/test/utility/thread.c141
39 files changed, 3502 insertions, 1204 deletions
diff --git a/src/third_party/wiredtiger/bench/wtperf/stress/btree-split-stress.wtperf b/src/third_party/wiredtiger/bench/wtperf/stress/btree-split-stress.wtperf
new file mode 100644
index 00000000000..e8f20cb1f53
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/stress/btree-split-stress.wtperf
@@ -0,0 +1,10 @@
+conn_config="cache_size=2GB,statistics=[fast,clear],statistics_log=(wait=10),eviction=(threads_max=4,threads_min=4)"
+table_config="type=file,leaf_page_max=8k,internal_page_max=8k,memory_page_max=2MB,split_deepen_min_child=250"
+icount=200000
+report_interval=5
+run_time=300
+#reopen_connection=false
+populate_threads=2
+value_sz=256
+#read_range=100
+threads=((count=4,inserts=1,throttle=100000),(count=8,reads=1))
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/shared-cache-stress.wtperf b/src/third_party/wiredtiger/bench/wtperf/stress/shared-cache-stress.wtperf
index 87d14f4f5c1..87d14f4f5c1 100644
--- a/src/third_party/wiredtiger/bench/wtperf/runners/shared-cache-stress.wtperf
+++ b/src/third_party/wiredtiger/bench/wtperf/stress/shared-cache-stress.wtperf
diff --git a/src/third_party/wiredtiger/build_posix/Make.subdirs b/src/third_party/wiredtiger/build_posix/Make.subdirs
index 8f8614a7016..37a8d9e1a66 100644
--- a/src/third_party/wiredtiger/build_posix/Make.subdirs
+++ b/src/third_party/wiredtiger/build_posix/Make.subdirs
@@ -20,11 +20,18 @@ examples/c
lang/java JAVA
examples/java JAVA
lang/python PYTHON
+
+# Test/Benchmark support library.
+test/utility
+
+# Test programs.
test/bloom
test/checkpoint
test/fops
-test/format HAVE_BERKELEY_DB
+test/format
test/huge
test/packing
+test/recovery
test/salvage
test/thread
+
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 6b2ed50701d..657d20a55bd 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -16,6 +16,7 @@ ASYNC
Addr
Ailamaki
Alakuijala
+Alexandrescu's
Alloc
Async
Athanassoulis
@@ -331,6 +332,7 @@ Subtree
Subtrees
TAILQ
TCMalloc
+TESTUTIL
TODO
TOOSMALL
TORTIOUS
@@ -389,6 +391,7 @@ WiredTigerTxn
WithSeeds
Wmissing
Wuninitialized
+Wunused
XP
ZLIB
Zlib
@@ -416,6 +419,7 @@ arg
argc
args
argv
+assertfmt
async
asyncopp
autockpt
@@ -472,6 +476,8 @@ centric
cfg
cfkos
change's
+chdir
+checkfmt
checkfrag
checkpointed
checkpointer
@@ -539,6 +545,7 @@ datasets
datasource
datastore
dbc
+dcalloc
decile
deciles
decl
@@ -569,12 +576,16 @@ dlh
dll
dlopen
dlsym
+dmalloc
dmsg
+drealloc
ds
dsk
dsrc
dst
dstlen
+dstrdup
+dstrndup
dsync
dt
dtype
@@ -648,6 +659,7 @@ fread
free'd
freelist
fs
+fscanf
fsm
fstat
fsync
@@ -690,6 +702,7 @@ iSh
ibackup
icount
idx
+ifdef
ifdef's
ifndef
ikey
@@ -781,6 +794,7 @@ lsnappy
lu
lz
lzo
+mT
madvise
majorp
malloc
@@ -882,6 +896,7 @@ optype
os
ovfl
ownp
+pR
packv
parens
parserp
@@ -1117,6 +1132,7 @@ uid
uint
uintmax
unbare
+unbuffered
uncompressing
uncompresssed
undef
@@ -1150,6 +1166,7 @@ usr
utf
util
uu
+vW
va
valuep
valuev
@@ -1170,6 +1187,7 @@ vsnprintf
vtype
vunpack
vupdate
+waitpid
walk's
wb
wiredtiger
@@ -1189,6 +1207,7 @@ xxxx
xxxxx
xxxxxx
zalloc
+zf
zfree
zlib
zu
diff --git a/src/third_party/wiredtiger/dist/s_style b/src/third_party/wiredtiger/dist/s_style
index b0a642a4e26..9c03f9b08b5 100755
--- a/src/third_party/wiredtiger/dist/s_style
+++ b/src/third_party/wiredtiger/dist/s_style
@@ -92,6 +92,16 @@ for f in \
cat $t
}
+ # If we don't have matching pack-begin and pack-end calls, we don't get
+ # an error, we just get a Windows performance regression. Using awk and
+ # not wc to ensure there's no whitespace in the assignment.
+ egrep WT_PACKED_STRUCT $f > $t
+ cnt=`awk 'BEGIN { line = 0 } { ++line } END { print line }' < $t`
+ test `expr "$cnt" % 2` -ne 0 && {
+ echo "$f: mismatched WT_PACKED_STRUCT_BEGIN/END lines"
+ cat $t
+ }
+
# Direct calls to functions we're not supposed to use in the library.
# We don't check for all of them, just a few of the common ones.
if ! expr "$f" : 'bench/.*' > /dev/null &&
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
new file mode 100644
index 00000000000..7deef8a0f3b
--- /dev/null
+++ b/src/third_party/wiredtiger/import.data
@@ -0,0 +1,6 @@
+{
+ "commit": "d687cc19830b5b479d7474724ed609c416bfb1d2",
+ "github": "wiredtiger/wiredtiger.git",
+ "vendor": "wiredtiger",
+ "branch": "mongodb-3.0"
+}
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 4affc9c243e..d533c805d3a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -634,6 +634,8 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page)
__dmsg(ds, ", evict-lru");
if (F_ISSET_ATOMIC(page, WT_PAGE_SCANNING))
__dmsg(ds, ", scanning");
+ if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT))
+ __dmsg(ds, ", split-insert");
if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_LOCKED))
__dmsg(ds, ", split locked");
diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c
index 30e19147e12..57db820697f 100644
--- a/src/third_party/wiredtiger/src/btree/bt_discard.c
+++ b/src/third_party/wiredtiger/src/btree/bt_discard.c
@@ -411,6 +411,7 @@ __free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd)
for (; upd != NULL; upd = next) {
/* Everything we free should be visible to everyone. */
WT_ASSERT(session,
+ F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
F_ISSET(session, WT_SESSION_DISCARD_FORCE) ||
upd->txnid == WT_TXN_ABORTED ||
__wt_txn_visible_all(session, upd->txnid));
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 00452af549d..0907905bd43 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -624,11 +624,14 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
* it may not have been set.
*/
WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval));
- btree->maxmempage =
- WT_MAX((uint64_t)cval.val, 50 * (uint64_t)btree->maxleafpage);
+ btree->maxmempage = (uint64_t)cval.val;
cache_size = S2C(session)->cache_size;
if (cache_size > 0)
- btree->maxmempage = WT_MIN(btree->maxmempage, cache_size / 4);
+ btree->maxmempage = WT_MIN(btree->maxmempage,
+ cache_size / 10);
+
+ /* Enforce a lower bound of a single disk leaf page */
+ btree->maxmempage = WT_MAX(btree->maxmempage, btree->maxleafpage);
/*
* Get the split percentage (reconciliation splits pages into smaller
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index c7d83d8dfff..530c1af2add 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -40,21 +40,15 @@ retry: WT_INTL_INDEX_GET(session, ref->home, pindex);
* is slower.
*/
i = ref->pindex_hint;
- if (i < pindex->entries && pindex->index[i]->page == ref->page) {
- *pindexp = pindex;
- *slotp = i;
- return;
- }
+ if (i < pindex->entries && pindex->index[i]->page == ref->page)
+ goto found;
while (++i < pindex->entries)
- if (pindex->index[i]->page == ref->page) {
- *pindexp = pindex;
- *slotp = ref->pindex_hint = i;
- return;
- }
+ if (pindex->index[i]->page == ref->page)
+ goto found;
for (i = 0; i < pindex->entries; ++i)
if (pindex->index[i]->page == ref->page) {
- *pindexp = pindex;
- *slotp = ref->pindex_hint = i;
+found: *pindexp = pindex;
+ *slotp = i;
return;
}
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 1314f715907..dc16f3bf616 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -837,9 +837,8 @@ __evict_lru_walk(WT_SESSION_IMPL *session, uint32_t flags)
{
WT_CACHE *cache;
WT_DECL_RET;
- WT_EVICT_ENTRY *evict;
uint64_t cutoff;
- uint32_t candidates, entries, i;
+ uint32_t candidates, entries;
cache = S2C(session)->cache;
@@ -857,6 +856,14 @@ __evict_lru_walk(WT_SESSION_IMPL *session, uint32_t flags)
while (entries > 0 && cache->evict[entries - 1].ref == NULL)
--entries;
+ /*
+ * If we have more entries than the maximum tracked between walks,
+ * clear them. Do this before figuring out how many of the entries are
+ * candidates so we never end up with more candidates than entries.
+ */
+ while (entries > WT_EVICT_WALK_BASE)
+ __evict_list_clear(session, &cache->evict[--entries]);
+
cache->evict_entries = entries;
if (entries == 0) {
@@ -900,15 +907,6 @@ __evict_lru_walk(WT_SESSION_IMPL *session, uint32_t flags)
cache->evict_candidates = candidates;
}
- /* If we have more than the minimum number of entries, clear them. */
- if (cache->evict_entries > WT_EVICT_WALK_BASE) {
- for (i = WT_EVICT_WALK_BASE, evict = cache->evict + i;
- i < cache->evict_entries;
- i++, evict++)
- __evict_list_clear(session, evict);
- cache->evict_entries = WT_EVICT_WALK_BASE;
- }
-
cache->evict_current = cache->evict;
__wt_spin_unlock(session, &cache->evict_lock);
@@ -1155,7 +1153,7 @@ __evict_init_candidate(
evict->ref = ref;
evict->btree = S2BT(session);
- /* Mark the page on the list */
+ /* Mark the page on the list; set last to flush the other updates. */
F_SET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU);
}
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 046d8bb3eba..c944b9b0e22 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -413,7 +413,8 @@ __evict_review(
if (closing)
FLD_SET(reconcile_flags, WT_SKIP_UPDATE_ERR);
else if (!WT_PAGE_IS_INTERNAL(page) &&
- page->read_gen == WT_READGEN_OLDEST)
+ (page->read_gen == WT_READGEN_OLDEST ||
+ !__wt_txn_visible_all(session, page->modify->update_txn)))
FLD_SET(reconcile_flags, WT_SKIP_UPDATE_RESTORE);
WT_RET(__wt_reconcile(session, ref, NULL, reconcile_flags));
WT_ASSERT(session,
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index fb497f64963..e46fb66930a 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -684,7 +684,7 @@ struct __wt_ref {
* up our slot in the page's index structure.
*/
WT_PAGE * volatile home; /* Reference page */
- uint32_t pindex_hint; /* Reference page index hint */
+ volatile uint32_t pindex_hint; /* Reference page index hint */
#define WT_REF_DISK 0 /* Page is on disk */
#define WT_REF_DELETED 1 /* Page is on disk, but deleted */
@@ -888,7 +888,7 @@ WT_PACKED_STRUCT_BEGIN(__wt_update)
#define WT_UPDATE_MEMSIZE(upd) \
WT_ALIGN(sizeof(WT_UPDATE) + \
(WT_UPDATE_DELETED_ISSET(upd) ? 0 : (upd)->size), 32)
-};
+WT_PACKED_STRUCT_END
/*
* WT_INSERT --
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i
index f63a07de44e..99e4ec847fe 100644
--- a/src/third_party/wiredtiger/src/include/cache.i
+++ b/src/third_party/wiredtiger/src/include/cache.i
@@ -195,6 +195,13 @@ __wt_cache_full_check(WT_SESSION_IMPL *session)
return (0);
/*
+ * If the connection is closing we do not need eviction from an
+ * application thread. The eviction subsystem is already closed.
+ */
+ if (F_ISSET(S2C(session), WT_CONN_CLOSING))
+ return (0);
+
+ /*
* Only wake the eviction server the first time through here (if the
* cache is too full).
*
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 5a7c2cb1b20..d38db0aac1d 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -10,7 +10,7 @@
static int __log_decompress(WT_SESSION_IMPL *, WT_ITEM *, WT_ITEM **);
static int __log_openfile(
- WT_SESSION_IMPL *, bool, WT_FH **, const char *, uint32_t);
+ WT_SESSION_IMPL *, bool, WT_FH **, const char *, uint32_t);
static int __log_read_internal(WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *,
uint32_t);
static int __log_write_internal(WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *,
@@ -19,6 +19,112 @@ static int __log_write_internal(WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *,
#define WT_LOG_COMPRESS_SKIP (offsetof(WT_LOG_RECORD, record))
/*
+ * __log_has_hole --
+ * Determine if the current offset represents a hole in the log
+ * file (i.e. there is valid data somewhere after the hole), or
+ * if this is the end of this log file and the remainder of the
+ * file is zeroes.
+ */
+static int
+__log_has_hole(WT_SESSION_IMPL *session,
+ WT_FH *fh, wt_off_t log_size, wt_off_t offset, bool *hole)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ wt_off_t off, remainder;
+ size_t bufsz, rdlen;
+ char *buf, *zerobuf;
+
+ conn = S2C(session);
+ log = conn->log;
+ remainder = log_size - offset;
+ *hole = false;
+
+ /*
+ * It can be very slow looking for the last real record in the log
+ * in very small chunks. Walk a megabyte at a time. If we find a
+ * part of the log that is not just zeroes we know this log file
+ * has a hole in it.
+ */
+ buf = zerobuf = NULL;
+ if (log == NULL || log->allocsize < WT_MEGABYTE)
+ bufsz = WT_MEGABYTE;
+ else
+ bufsz = log->allocsize;
+
+ if ((size_t)remainder < bufsz)
+ bufsz = (size_t)remainder;
+ WT_RET(__wt_calloc_def(session, bufsz, &buf));
+ WT_ERR(__wt_calloc_def(session, bufsz, &zerobuf));
+
+ /*
+ * Read in a chunk starting at the given offset.
+ * Compare against a known zero byte chunk.
+ */
+ for (off = offset; remainder > 0;
+ remainder -= (wt_off_t)rdlen, off += (wt_off_t)rdlen) {
+ rdlen = WT_MIN(bufsz, (size_t)remainder);
+ WT_ERR(__wt_read(session, fh, off, rdlen, buf));
+ if (memcmp(buf, zerobuf, rdlen) != 0) {
+ *hole = true;
+ break;
+ }
+ }
+
+err: __wt_free(session, buf);
+ __wt_free(session, zerobuf);
+ return (ret);
+}
+
+/*
+ * __log_wait_for_earlier_slot --
+ * Wait for write_lsn to catch up to this slot.
+ */
+static int
+__log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+ int yield_count;
+
+ conn = S2C(session);
+ log = conn->log;
+ yield_count = 0;
+
+ while (LOG_CMP(&log->write_lsn, &slot->slot_release_lsn) != 0) {
+ if (++yield_count < 1000)
+ __wt_yield();
+ else
+ WT_RET(__wt_cond_wait(
+ session, log->log_write_cond, 200));
+ }
+ return (0);
+}
+
+/*
+ * __log_fs_write --
+ * Wrapper when writing to a log file. If we're writing to a new log
+ * file for the first time wait for writes to the previous log file.
+ */
+static int
+__log_fs_write(WT_SESSION_IMPL *session,
+ WT_LOGSLOT *slot, wt_off_t offset, size_t len, const void *buf)
+{
+ /*
+ * If we're writing into a new log file, we have to wait for all
+ * writes to the previous log file to complete otherwise there could
+ * be a hole at the end of the previous log file that we cannot detect.
+ */
+ if (slot->slot_release_lsn.file < slot->slot_start_lsn.file) {
+ __log_wait_for_earlier_slot(session, slot);
+ WT_RET(__wt_log_force_sync(session, &slot->slot_release_lsn));
+ (void)__wt_write(session, slot->slot_fh, offset, len, buf);
+ }
+ return (__wt_write(session, slot->slot_fh, offset, len, buf));
+}
+
+/*
* __wt_log_ckpt --
* Record the given LSN as the checkpoint LSN and signal the archive
* thread as needed.
@@ -51,6 +157,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
conn = S2C(session);
log = conn->log;
+ log_fh = NULL;
/*
* We need to wait for the previous log file to get written
@@ -83,25 +190,20 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
* Sync the log file if needed.
*/
if (LOG_CMP(&log->sync_lsn, min_lsn) < 0) {
- /*
- * Get our own file handle to the log file. It is possible
- * for the file handle in the log structure to change out
- * from under us and either be NULL or point to a different
- * file than we want.
- */
- WT_ERR(__log_openfile(session,
- false, &log_fh, WT_LOG_FILENAME, min_lsn->file));
+ WT_ERR(__log_openfile(session, false,
+ &log_fh, WT_LOG_FILENAME, min_lsn->file));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_force_sync: sync %s to LSN %d/%lu",
log_fh->name, min_lsn->file, min_lsn->offset));
WT_ERR(__wt_fsync(session, log_fh));
log->sync_lsn = *min_lsn;
WT_STAT_FAST_CONN_INCR(session, log_sync);
- WT_ERR(__wt_close(session, &log_fh));
WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
}
err:
__wt_spin_unlock(session, &log->log_sync_lock);
+ if (log_fh != NULL)
+ WT_TRET(__wt_close(session, &log_fh));
return (ret);
}
@@ -467,13 +569,13 @@ __log_fill(WT_SESSION_IMPL *session,
logrec = (WT_LOG_RECORD *)record->mem;
/*
- * Call __wt_write. For now the offset is the real byte offset.
+ * Call write. For now the offset is the real byte offset.
* If the offset becomes a unit of WT_LOG_ALIGN this is where we would
* multiply by WT_LOG_ALIGN to get the real file byte offset for
* write().
*/
if (direct)
- WT_ERR(__wt_write(session, myslot->slot->slot_fh,
+ WT_ERR(__log_fs_write(session, myslot->slot,
myslot->offset + myslot->slot->slot_start_offset,
(size_t)logrec->len, (void *)logrec));
else
@@ -936,94 +1038,6 @@ __wt_log_close(WT_SESSION_IMPL *session)
}
/*
- * __log_filesize --
- * Returns an estimate of the real end of log file.
- */
-static int
-__log_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *eof)
-{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- wt_off_t log_size, off, off1;
- uint32_t allocsize, bufsz;
- char *buf, *zerobuf;
-
- conn = S2C(session);
- log = conn->log;
- if (eof == NULL)
- return (0);
- *eof = 0;
- WT_RET(__wt_filesize(session, fh, &log_size));
- if (log == NULL)
- allocsize = WT_LOG_ALIGN;
- else
- allocsize = log->allocsize;
-
- /*
- * It can be very slow looking for the last real record in the log
- * in very small chunks. Walk backward by a megabyte at a time. When
- * we find a part of the log that is not just zeroes, walk to find
- * the last record.
- */
- buf = zerobuf = NULL;
- if (allocsize < WT_MEGABYTE && log_size > WT_MEGABYTE)
- bufsz = WT_MEGABYTE;
- else
- bufsz = allocsize;
- WT_RET(__wt_calloc_def(session, bufsz, &buf));
- WT_ERR(__wt_calloc_def(session, bufsz, &zerobuf));
-
- /*
- * Read in a chunk starting at the end of the file. Keep going until
- * we reach the beginning or we find a chunk that contains any non-zero
- * bytes. Compare against a known zero byte chunk.
- */
- for (off = log_size - (wt_off_t)bufsz;
- off >= 0;
- off -= (wt_off_t)bufsz) {
- WT_ERR(__wt_read(session, fh, off, bufsz, buf));
- if (memcmp(buf, zerobuf, bufsz) != 0)
- break;
- }
-
- /*
- * If we're walking by large amounts, now walk by the real allocsize
- * to find the real end, if we found something. Otherwise we reached
- * the beginning of the file. Offset can go negative if the log file
- * size is not a multiple of a megabyte. The first chunk of the log
- * file will always be non-zero.
- */
- if (off < 0)
- off = 0;
-
- /*
- * We know all log records are aligned at log->allocsize. The first
- * item in a log record is always a 32-bit length. Look for any
- * non-zero length at the allocsize boundary. This may not be a true
- * log record since it could be the middle of a large record. But we
- * know no log record starts after it. Return an estimate of the log
- * file size.
- */
- for (off1 = bufsz - allocsize;
- off1 > 0; off1 -= (wt_off_t)allocsize)
- if (memcmp(buf + off1, zerobuf, sizeof(uint32_t)) != 0)
- break;
- off = off + off1;
-
- /*
- * Set EOF to the last zero-filled record we saw.
- */
- *eof = off + (wt_off_t)allocsize;
-err:
- if (buf != NULL)
- __wt_free(session, buf);
- if (zerobuf != NULL)
- __wt_free(session, zerobuf);
- return (ret);
-}
-
-/*
* __log_release --
* Release a log slot.
*/
@@ -1035,13 +1049,11 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
WT_LOG *log;
WT_LSN sync_lsn;
size_t write_size;
- int yield_count;
bool locked;
WT_DECL_SPINLOCK_ID(id); /* Must appear last */
conn = S2C(session);
log = conn->log;
- yield_count = 0;
locked = false;
if (freep != NULL)
*freep = true;
@@ -1050,7 +1062,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
if (F_ISSET(slot, WT_SLOT_BUFFERED)) {
write_size = (size_t)
(slot->slot_end_lsn.offset - slot->slot_start_offset);
- WT_ERR(__wt_write(session, slot->slot_fh,
+ WT_ERR(__log_fs_write(session, slot,
slot->slot_start_offset, write_size, slot->slot_buf.mem));
}
@@ -1079,13 +1091,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
* be holes in the log file.
*/
WT_STAT_FAST_CONN_INCR(session, log_release_write_lsn);
- while (LOG_CMP(&log->write_lsn, &slot->slot_release_lsn) != 0) {
- if (++yield_count < 1000)
- __wt_yield();
- else
- WT_ERR(__wt_cond_wait(
- session, log->log_write_cond, 200));
- }
+ WT_ERR(__log_wait_for_earlier_slot(session, slot));
log->write_start_lsn = slot->slot_start_lsn;
log->write_lsn = slot->slot_end_lsn;
WT_ERR(__wt_cond_signal(session, log->log_write_cond));
@@ -1373,7 +1379,7 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
uint32_t allocsize, cksum, firstlog, lastlog, lognum, rdup_len, reclen;
u_int i, logcount;
int firstrecord;
- bool eol;
+ bool eol, partial_record;
char **logfiles;
conn = S2C(session);
@@ -1462,12 +1468,22 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
}
WT_ERR(__log_openfile(
session, false, &log_fh, WT_LOG_FILENAME, start_lsn.file));
- WT_ERR(__log_filesize(session, log_fh, &log_size));
+ WT_ERR(__wt_filesize(session, log_fh, &log_size));
rd_lsn = start_lsn;
WT_ERR(__wt_buf_initsize(session, &buf, WT_LOG_ALIGN));
for (;;) {
if (rd_lsn.offset + allocsize > log_size) {
advance:
+ if (rd_lsn.offset == log_size)
+ partial_record = false;
+ else
+ /*
+ * See if there is anything non-zero at the
+ * end of this log file.
+ */
+ WT_ERR(__log_has_hole(
+ session, log_fh, log_size,
+ rd_lsn.offset, &partial_record));
/*
* If we read the last record, go to the next file.
*/
@@ -1475,6 +1491,15 @@ advance:
log_fh = NULL;
eol = true;
/*
+ * If we had a partial record, we'll want to break
+ * now after closing and truncating. Although for now
+ * log_truncate does not modify the LSN passed in,
+ * this code does not assume it is unmodified after that
+ * call which is why it uses the boolean set earlier.
+ */
+ if (partial_record)
+ break;
+ /*
* Truncate this log file before we move to the next.
*/
if (LF_ISSET(WT_LOGSCAN_RECOVER))
@@ -1490,7 +1515,7 @@ advance:
break;
WT_ERR(__log_openfile(session,
false, &log_fh, WT_LOG_FILENAME, rd_lsn.file));
- WT_ERR(__log_filesize(session, log_fh, &log_size));
+ WT_ERR(__wt_filesize(session, log_fh, &log_size));
eol = false;
continue;
}
@@ -1515,9 +1540,14 @@ advance:
* that may exist.
*/
if (reclen == 0) {
- /* This LSN is the end. */
- eol = true;
- break;
+ WT_ERR(__log_has_hole(
+ session, log_fh, log_size, rd_lsn.offset, &eol));
+ if (eol)
+ /* Found a hole. This LSN is the end. */
+ break;
+ else
+ /* Last record in log. Look for more. */
+ goto advance;
}
rdup_len = __wt_rduppo2(reclen, allocsize);
if (reclen > allocsize) {
@@ -1862,7 +1892,8 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
WT_ERR(__log_release(session, myslot.slot, &free_slot));
if (free_slot)
WT_ERR(__wt_log_slot_free(session, myslot.slot));
- } else if (LF_ISSET(WT_LOG_FSYNC)) {
+ }
+ if (LF_ISSET(WT_LOG_FSYNC)) {
/* Wait for our writes to reach disk */
while (LOG_CMP(&log->sync_lsn, &lsn) <= 0 &&
myslot.slot->slot_error == 0)
diff --git a/src/third_party/wiredtiger/test/format/Makefile.am b/src/third_party/wiredtiger/test/format/Makefile.am
index 421de768248..ee5ce256653 100644
--- a/src/third_party/wiredtiger/test/format/Makefile.am
+++ b/src/third_party/wiredtiger/test/format/Makefile.am
@@ -1,14 +1,27 @@
-AM_CPPFLAGS = -DBERKELEY_DB_PATH=\"$(BERKELEY_DB_PATH)\" \
- -I$(top_builddir) -I$(top_srcdir)/src/include \
- -I$(BERKELEY_DB_PATH)/include
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
+if HAVE_BERKELEY_DB
+AM_CPPFLAGS +=-DHAVE_BERKELEY_DB
+AM_CPPFLAGS +=-DBERKELEY_DB_PATH=\"$(BERKELEY_DB_PATH)\"
+AM_CPPFLAGS +=-I$(BERKELEY_DB_PATH)/include
+endif
noinst_PROGRAMS = t
noinst_SCRIPTS = s_dumpcmp
t_SOURCES =\
- config.h format.h backup.c bdb.c bulk.c compact.c config.c ops.c \
- salvage.c t.c util.c wts.c
-
-t_LDADD = $(top_builddir)/libwiredtiger.la -L$(BERKELEY_DB_PATH)/lib -ldb
+ backup.c bulk.c compact.c config.c lrt.c ops.c salvage.c t.c util.c \
+ wts.c
+
+if HAVE_BERKELEY_DB
+t_SOURCES += bdb.c
+endif
+
+t_LDADD = $(top_builddir)/test/utility/libtest_util.la
+t_LDADD +=$(top_builddir)/libwiredtiger.la
+if HAVE_BERKELEY_DB
+t_LDADD += -L$(BERKELEY_DB_PATH)/lib -ldb
+endif
t_LDFLAGS = -static
#noinst_LTLIBRARIES = lzo_compress.la
diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c
index 3b95ea92b5e..d7446df5b82 100644
--- a/src/third_party/wiredtiger/test/format/backup.c
+++ b/src/third_party/wiredtiger/test/format/backup.c
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2014-2016 MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -37,41 +37,39 @@ check_copy(void)
{
WT_CONNECTION *conn;
WT_SESSION *session;
- int ret;
- wts_open(g.home_backup, 0, &conn);
+ wts_open(g.home_backup, false, &conn);
- if ((ret = conn->open_session(
- conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session: %s", g.home_backup);
+ testutil_checkfmt(
+ conn->open_session(conn, NULL, NULL, &session),
+ "%s", g.home_backup);
- ret = session->verify(session, g.uri, NULL);
- if (ret != 0)
- die(ret, "session.verify: %s: %s", g.home_backup, g.uri);
+ testutil_checkfmt(
+ session->verify(session, g.uri, NULL),
+ "%s: %s", g.home_backup, g.uri);
- if ((ret = conn->close(conn, NULL)) != 0)
- die(ret, "connection.close: %s", g.home_backup);
+ testutil_checkfmt(conn->close(conn, NULL), "%s", g.home_backup);
}
/*
* copy_file --
- * Copy a single file into the backup directory.
+ * Copy a single file into the backup directories.
*/
static void
-copy_file(const char *name)
+copy_file(WT_SESSION *session, const char *name)
{
size_t len;
char *cmd;
int ret;
- len = strlen(g.home) + strlen(g.home_backup) + strlen(name) * 2 + 20;
- if ((cmd = malloc(len)) == NULL)
- die(errno, "malloc");
- (void)snprintf(cmd, len,
- "cp %s/%s %s/%s", g.home, name, g.home_backup, name);
- if ((ret = system(cmd)) != 0)
- die(ret, "backup copy: %s", cmd);
- free(cmd);
+ len = strlen(g.home) + strlen(g.home_backup) + strlen(name) * 2 + 20;
+ if ((cmd = malloc(len)) == NULL)
+ testutil_die(errno, "malloc");
+ (void)snprintf(cmd, len,
+ "cp %s/%s %s/%s", g.home, name, g.home_backup, name);
+ if ((ret = system(cmd)) != 0)
+ testutil_die(ret, "backup copy: %s", cmd);
+ free(cmd);
}
/*
@@ -83,10 +81,11 @@ backup(void *arg)
{
WT_CONNECTION *conn;
WT_CURSOR *backup_cursor;
+ WT_DECL_RET;
WT_SESSION *session;
- u_int period;
- int ret;
- const char *key;
+ u_int incremental, period;
+ bool full;
+ const char *config, *key;
(void)(arg);
@@ -97,58 +96,90 @@ backup(void *arg)
return (NULL);
/* Open a session. */
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
/*
- * Perform a backup at somewhere under 10 seconds (so we get at
- * least one done), and then at 45 second intervals.
+ * Perform a full backup at somewhere under 10 seconds (that way there's
+ * at least one), then at larger intervals, optionally do incremental
+ * backups between full backups.
*/
- for (period = mmrand(NULL, 1, 10);; period = 45) {
+ incremental = 0;
+ for (period = mmrand(NULL, 1, 10);; period = mmrand(NULL, 20, 45)) {
/* Sleep for short periods so we don't make the run wait. */
while (period > 0 && !g.workers_finished) {
--period;
sleep(1);
}
- if (g.workers_finished)
- break;
- /* Lock out named checkpoints */
- if ((ret = pthread_rwlock_wrlock(&g.backup_lock)) != 0)
- die(ret, "pthread_rwlock_wrlock: backup lock");
+ /*
+ * We can't drop named checkpoints while there's a backup in
+ * progress, serialize backups with named checkpoints. Wait
+ * for the checkpoint to complete, otherwise backups might be
+ * starved out.
+ */
+ testutil_check(pthread_rwlock_wrlock(&g.backup_lock));
+ if (g.workers_finished) {
+ testutil_check(pthread_rwlock_unlock(&g.backup_lock));
+ break;
+ }
- /* Re-create the backup directory. */
- if ((ret = system(g.home_backup_init)) != 0)
- die(ret, "backup directory creation failed");
+ if (incremental) {
+ config = "target=(\"log:\")";
+ full = false;
+ } else {
+ /* Re-create the backup directory. */
+ testutil_checkfmt(
+ system(g.home_backup_init),
+ "%s", "backup directory creation failed");
+
+ config = NULL;
+ full = true;
+ }
/*
- * open_cursor can return EBUSY if a metadata operation is
- * currently happening - retry in that case.
+ * open_cursor can return EBUSY if concurrent with a metadata
+ * operation, retry in that case.
*/
- while ((ret = session->open_cursor(session,
- "backup:", NULL, NULL, &backup_cursor)) == EBUSY)
- sleep(1);
+ while ((ret = session->open_cursor(
+ session, "backup:", NULL, config, &backup_cursor)) == EBUSY)
+ __wt_yield();
if (ret != 0)
- die(ret, "session.open_cursor: backup");
+ testutil_die(ret, "session.open_cursor: backup");
while ((ret = backup_cursor->next(backup_cursor)) == 0) {
- if ((ret =
- backup_cursor->get_key(backup_cursor, &key)) != 0)
- die(ret, "cursor.get_key");
- copy_file(key);
+ testutil_check(
+ backup_cursor->get_key(backup_cursor, &key));
+ copy_file(session, key);
}
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "backup-cursor");
- if ((ret = backup_cursor->close(backup_cursor)) != 0)
- die(ret, "cursor.close");
+ /* After an incremental backup, truncate the log files. */
+ if (incremental)
+ testutil_check(session->truncate(
+ session, "log:", backup_cursor, NULL, NULL));
- if ((ret = pthread_rwlock_unlock(&g.backup_lock)) != 0)
- die(ret, "pthread_rwlock_unlock: backup lock");
+ testutil_check(backup_cursor->close(backup_cursor));
+ testutil_check(pthread_rwlock_unlock(&g.backup_lock));
- check_copy();
+ /*
+ * If automatic log archival isn't configured, optionally do
+ * incremental backups after each full backup. If we're not
+ * doing any more incremental, verify the backup (we can't
+ * verify intermediate states, once we perform recovery on the
+ * backup database, we can't do any more incremental backups).
+ */
+ if (full)
+ incremental =
+ g.c_logging_archive ? 1 : mmrand(NULL, 1, 5);
+ if (--incremental == 0)
+ check_copy();
}
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ if (incremental != 0)
+ check_copy();
+
+ testutil_check(session->close(session, NULL));
return (NULL);
}
diff --git a/src/third_party/wiredtiger/test/format/bdb.c b/src/third_party/wiredtiger/test/format/bdb.c
index fec23112549..8b61573fdf9 100644
--- a/src/third_party/wiredtiger/test/format/bdb.c
+++ b/src/third_party/wiredtiger/test/format/bdb.c
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2014-2016 MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -30,7 +30,7 @@
#include "format.h"
static DBT key, value;
-static uint8_t *keybuf;
+static WT_ITEM keyitem;
static int
bdb_compare_reverse(DB *dbp, const DBT *k1, const DBT *k2
@@ -78,7 +78,7 @@ bdb_open(void)
assert(db->cursor(db, NULL, &dbc, 0) == 0);
g.dbc = dbc;
- key_gen_setup(&keybuf);
+ key_gen_setup(&keyitem);
}
void
@@ -95,8 +95,7 @@ bdb_close(void)
assert(db->close(db, 0) == 0);
assert(dbenv->close(dbenv, 0) == 0);
- free(keybuf);
- keybuf = NULL;
+ free(keyitem.mem);
}
void
@@ -107,9 +106,9 @@ bdb_insert(
DBC *dbc;
key.data = (void *)key_data;
- key.size = (uint32_t)key_size;
+ key.size = (u_int32_t)key_size;
value.data = (void *)value_data;
- value.size = (uint32_t)value_size;
+ value.size = (u_int32_t)value_size;
dbc = g.dbc;
@@ -128,7 +127,7 @@ bdb_np(int next,
if ((ret =
dbc->get(dbc, &key, &value, next ? DB_NEXT : DB_PREV)) != 0) {
if (ret != DB_NOTFOUND)
- die(ret, "dbc.get: %s: {%.*s}",
+ testutil_die(ret, "dbc.get: %s: {%.*s}",
next ? "DB_NEXT" : "DB_PREV",
(int)key.size, (char *)key.data);
*notfoundp = 1;
@@ -144,17 +143,16 @@ void
bdb_read(uint64_t keyno, void *valuep, size_t *valuesizep, int *notfoundp)
{
DBC *dbc = g.dbc;
- size_t size;
int ret;
- key_gen(keybuf, &size, keyno);
- key.data = keybuf;
- key.size = (uint32_t)size;
+ key_gen(&keyitem, keyno);
+ key.data = (void *)keyitem.data;
+ key.size = (u_int32_t)keyitem.size;
*notfoundp = 0;
if ((ret = dbc->get(dbc, &key, &value, DB_SET)) != 0) {
if (ret != DB_NOTFOUND)
- die(ret, "dbc.get: DB_SET: {%.*s}",
+ testutil_die(ret, "dbc.get: DB_SET: {%.*s}",
(int)key.size, (char *)key.data);
*notfoundp = 1;
} else {
@@ -165,25 +163,20 @@ bdb_read(uint64_t keyno, void *valuep, size_t *valuesizep, int *notfoundp)
void
bdb_update(const void *arg_key, size_t arg_key_size,
- const void *arg_value, size_t arg_value_size, int *notfoundp)
+ const void *arg_value, size_t arg_value_size)
{
DBC *dbc = g.dbc;
int ret;
key.data = (void *)arg_key;
- key.size = (uint32_t)arg_key_size;
+ key.size = (u_int32_t)arg_key_size;
value.data = (void *)arg_value;
- value.size = (uint32_t)arg_value_size;
+ value.size = (u_int32_t)arg_value_size;
- *notfoundp = 0;
- if ((ret = dbc->put(dbc, &key, &value, DB_KEYFIRST)) != 0) {
- if (ret != DB_NOTFOUND) {
- die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}",
- (int)key.size, (char *)key.data,
- (int)value.size, (char *)value.data);
- }
- *notfoundp = 1;
- }
+ if ((ret = dbc->put(dbc, &key, &value, DB_KEYFIRST)) != 0)
+ testutil_die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}",
+ (int)key.size, (char *)key.data,
+ (int)value.size, (char *)value.data);
}
void
@@ -193,18 +186,19 @@ bdb_remove(uint64_t keyno, int *notfoundp)
size_t size;
int ret;
- key_gen(keybuf, &size, keyno);
- key.data = keybuf;
- key.size = (uint32_t)size;
+ size = 0;
+ key_gen(&keyitem, keyno);
+ key.data = (void *)keyitem.data;
+ key.size = (u_int32_t)keyitem.size;
bdb_read(keyno, &value.data, &size, notfoundp);
- value.size = (uint32_t)size;
+ value.size = (u_int32_t)size;
if (*notfoundp)
return;
if ((ret = dbc->del(dbc, 0)) != 0) {
if (ret != DB_NOTFOUND)
- die(ret, "dbc.del: {%.*s}",
+ testutil_die(ret, "dbc.del: {%.*s}",
(int)key.size, (char *)key.data);
*notfoundp = 1;
}
diff --git a/src/third_party/wiredtiger/test/format/bulk.c b/src/third_party/wiredtiger/test/format/bulk.c
index ad9bc180e5e..d59b3100686 100644
--- a/src/third_party/wiredtiger/test/format/bulk.c
+++ b/src/third_party/wiredtiger/test/format/bulk.c
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2014-2016 MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -33,34 +33,37 @@ wts_load(void)
{
WT_CONNECTION *conn;
WT_CURSOR *cursor;
+ WT_DECL_RET;
WT_ITEM key, value;
WT_SESSION *session;
- uint8_t *keybuf, *valbuf;
- int is_bulk, ret;
+ bool is_bulk;
conn = g.wts_conn;
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
if (g.logging != 0)
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== bulk load start ===============");
/*
- * Avoid bulk load with KVS (there's no bulk load support for a
- * data-source); avoid bulk load with a custom collator, because
- * the order of insertion will not match the collation order.
+ * No bulk load with data-sources.
+ *
+ * No bulk load with custom collators, the order of insertion will not
+ * match the collation order.
*/
- is_bulk = !g.c_reverse &&
- !DATASOURCE("kvsbdb") && !DATASOURCE("helium");
- if ((ret = session->open_cursor(session, g.uri, NULL,
- is_bulk ? "bulk" : NULL, &cursor)) != 0)
- die(ret, "session.open_cursor");
+ is_bulk = true;
+ if (DATASOURCE("kvsbdb") && DATASOURCE("helium"))
+ is_bulk = false;
+ if (g.c_reverse)
+ is_bulk = false;
- /* Set up the default key buffer. */
- key_gen_setup(&keybuf);
- val_gen_setup(NULL, &valbuf);
+ testutil_check(session->open_cursor(session, g.uri, NULL,
+ is_bulk ? "bulk,append" : NULL, &cursor));
+
+ /* Set up the key/value buffers. */
+ key_gen_setup(&key);
+ val_gen_setup(NULL, &value);
for (;;) {
if (++g.key_cnt > g.c_rows) {
@@ -69,13 +72,11 @@ wts_load(void)
}
/* Report on progress every 100 inserts. */
- if (g.key_cnt % 100 == 0)
+ if (g.key_cnt % 1000 == 0)
track("bulk load", g.key_cnt, NULL);
- key_gen(keybuf, &key.size, (uint64_t)g.key_cnt);
- key.data = keybuf;
- val_gen(NULL, valbuf, &value.size, (uint64_t)g.key_cnt);
- value.data = valbuf;
+ key_gen(&key, g.key_cnt);
+ val_gen(NULL, &value, g.key_cnt);
switch (g.type) {
case FIX:
@@ -84,7 +85,7 @@ wts_load(void)
cursor->set_value(cursor, *(uint8_t *)value.data);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s %" PRIu32 " {0x%02" PRIx8 "}",
+ "%-10s %" PRIu64 " {0x%02" PRIx8 "}",
"bulk V",
g.key_cnt, ((uint8_t *)value.data)[0]);
break;
@@ -94,7 +95,7 @@ wts_load(void)
cursor->set_value(cursor, &value);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s %" PRIu32 " {%.*s}", "bulk V",
+ "%-10s %" PRIu64 " {%.*s}", "bulk V",
g.key_cnt,
(int)value.size, (char *)value.data);
break;
@@ -102,37 +103,53 @@ wts_load(void)
cursor->set_key(cursor, &key);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s %" PRIu32 " {%.*s}", "bulk K",
+ "%-10s %" PRIu64 " {%.*s}", "bulk K",
g.key_cnt, (int)key.size, (char *)key.data);
cursor->set_value(cursor, &value);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s %" PRIu32 " {%.*s}", "bulk V",
+ "%-10s %" PRIu64 " {%.*s}", "bulk V",
g.key_cnt,
(int)value.size, (char *)value.data);
break;
}
- if ((ret = cursor->insert(cursor)) != 0)
- die(ret, "cursor.insert");
-
- if (!SINGLETHREADED)
- continue;
+ /*
+ * We don't want to size the cache to ensure the initial data
+ * set can load in the in-memory case, guaranteeing the load
+ * succeeds probably means future updates are also guaranteed
+ * to succeed, which isn't what we want. If we run out of space
+ * in the initial load, reset the row counter and continue.
+ *
+ * Decrease inserts, they can't be successful if we're at the
+ * cache limit, and increase the delete percentage to get some
+ * extra space once the run starts.
+ */
+ if ((ret = cursor->insert(cursor)) != 0) {
+ g.rows = --g.key_cnt;
+ g.c_rows = (uint32_t)g.key_cnt;
+
+ if (g.c_insert_pct > 5)
+ g.c_insert_pct = 5;
+ if (g.c_delete_pct < 20)
+ g.c_delete_pct += 20;
+ break;
+ }
- /* Insert the item into BDB. */
- bdb_insert(key.data, key.size, value.data, value.size);
+#ifdef HAVE_BERKELEY_DB
+ if (SINGLETHREADED)
+ bdb_insert(key.data, key.size, value.data, value.size);
+#endif
}
- if ((ret = cursor->close(cursor)) != 0)
- die(ret, "cursor.close");
+ testutil_check(cursor->close(cursor));
if (g.logging != 0)
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== bulk load stop ===============");
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
- free(keybuf);
- free(valbuf);
+ free(key.mem);
+ free(value.mem);
}
diff --git a/src/third_party/wiredtiger/test/format/compact.c b/src/third_party/wiredtiger/test/format/compact.c
index ad603504023..240e5553697 100644
--- a/src/third_party/wiredtiger/test/format/compact.c
+++ b/src/third_party/wiredtiger/test/format/compact.c
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2014-2016 MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -36,9 +36,9 @@ void *
compact(void *arg)
{
WT_CONNECTION *conn;
+ WT_DECL_RET;
WT_SESSION *session;
u_int period;
- int ret;
(void)(arg);
@@ -48,8 +48,7 @@ compact(void *arg)
/* Open a session. */
conn = g.wts_conn;
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
/*
* Perform compaction at somewhere under 15 seconds (so we get at
@@ -66,11 +65,10 @@ compact(void *arg)
if ((ret = session->compact(
session, g.uri, NULL)) != 0 && ret != WT_ROLLBACK)
- die(ret, "session.compact");
+ testutil_die(ret, "session.compact");
}
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
return (NULL);
}
diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c
index bc05a422391..572aae9bad4 100644
--- a/src/third_party/wiredtiger/test/format/config.c
+++ b/src/third_party/wiredtiger/test/format/config.c
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2014-2016 MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -30,15 +30,17 @@
#include "config.h"
static void config_checksum(void);
-static void config_compression(void);
+static void config_compression(const char *);
static const char *config_file_type(u_int);
static CONFIG *config_find(const char *, size_t);
static int config_is_perm(const char *);
static void config_isolation(void);
+static void config_lrt(void);
static void config_map_checksum(const char *, u_int *);
static void config_map_compression(const char *, u_int *);
static void config_map_file_type(const char *, u_int *);
static void config_map_isolation(const char *, u_int *);
+static void config_reset(void);
/*
* config_setup --
@@ -50,7 +52,7 @@ config_setup(void)
CONFIG *cp;
/* Clear any temporary values. */
- config_clear();
+ config_reset();
/*
* Choose a data source type and a file type: they're interrelated (LSM
@@ -100,8 +102,7 @@ config_setup(void)
* our configuration, LSM or KVS devices are "tables", but files are
* tested as well.
*/
- if ((g.uri = malloc(256)) == NULL)
- die(errno, "malloc");
+ g.uri = dmalloc(256);
strcpy(g.uri, DATASOURCE("file") ? "file:" : "table:");
if (DATASOURCE("helium"))
strcat(g.uri, "dev1/");
@@ -120,22 +121,19 @@ config_setup(void)
if (F_ISSET(cp, C_BOOL))
*cp->v = mmrand(NULL, 1, 100) <= cp->min ? 1 : 0;
else
- *cp->v = CONF_RAND(cp);
+ *cp->v = mmrand(NULL, cp->min, cp->maxrand);
}
/* Required shared libraries. */
if (DATASOURCE("helium") && access(HELIUM_PATH, R_OK) != 0)
- die(errno, "Levyx/helium shared library: %s", HELIUM_PATH);
+ testutil_die(errno,
+ "Levyx/helium shared library: %s", HELIUM_PATH);
if (DATASOURCE("kvsbdb") && access(KVS_BDB_PATH, R_OK) != 0)
- die(errno, "kvsbdb shared library: %s", KVS_BDB_PATH);
+ testutil_die(errno, "kvsbdb shared library: %s", KVS_BDB_PATH);
/* Some data-sources don't support user-specified collations. */
if (DATASOURCE("helium") || DATASOURCE("kvsbdb"))
- g.c_reverse = 0;
-
- config_checksum();
- config_compression();
- config_isolation();
+ config_single("reverse=off", 0);
/*
* Periodically, run single-threaded so we can compare the results to
@@ -145,13 +143,19 @@ config_setup(void)
if (!g.replay && g.run_cnt % 20 == 19 && !config_is_perm("threads"))
g.c_threads = 1;
+ config_checksum();
+ config_compression("compression");
+ config_compression("logging_compression");
+ config_isolation();
+ config_lrt();
+
/*
* Periodically, set the delete percentage to 0 so salvage gets run,
* as long as the delete percentage isn't nailed down.
* Don't do it on the first run, all our smoke tests would hit it.
*/
if (!g.replay && g.run_cnt % 10 == 9 && !config_is_perm("delete_pct"))
- g.c_delete_pct = 0;
+ config_single("delete_pct=0", 0);
/*
* If this is an LSM run, set the cache size and crank up the insert
@@ -165,9 +169,22 @@ config_setup(void)
g.c_insert_pct = mmrand(NULL, 50, 85);
}
- /* Make the default maximum-run length 20 minutes. */
- if (!config_is_perm("timer"))
- g.c_timer = 20;
+ /* Ensure there is at least 1MB of cache per thread. */
+ if (!config_is_perm("cache") && g.c_cache < g.c_threads)
+ g.c_cache = g.c_threads;
+
+ /*
+ * Run-length configured by a number of operations and a timer. If the
+ * operation count and the timer are both set by a configuration, there
+ * isn't anything to do. If only the operation count was configured,
+ * set a default maximum-run of 20 minutes. If only the timer is set,
+ * clear the operations count (which was set randomly).
+ */
+ if (config_is_perm("timer")) {
+ if (!config_is_perm("ops"))
+ config_single("ops=0", 0);
+ } else
+ config_single("timer=20", 0);
/*
* Key/value minimum/maximum are related, correct unless specified by
@@ -178,14 +195,15 @@ config_setup(void)
if (!config_is_perm("key_max") && g.c_key_max < g.c_key_min)
g.c_key_max = g.c_key_min;
if (g.c_key_min > g.c_key_max)
- die(EINVAL, "key_min may not be larger than key_max");
+ testutil_die(EINVAL, "key_min may not be larger than key_max");
if (!config_is_perm("value_min") && g.c_value_min > g.c_value_max)
g.c_value_min = g.c_value_max;
if (!config_is_perm("value_max") && g.c_value_max < g.c_value_min)
g.c_value_max = g.c_value_min;
if (g.c_value_min > g.c_value_max)
- die(EINVAL, "value_min may not be larger than value_max");
+ testutil_die(EINVAL,
+ "value_min may not be larger than value_max");
/* Reset the key count. */
g.key_cnt = 0;
@@ -218,39 +236,63 @@ config_checksum(void)
* Compression configuration.
*/
static void
-config_compression(void)
+config_compression(const char *conf_name)
{
const char *cstr;
+ char confbuf[128];
+
+ /* Return if already specified. */
+ if (config_is_perm(conf_name))
+ return;
/*
- * Compression: choose something if compression wasn't specified,
- * otherwise confirm the appropriate shared library is available.
- * We used to verify that the libraries existed but that's no longer
- * robust, since it's possible to build compression libraries into
- * the WiredTiger library.
+ * Don't configure a compression engine for logging if logging isn't
+ * configured (it won't break, but it's confusing).
*/
- if (!config_is_perm("compression")) {
- cstr = "compression=none";
- switch (mmrand(NULL, 1, 20)) {
- case 1: case 2: case 3: case 4: /* 20% no compression */
- break;
- case 8: case 9: case 10: case 11: /* 20% lz4 */
- cstr = "compression=lz4";
- break;
- case 5: case 6: case 7:
- case 12: case 13: case 14: case 15: /* 35% snappy */
- cstr = "compression=snappy";
- break;
- case 16: case 17: case 18: case 19: /* 20% zlib */
- cstr = "compression=zlib";
- break;
- case 20: /* 5% zlib-no-raw */
- cstr = "compression=zlib-noraw";
- break;
- }
+ cstr = "none";
+ if (strcmp(conf_name, "logging_compression") == 0 && g.c_logging == 0) {
+ (void)snprintf(
+ confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr);
+ config_single(confbuf, 0);
+ return;
+ }
- config_single(cstr, 0);
+ /*
+ * Select a compression type from the list of built-in engines.
+ *
+ * Listed percentages are only correct if all of the possible engines
+ * are compiled in.
+ */
+ switch (mmrand(NULL, 1, 20)) {
+#ifdef HAVE_BUILTIN_EXTENSION_LZ4
+ case 1: case 2: case 3: case 4: /* 20% lz4 */
+ cstr = "lz4";
+ break;
+ case 5: /* 5% lz4-no-raw */
+ cstr = "lz4-noraw";
+ break;
+#endif
+#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
+ case 6: case 7: case 8: case 9: /* 30% snappy */
+ case 10: case 11:
+ cstr = "snappy";
+ break;
+#endif
+#ifdef HAVE_BUILTIN_EXTENSION_ZLIB
+ case 12: case 13: case 14: case 15: /* 20% zlib */
+ cstr = "zlib";
+ break;
+ case 16: /* 5% zlib-no-raw */
+ cstr = "zlib-noraw";
+ break;
+#endif
+ case 17: case 18: case 19: case 20: /* 15% no compression */
+ default:
+ break;
}
+
+ (void)snprintf(confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr);
+ config_single(confbuf, 0);
}
/*
@@ -287,6 +329,22 @@ config_isolation(void)
}
/*
+ * config_lrt --
+ * Long-running transaction configuration.
+ */
+static void
+config_lrt(void)
+{
+ if (g.type == FIX) {
+ if (config_is_perm("long_running_txn"))
+ testutil_die(EINVAL,
+ "long_running_txn not supported with fixed-length "
+ "column store");
+ config_single("long_running_txn=off", 0);
+ }
+}
+
+/*
* config_error --
* Display configuration information on error.
*/
@@ -320,7 +378,7 @@ config_print(int error_display)
fp = stdout;
else
if ((fp = fopen(g.home_config, "w")) == NULL)
- die(errno, "fopen: %s", g.home_config);
+ testutil_die(errno, "fopen: %s", g.home_config);
fprintf(fp, "############################################\n");
fprintf(fp, "# RUN PARAMETERS\n");
@@ -335,8 +393,12 @@ config_print(int error_display)
fprintf(fp, "%s=%" PRIu32 "\n", cp->name, *cp->v);
fprintf(fp, "############################################\n");
+
+ /* Flush so we're up-to-date on error. */
+ (void)fflush(fp);
+
if (fp != stdout)
- (void)fclose(fp);
+ fclose_and_clear(&fp);
}
/*
@@ -350,7 +412,7 @@ config_file(const char *name)
char *p, buf[256];
if ((fp = fopen(name, "r")) == NULL)
- die(errno, "fopen: %s", name);
+ testutil_die(errno, "fopen: %s", name);
while (fgets(buf, sizeof(buf), fp) != NULL) {
for (p = buf; *p != '\0' && *p != '\n'; ++p)
;
@@ -359,24 +421,42 @@ config_file(const char *name)
continue;
config_single(buf, 1);
}
- (void)fclose(fp);
+ fclose_and_clear(&fp);
}
/*
* config_clear --
- * Clear per-run values.
+ * Clear all configuration values.
*/
void
config_clear(void)
{
CONFIG *cp;
- /* Clear configuration data. */
+ /* Clear all allocated configuration data. */
+ for (cp = c; cp->name != NULL; ++cp)
+ if (cp->vstr != NULL) {
+ free((void *)*cp->vstr);
+ *cp->vstr = NULL;
+ }
+ free(g.uri);
+ g.uri = NULL;
+}
+
+/*
+ * config_reset --
+ * Clear per-run configuration values.
+ */
+static void
+config_reset(void)
+{
+ CONFIG *cp;
+
+ /* Clear temporary allocated configuration data. */
for (cp = c; cp->name != NULL; ++cp) {
F_CLR(cp, C_TEMP);
- if (!F_ISSET(cp, C_PERM) &&
- F_ISSET(cp, C_STRING) && cp->vstr != NULL) {
- free(*cp->vstr);
+ if (!F_ISSET(cp, C_PERM) && cp->vstr != NULL) {
+ free((void *)*cp->vstr);
*cp->vstr = NULL;
}
}
@@ -392,7 +472,7 @@ void
config_single(const char *s, int perm)
{
CONFIG *cp;
- uint64_t v;
+ long v;
char *p;
const char *ep;
@@ -418,35 +498,55 @@ config_single(const char *s, int perm)
exit(EXIT_FAILURE);
}
+ /*
+ * Free the previous setting if a configuration has been
+ * passed in twice.
+ */
+ if (*cp->vstr != NULL) {
+ free(*cp->vstr);
+ *cp->vstr = NULL;
+ }
+
if (strncmp(s, "checksum", strlen("checksum")) == 0) {
config_map_checksum(ep, &g.c_checksum_flag);
- *cp->vstr = strdup(ep);
+ *cp->vstr = dstrdup(ep);
} else if (strncmp(
s, "compression", strlen("compression")) == 0) {
config_map_compression(ep, &g.c_compression_flag);
- *cp->vstr = strdup(ep);
+ *cp->vstr = dstrdup(ep);
} else if (strncmp(s, "isolation", strlen("isolation")) == 0) {
config_map_isolation(ep, &g.c_isolation_flag);
- *cp->vstr = strdup(ep);
+ *cp->vstr = dstrdup(ep);
} else if (strncmp(s, "file_type", strlen("file_type")) == 0) {
config_map_file_type(ep, &g.type);
- *cp->vstr = strdup(config_file_type(g.type));
+ *cp->vstr = dstrdup(config_file_type(g.type));
+ } else if (strncmp(s, "logging_compression",
+ strlen("logging_compression")) == 0) {
+ config_map_compression(ep,
+ &g.c_logging_compression_flag);
+ *cp->vstr = dstrdup(ep);
} else {
- if (*cp->vstr != NULL)
- free(*cp->vstr);
- *cp->vstr = strdup(ep);
+ free((void *)*cp->vstr);
+ *cp->vstr = dstrdup(ep);
}
- if (*cp->vstr == NULL)
- die(errno, "malloc");
return;
}
- v = strtoul(ep, &p, 10);
- if (*p != '\0') {
- fprintf(stderr, "%s: %s: illegal numeric value\n",
- g.progname, s);
- exit(EXIT_FAILURE);
+ v = -1;
+ if (F_ISSET(cp, C_BOOL)) {
+ if (strncmp(ep, "off", strlen("off")) == 0)
+ v = 0;
+ else if (strncmp(ep, "on", strlen("on")) == 0)
+ v = 1;
+ }
+ if (v == -1) {
+ v = strtol(ep, &p, 10);
+ if (*p != '\0') {
+ fprintf(stderr, "%s: %s: illegal numeric value\n",
+ g.progname, s);
+ exit(EXIT_FAILURE);
+ }
}
if (F_ISSET(cp, C_BOOL)) {
if (v != 0 && v != 1) {
@@ -454,10 +554,10 @@ config_single(const char *s, int perm)
g.progname, s);
exit(EXIT_FAILURE);
}
- } else if ((uint32_t)v < cp->min || (uint32_t)v > cp->maxset) {
- fprintf(stderr, "%s: %s: value of %" PRIu32
- " outside min/max values of %" PRIu32 "-%" PRIu32 "\n",
- g.progname, s, *cp->v, cp->min, cp->maxset);
+ } else if (v < cp->min || v > cp->maxset) {
+ fprintf(stderr, "%s: %s: value outside min/max values of %"
+ PRIu32 "-%" PRIu32 "\n",
+ g.progname, s, cp->min, cp->maxset);
exit(EXIT_FAILURE);
}
*cp->v = (uint32_t)v;
@@ -480,7 +580,7 @@ config_map_file_type(const char *s, u_int *vp)
strcmp(s, "row-store") == 0)
*vp = ROW;
else
- die(EINVAL, "illegal file type configuration: %s", s);
+ testutil_die(EINVAL, "illegal file type configuration: %s", s);
}
/*
@@ -497,7 +597,7 @@ config_map_checksum(const char *s, u_int *vp)
else if (strcmp(s, "uncompressed") == 0)
*vp = CHECKSUM_UNCOMPRESSED;
else
- die(EINVAL, "illegal checksum configuration: %s", s);
+ testutil_die(EINVAL, "illegal checksum configuration: %s", s);
}
/*
@@ -509,12 +609,10 @@ config_map_compression(const char *s, u_int *vp)
{
if (strcmp(s, "none") == 0)
*vp = COMPRESS_NONE;
- else if (strcmp(s, "bzip") == 0)
- *vp = COMPRESS_BZIP;
- else if (strcmp(s, "bzip-raw") == 0)
- *vp = COMPRESS_BZIP_RAW;
else if (strcmp(s, "lz4") == 0)
*vp = COMPRESS_LZ4;
+ else if (strcmp(s, "lz4-noraw") == 0)
+ *vp = COMPRESS_LZ4_NO_RAW;
else if (strcmp(s, "lzo") == 0)
*vp = COMPRESS_LZO;
else if (strcmp(s, "snappy") == 0)
@@ -524,7 +622,8 @@ config_map_compression(const char *s, u_int *vp)
else if (strcmp(s, "zlib-noraw") == 0)
*vp = COMPRESS_ZLIB_NO_RAW;
else
- die(EINVAL, "illegal compression configuration: %s", s);
+ testutil_die(EINVAL,
+ "illegal compression configuration: %s", s);
}
/*
@@ -543,7 +642,7 @@ config_map_isolation(const char *s, u_int *vp)
else if (strcmp(s, "snapshot") == 0)
*vp = ISOLATION_SNAPSHOT;
else
- die(EINVAL, "illegal isolation configuration: %s", s);
+ testutil_die(EINVAL, "illegal isolation configuration: %s", s);
}
/*
@@ -555,7 +654,7 @@ config_find(const char *s, size_t len)
{
CONFIG *cp;
- for (cp = c; cp->name != NULL; ++cp)
+ for (cp = c; cp->name != NULL; ++cp)
if (strncmp(s, cp->name, len) == 0 && cp->name[len] == '\0')
return (cp);
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index 2d2b4e03010..f8dc5a703e5 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2014-2016 MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -37,7 +37,7 @@ typedef struct {
/* Value is a boolean, yes if roll of 1-to-100 is <= CONFIG->min. */
#define C_BOOL 0x001
- /* Not a simple randomization, handle outside the main loop. */
+ /* Not a simple randomization, handle outside the main loop. */
#define C_IGNORE 0x002
/* Value was set from command-line or file, ignore for all runs. */
@@ -57,11 +57,8 @@ typedef struct {
char **vstr; /* Value for string options */
} CONFIG;
-/*
- * Get a random value between a config min/max pair (inclusive for both min
- * and max).
- */
-#define CONF_RAND(cp) mmrand(NULL, (cp)->min, (cp)->maxrand)
+#define COMPRESSION_LIST \
+ "(none | lz4 | lz4-noraw | snappy | zlib | zlib-noraw)"
static CONFIG c[] = {
{ "abort",
@@ -72,9 +69,9 @@ static CONFIG c[] = {
"if LSM inserts are throttled", /* 90% */
C_BOOL, 90, 0, 0, &g.c_auto_throttle, NULL },
- { "firstfit",
- "if allocation is firstfit", /* 10% */
- C_BOOL, 10, 0, 0, &g.c_firstfit, NULL },
+ { "backups",
+ "if backups are enabled", /* 20% */
+ C_BOOL, 20, 0, 0, &g.c_backups, NULL },
{ "bitcnt",
"number of bits for fixed-length column-store files",
@@ -117,8 +114,7 @@ static CONFIG c[] = {
C_BOOL, 10, 0, 0, &g.c_compact, NULL },
{ "compression",
- "type of compression "
- "(none | bzip | bzip-raw | lz4 | lzo | snappy | zlib | zlib-noraw)",
+ "type of compression " COMPRESSION_LIST,
C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_compression },
{ "data_extend",
@@ -127,7 +123,7 @@ static CONFIG c[] = {
{ "data_source",
"data source (file | helium | kvsbdb | lsm | table)",
- C_IGNORE | C_STRING, 0, 0, 0, NULL, &g.c_data_source },
+ C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_data_source },
{ "delete_pct",
"percent operations that are deletes",
@@ -137,6 +133,10 @@ static CONFIG c[] = {
"if values are dictionary compressed", /* 20% */
C_BOOL, 20, 0, 0, &g.c_dictionary, NULL },
+ { "direct_io",
+ "if direct I/O is configured for data objects", /* 0% */
+ C_IGNORE, 0, 0, 1, &g.c_direct_io, NULL },
+
{ "evict_max",
"the maximum number of eviction workers",
0x0, 0, 5, 100, &g.c_evict_max, NULL },
@@ -145,9 +145,9 @@ static CONFIG c[] = {
"type of store to create (fix | var | row)",
C_IGNORE|C_STRING, 1, 3, 3, NULL, &g.c_file_type },
- { "backups",
- "if backups are enabled", /* 5% */
- C_BOOL, 5, 0, 0, &g.c_backups, NULL },
+ { "firstfit",
+ "if allocation is firstfit", /* 10% */
+ C_BOOL, 10, 0, 0, &g.c_firstfit, NULL },
{ "huffman_key",
"if keys are huffman encoded", /* 20% */
@@ -186,26 +186,34 @@ static CONFIG c[] = {
"minimum size of keys",
0x0, 10, 32, 256, &g.c_key_min, NULL },
- { "leak_memory",
- "if memory should be leaked on close",
- C_BOOL, 0, 0, 0, &g.c_leak_memory, NULL },
-
{ "leaf_page_max",
"maximum size of Btree leaf nodes",
0x0, 9, 17, 27, &g.c_leaf_page_max, NULL },
+ { "leak_memory",
+ "if memory should be leaked on close",
+ C_BOOL, 0, 0, 0, &g.c_leak_memory, NULL },
+
{ "logging",
- "if logging configured", /* 30% */
- C_BOOL, 30, 0, 0, &g.c_logging, NULL },
+ "if logging configured", /* 50% */
+ C_BOOL, 50, 0, 0, &g.c_logging, NULL },
{ "logging_archive",
"if log file archival configured", /* 50% */
C_BOOL, 50, 0, 0, &g.c_logging_archive, NULL },
+ { "logging_compression",
+ "type of logging compression " COMPRESSION_LIST,
+ C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_logging_compression },
+
{ "logging_prealloc",
"if log file pre-allocation configured", /* 50% */
C_BOOL, 50, 0, 0, &g.c_logging_prealloc, NULL },
+ { "long_running_txn",
+ "if a long-running transaction configured", /* 0% */
+ C_BOOL, 0, 0, 0, &g.c_long_running_txn, NULL },
+
{ "lsm_worker_threads",
"the number of LSM worker threads",
0x0, 3, 4, 20, &g.c_lsm_worker_threads, NULL },
@@ -230,6 +238,10 @@ static CONFIG c[] = {
"minimum gain before prefix compression is used",
0x0, 0, 8, 256, &g.c_prefix_compression_min, NULL },
+ { "quiet",
+ "quiet run (same as -q)",
+ C_IGNORE|C_BOOL, 0, 0, 0, &g.c_quiet, NULL },
+
{ "repeat_data_pct",
"percent duplicate values in row- or var-length column-stores",
0x0, 0, 90, 90, &g.c_repeat_data_pct, NULL },
@@ -246,6 +258,10 @@ static CONFIG c[] = {
"the number of runs",
C_IGNORE, 0, UINT_MAX, UINT_MAX, &g.c_runs, NULL },
+ { "salvage",
+ "salvage testing", /* 100% */
+ C_BOOL, 100, 1, 0, &g.c_salvage, NULL },
+
{ "split_pct",
"page split size as a percentage of the maximum page size",
0x0, 40, 85, 85, &g.c_split_pct, NULL },
@@ -266,6 +282,10 @@ static CONFIG c[] = {
"maximum time to run in minutes (default 20 minutes)",
C_IGNORE, 0, UINT_MAX, UINT_MAX, &g.c_timer, NULL },
+ { "transaction-frequency",
+ "percent operations done inside an explicit transaction",
+ 0x0, 1, 100, 100, &g.c_txn_freq, NULL },
+
{ "value_max",
"maximum size of values",
0x0, 32, 4096, MEGABYTE(10), &g.c_value_max, NULL },
@@ -274,6 +294,10 @@ static CONFIG c[] = {
"minimum size of values",
0x0, 0, 20, 4096, &g.c_value_min, NULL },
+ { "verify",
+ "to regularly verify during a run", /* 100% */
+ C_BOOL, 100, 1, 0, &g.c_verify, NULL },
+
{ "wiredtiger_config",
"configuration string used to wiredtiger_open",
C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_config_open },
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 7cd75cd0475..bab9ef26f20 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2014-2016 MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -26,52 +26,22 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <sys/stat.h>
-#ifndef _WIN32
-#include <sys/time.h>
-#endif
-#include <sys/types.h>
-
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <inttypes.h>
-#include <limits.h>
-#ifndef _WIN32
-#include <pthread.h>
-#endif
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-#include <time.h>
-
-#ifdef _WIN32
-#include "windows_shim.h"
-#endif
-
-#include <wt_internal.h>
+#include "test_util.h"
#ifdef BDB
-#include <db.h>
+/*
+ * Berkeley DB has an #ifdef we need to provide a value for, we'll see an
+ * undefined error if it's unset during a strict compile.
+ */
+#ifndef DB_DBM_HSEARCH
+#define DB_DBM_HSEARCH 0
#endif
-
-#if defined(__GNUC__)
-#define WT_GCC_ATTRIBUTE(x) __attribute__(x)
-#else
-#define WT_GCC_ATTRIBUTE(x)
+#include <assert.h>
+#include <db.h>
#endif
-extern WT_EXTENSION_API *wt_api;
-
#define EXTPATH "../../ext/" /* Extensions path */
-#define BZIP_PATH \
- EXTPATH "compressors/bzip2/.libs/libwiredtiger_bzip2.so"
#define LZ4_PATH \
EXTPATH "compressors/lz4/.libs/libwiredtiger_lz4.so"
#define SNAPPY_PATH \
@@ -95,8 +65,6 @@ extern WT_EXTENSION_API *wt_api;
#define KILOBYTE(v) ((v) * 1024)
#undef MEGABYTE
#define MEGABYTE(v) ((v) * 1048576)
-#undef GIGABYTE
-#define GIGABYTE(v) ((v) * 1073741824ULL)
#define WT_NAME "wt" /* Object name */
@@ -105,12 +73,6 @@ extern WT_EXTENSION_API *wt_api;
#define FORMAT_OPERATION_REPS 3 /* 3 thread operations sets */
-#ifndef _WIN32
-#define SIZET_FMT "%zu" /* size_t format string */
-#else
-#define SIZET_FMT "%Iu" /* size_t format string */
-#endif
-
typedef struct {
char *progname; /* Program name */
@@ -120,7 +82,6 @@ typedef struct {
char *home_bdb; /* BDB directory */
char *home_config; /* Run CONFIG file path */
char *home_init; /* Initialize home command */
- char *home_kvs; /* KVS directory */
char *home_log; /* Operation log file path */
char *home_rand; /* RNG log file path */
char *home_salvage_copy; /* Salvage copy command */
@@ -128,14 +89,18 @@ typedef struct {
char *helium_mount; /* Helium volume */
+ char wiredtiger_open_config[8 * 1024]; /* Database open config */
+
+#ifdef HAVE_BERKELEY_DB
void *bdb; /* BDB comparison handle */
void *dbc; /* BDB cursor handle */
+#endif
WT_CONNECTION *wts_conn;
WT_EXTENSION_API *wt_api;
int rand_log_stop; /* Logging turned off */
- FILE *rand_log; /* Random number log */
+ FILE *randfp; /* Random number log */
uint32_t run_cnt; /* Run counter */
@@ -146,10 +111,10 @@ typedef struct {
FILE *logfp; /* Log file */
int replay; /* Replaying a run. */
- int track; /* Track progress */
int workers_finished; /* Operations completed */
- pthread_rwlock_t backup_lock; /* Hot backup running */
+ pthread_rwlock_t backup_lock; /* Backup running */
+ pthread_rwlock_t checkpoint_lock; /* Checkpoint running */
WT_RAND_STATE rnd; /* Global RNG state */
@@ -163,6 +128,8 @@ typedef struct {
size_t append_cnt; /* Current unresolved records */
pthread_rwlock_t append_lock; /* Single-thread resolution */
+ pthread_rwlock_t death_lock; /* Single-thread failure */
+
char *uri; /* Object name */
char *config_open; /* Command-line configuration */
@@ -178,23 +145,24 @@ typedef struct {
uint32_t c_cache;
uint32_t c_compact;
uint32_t c_checkpoints;
- char *c_checksum;
+ char *c_checksum;
uint32_t c_chunk_size;
- char *c_compression;
- char *c_config_open;
+ char *c_compression;
+ char *c_config_open;
uint32_t c_data_extend;
- char *c_data_source;
+ char *c_data_source;
uint32_t c_delete_pct;
uint32_t c_dictionary;
+ uint32_t c_direct_io;
uint32_t c_evict_max;
uint32_t c_firstfit;
- char *c_file_type;
+ char *c_file_type;
uint32_t c_huffman_key;
uint32_t c_huffman_value;
uint32_t c_insert_pct;
uint32_t c_internal_key_truncation;
uint32_t c_intl_page_max;
- char *c_isolation;
+ char *c_isolation;
uint32_t c_key_gap;
uint32_t c_key_max;
uint32_t c_key_min;
@@ -202,27 +170,33 @@ typedef struct {
uint32_t c_leak_memory;
uint32_t c_logging;
uint32_t c_logging_archive;
+ char *c_logging_compression;
uint32_t c_logging_prealloc;
+ uint32_t c_long_running_txn;
uint32_t c_lsm_worker_threads;
uint32_t c_merge_max;
uint32_t c_mmap;
uint32_t c_ops;
+ uint32_t c_quiet;
uint32_t c_prefix_compression;
uint32_t c_prefix_compression_min;
uint32_t c_repeat_data_pct;
uint32_t c_reverse;
uint32_t c_rows;
uint32_t c_runs;
+ uint32_t c_salvage;
uint32_t c_split_pct;
uint32_t c_statistics;
uint32_t c_statistics_server;
uint32_t c_threads;
uint32_t c_timer;
+ uint32_t c_txn_freq;
uint32_t c_value_max;
uint32_t c_value_min;
+ uint32_t c_verify;
uint32_t c_write_pct;
-#define FIX 1
+#define FIX 1
#define ROW 2
#define VAR 3
u_int type; /* File type's flag value */
@@ -233,14 +207,14 @@ typedef struct {
u_int c_checksum_flag; /* Checksum flag value */
#define COMPRESS_NONE 1
-#define COMPRESS_BZIP 2
-#define COMPRESS_BZIP_RAW 3
-#define COMPRESS_LZ4 4
-#define COMPRESS_LZO 5
-#define COMPRESS_SNAPPY 6
-#define COMPRESS_ZLIB 7
-#define COMPRESS_ZLIB_NO_RAW 8
+#define COMPRESS_LZ4 2
+#define COMPRESS_LZ4_NO_RAW 3
+#define COMPRESS_LZO 4
+#define COMPRESS_SNAPPY 5
+#define COMPRESS_ZLIB 6
+#define COMPRESS_ZLIB_NO_RAW 7
u_int c_compression_flag; /* Compression flag value */
+ u_int c_logging_compression_flag; /* Log compression flag value */
#define ISOLATION_RANDOM 1
#define ISOLATION_READ_UNCOMMITTED 2
@@ -255,7 +229,7 @@ typedef struct {
} GLOBAL;
extern GLOBAL g;
-typedef struct {
+typedef struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) {
WT_RAND_STATE rnd; /* thread RNG state */
uint64_t search; /* operations */
@@ -277,15 +251,17 @@ typedef struct {
#define TINFO_COMPLETE 2 /* Finished */
#define TINFO_JOINED 3 /* Resolved */
volatile int state; /* state */
-} TINFO WT_GCC_ATTRIBUTE((aligned(64)));
+} TINFO;
+#ifdef HAVE_BERKELEY_DB
void bdb_close(void);
void bdb_insert(const void *, size_t, const void *, size_t);
void bdb_np(int, void *, size_t *, void *, size_t *, int *);
void bdb_open(void);
void bdb_read(uint64_t, void *, size_t *, int *);
void bdb_remove(uint64_t, int *);
-void bdb_update(const void *, size_t, const void *, size_t, int *);
+void bdb_update(const void *, size_t, const void *, size_t);
+#endif
void *backup(void *);
void *compact(void *);
@@ -296,32 +272,29 @@ void config_print(int);
void config_setup(void);
void config_single(const char *, int);
void fclose_and_clear(FILE **);
-void key_gen(uint8_t *, size_t *, uint64_t);
-void key_gen_insert(WT_RAND_STATE *, uint8_t *, size_t *, uint64_t);
-void key_gen_setup(uint8_t **);
+void key_gen(WT_ITEM *, uint64_t);
+void key_gen_insert(WT_RAND_STATE *, WT_ITEM *, uint64_t);
+void key_gen_setup(WT_ITEM *);
void key_len_setup(void);
+void *lrt(void *);
void path_setup(const char *);
+int read_row(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
uint32_t rng(WT_RAND_STATE *);
void track(const char *, uint64_t, TINFO *);
-void val_gen(WT_RAND_STATE *, uint8_t *, size_t *, uint64_t);
-void val_gen_setup(WT_RAND_STATE *, uint8_t **);
+void val_gen(WT_RAND_STATE *, WT_ITEM *, uint64_t);
+void val_gen_setup(WT_RAND_STATE *, WT_ITEM *);
void wts_close(void);
-void wts_create(void);
void wts_dump(const char *, int);
+void wts_init(void);
void wts_load(void);
-void wts_open(const char *, int, WT_CONNECTION **);
+void wts_open(const char *, bool, WT_CONNECTION **);
void wts_ops(int);
void wts_read_scan(void);
+void wts_reopen(void);
void wts_salvage(void);
void wts_stats(void);
void wts_verify(const char *);
-void die(int, const char *, ...)
-#if defined(__GNUC__)
-__attribute__((__noreturn__))
-#endif
-;
-
/*
* mmrand --
* Return a random value between a min/max pair.
diff --git a/src/third_party/wiredtiger/test/format/lrt.c b/src/third_party/wiredtiger/test/format/lrt.c
new file mode 100644
index 00000000000..937525522fa
--- /dev/null
+++ b/src/third_party/wiredtiger/test/format/lrt.c
@@ -0,0 +1,170 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "format.h"
+
+/*
+ * lrt --
+ * Start a long-running transaction.
+ */
+void *
+lrt(void *arg)
+{
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_ITEM key, value;
+ WT_SESSION *session;
+ size_t buf_len, buf_size;
+ uint64_t keyno, saved_keyno;
+ u_int period;
+ int pinned, ret;
+ uint8_t bitfield;
+ void *buf;
+
+ (void)(arg); /* Unused parameter */
+
+ saved_keyno = 0; /* [-Werror=maybe-uninitialized] */
+
+ key_gen_setup(&key);
+ val_gen_setup(NULL, &value);
+
+ buf = NULL;
+ buf_len = buf_size = 0;
+
+ /* Open a session and cursor. */
+ conn = g.wts_conn;
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(
+ session, g.uri, NULL, NULL, &cursor));
+
+ for (pinned = 0;;) {
+ if (pinned) {
+ /* Re-read the record at the end of the table. */
+ while ((ret = read_row(
+ cursor, &key, &value, saved_keyno)) == WT_ROLLBACK)
+ ;
+ if (ret != 0)
+ testutil_die(ret,
+ "read_row %" PRIu64, saved_keyno);
+
+ /* Compare the previous value with the current one. */
+ if (g.type == FIX) {
+ ret = cursor->get_value(cursor, &bitfield);
+ value.data = &bitfield;
+ value.size = 1;
+ } else
+ ret = cursor->get_value(cursor, &value);
+ if (ret != 0)
+ testutil_die(ret,
+ "cursor.get_value: %" PRIu64, saved_keyno);
+
+ if (buf_size != value.size ||
+ memcmp(buf, value.data, value.size) != 0)
+ testutil_die(0, "mismatched start/stop values");
+
+ /* End the transaction. */
+ testutil_check(
+ session->commit_transaction(session, NULL));
+
+ /* Reset the cursor, releasing our pin. */
+ testutil_check(cursor->reset(cursor));
+ pinned = 0;
+ } else {
+ /*
+ * Begin transaction: without an explicit transaction,
+ * the snapshot is only kept around while a cursor is
+ * positioned. As soon as the cursor loses its position
+ * a new snapshot will be allocated.
+ */
+ testutil_check(session->begin_transaction(
+ session, "isolation=snapshot"));
+
+ /* Read a record at the end of the table. */
+ do {
+ saved_keyno = mmrand(NULL,
+ (u_int)(g.key_cnt - g.key_cnt / 10),
+ (u_int)g.key_cnt);
+ while ((ret = read_row(cursor,
+ &key, &value, saved_keyno)) == WT_ROLLBACK)
+ ;
+ } while (ret == WT_NOTFOUND);
+ if (ret != 0)
+ testutil_die(ret,
+ "read_row %" PRIu64, saved_keyno);
+
+ /* Copy the cursor's value. */
+ if (g.type == FIX) {
+ ret = cursor->get_value(cursor, &bitfield);
+ value.data = &bitfield;
+ value.size = 1;
+ } else
+ ret = cursor->get_value(cursor, &value);
+ if (ret != 0)
+ testutil_die(ret,
+ "cursor.get_value: %" PRIu64, saved_keyno);
+ if (buf_len < value.size)
+ buf = drealloc(buf, buf_len = value.size);
+ memcpy(buf, value.data, buf_size = value.size);
+
+ /*
+ * Move the cursor to an early record in the table,
+ * hopefully allowing the page with the record just
+ * retrieved to be evicted from memory.
+ */
+ do {
+ keyno = mmrand(NULL, 1, (u_int)g.key_cnt / 5);
+ while ((ret = read_row(cursor,
+ &key, &value, keyno)) == WT_ROLLBACK)
+ ;
+ } while (ret == WT_NOTFOUND);
+ if (ret != 0)
+ testutil_die(ret, "read_row %" PRIu64, keyno);
+
+ pinned = 1;
+ }
+
+ /* Sleep for some number of seconds. */
+ period = mmrand(NULL, 1, 10);
+
+ /* Sleep for short periods so we don't make the run wait. */
+ while (period > 0 && !g.workers_finished) {
+ --period;
+ sleep(1);
+ }
+ if (g.workers_finished)
+ break;
+ }
+
+ testutil_check(session->close(session, NULL));
+
+ free(key.mem);
+ free(value.mem);
+ free(buf);
+
+ return (NULL);
+}
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 2a2ff91fbf0..9b2aacaecff 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2014-2016 MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -28,19 +28,21 @@
#include "format.h"
-static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *);
-static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, int *);
-static int col_update(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
-static int nextprev(WT_CURSOR *, int, int *);
-static int notfound_chk(const char *, int, int, uint64_t);
+static int col_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *);
+static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t);
+static int col_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
+static int nextprev(WT_CURSOR *, int);
static void *ops(void *);
-static void print_item(const char *, WT_ITEM *);
-static int read_row(WT_CURSOR *, WT_ITEM *, uint64_t);
-static int row_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
-static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, int *);
-static int row_update(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
+static int row_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
+static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t);
+static int row_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
static void table_append_init(void);
+#ifdef HAVE_BERKELEY_DB
+static int notfound_chk(const char *, int, int, uint64_t);
+static void print_item(const char *, WT_ITEM *);
+#endif
+
/*
* wts_ops --
* Perform a number of operations in a set of threads.
@@ -51,23 +53,17 @@ wts_ops(int lastrun)
TINFO *tinfo, total;
WT_CONNECTION *conn;
WT_SESSION *session;
- pthread_t backup_tid, compact_tid;
+ pthread_t backup_tid, compact_tid, lrt_tid;
int64_t fourths, thread_ops;
uint32_t i;
- int ret, running;
+ int running;
conn = g.wts_conn;
session = NULL; /* -Wconditional-uninitialized */
memset(&backup_tid, 0, sizeof(backup_tid));
memset(&compact_tid, 0, sizeof(compact_tid));
-
- /*
- * We support replay of threaded runs, but don't log random numbers
- * after threaded operations start, there's no point.
- */
- if (!SINGLETHREADED)
- g.rand_log_stop = 1;
+ memset(&lrt_tid, 0, sizeof(lrt_tid));
/*
* There are two mechanisms to specify the length of the run, a number
@@ -87,37 +83,45 @@ wts_ops(int lastrun)
if (g.c_timer == 0)
fourths = -1;
else
- fourths = (g.c_timer * 4 * 60) / FORMAT_OPERATION_REPS;
+ fourths = ((int64_t)g.c_timer * 4 * 60) / FORMAT_OPERATION_REPS;
/* Initialize the table extension code. */
table_append_init();
+ /*
+ * We support replay of threaded runs, but don't log random numbers
+ * after threaded operations start, there's no point.
+ */
+ if (!SINGLETHREADED)
+ g.rand_log_stop = 1;
+
/* Open a session. */
if (g.logging != 0) {
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== thread ops start ===============");
}
/* Create thread structure; start the worker threads. */
- if ((tinfo = calloc((size_t)g.c_threads, sizeof(*tinfo))) == NULL)
- die(errno, "calloc");
+ tinfo = dcalloc((size_t)g.c_threads, sizeof(*tinfo));
for (i = 0; i < g.c_threads; ++i) {
tinfo[i].id = (int)i + 1;
tinfo[i].state = TINFO_RUNNING;
- if ((ret =
- pthread_create(&tinfo[i].tid, NULL, ops, &tinfo[i])) != 0)
- die(ret, "pthread_create");
+ testutil_check(
+ pthread_create(&tinfo[i].tid, NULL, ops, &tinfo[i]));
}
- /* If a multi-threaded run, start backup and compaction threads. */
- if (g.c_backups &&
- (ret = pthread_create(&backup_tid, NULL, backup, NULL)) != 0)
- die(ret, "pthread_create: backup");
- if (g.c_compact &&
- (ret = pthread_create(&compact_tid, NULL, compact, NULL)) != 0)
- die(ret, "pthread_create: compaction");
+ /*
+ * If a multi-threaded run, start optional backup, compaction and
+ * long-running reader threads.
+ */
+ if (g.c_backups)
+ testutil_check(pthread_create(&backup_tid, NULL, backup, NULL));
+ if (g.c_compact)
+ testutil_check(
+ pthread_create(&compact_tid, NULL, compact, NULL));
+ if (!SINGLETHREADED && g.c_long_running_txn)
+ testutil_check(pthread_create(&lrt_tid, NULL, lrt, NULL));
/* Spin on the threads, calculating the totals. */
for (;;) {
@@ -171,70 +175,247 @@ wts_ops(int lastrun)
}
free(tinfo);
- /* Wait for the backup, compaction thread. */
+ /* Wait for the backup, compaction, long-running reader threads. */
g.workers_finished = 1;
if (g.c_backups)
(void)pthread_join(backup_tid, NULL);
if (g.c_compact)
(void)pthread_join(compact_tid, NULL);
+ if (!SINGLETHREADED && g.c_long_running_txn)
+ (void)pthread_join(lrt_tid, NULL);
+ g.workers_finished = 0;
if (g.logging != 0) {
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== thread ops stop ===============");
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
}
}
/*
- * ops_session_config --
- * Return the current session configuration.
+ * isolation_config --
+ * Return an isolation configuration.
*/
-static const char *
-ops_session_config(WT_RAND_STATE *rnd)
+static inline const char *
+isolation_config(WT_RAND_STATE *rnd, bool *iso_snapshotp)
{
u_int v;
- /*
- * The only current session configuration is the isolation level.
- */
if ((v = g.c_isolation_flag) == ISOLATION_RANDOM)
v = mmrand(rnd, 2, 4);
switch (v) {
case ISOLATION_READ_UNCOMMITTED:
+ *iso_snapshotp = false;
return ("isolation=read-uncommitted");
case ISOLATION_READ_COMMITTED:
+ *iso_snapshotp = false;
return ("isolation=read-committed");
case ISOLATION_SNAPSHOT:
default:
+ *iso_snapshotp = true;
return ("isolation=snapshot");
}
}
+typedef struct {
+ uint64_t keyno; /* Row number */
+
+ void *kdata; /* If an insert, the generated key */
+ size_t ksize;
+ size_t kmemsize;
+
+ void *vdata; /* If not a delete, the value */
+ size_t vsize;
+ size_t vmemsize;
+
+ bool deleted; /* Delete operation */
+ bool insert; /* Insert operation */
+} SNAP_OPS;
+
+/*
+ * snap_track --
+ * Add a single snapshot isolation returned value to the list.
+ */
+static void
+snap_track(SNAP_OPS *snap, uint64_t keyno, WT_ITEM *key, WT_ITEM *value)
+{
+ snap->keyno = keyno;
+ if (key == NULL)
+ snap->insert = false;
+ else {
+ snap->insert = true;
+
+ if (snap->kmemsize < key->size) {
+ snap->kdata = drealloc(snap->kdata, key->size);
+ snap->kmemsize = key->size;
+ }
+ memcpy(snap->kdata, key->data, snap->ksize = key->size);
+ }
+ if (value == NULL)
+ snap->deleted = true;
+ else {
+ snap->deleted = false;
+ if (snap->vmemsize < value->size) {
+ snap->vdata = drealloc(snap->vdata, value->size);
+ snap->vmemsize = value->size;
+ }
+ memcpy(snap->vdata, value->data, snap->vsize = value->size);
+ }
+}
+
+/*
+ * snap_check --
+ * Check snapshot isolation operations are repeatable.
+ */
+static int
+snap_check(WT_CURSOR *cursor,
+ SNAP_OPS *start, SNAP_OPS *stop, WT_ITEM *key, WT_ITEM *value)
+{
+ WT_DECL_RET;
+ SNAP_OPS *p;
+ uint8_t bitfield;
+
+ for (; start < stop; ++start) {
+ /* Check for subsequent changes to this record. */
+ for (p = start + 1; p < stop && p->keyno != start->keyno; ++p)
+ ;
+ if (p != stop)
+ continue;
+
+ /*
+ * Retrieve the key/value pair by key. Row-store inserts have a
+ * unique generated key we saved, else generate the key from the
+ * key number.
+ */
+ if (start->insert == 0) {
+ switch (g.type) {
+ case FIX:
+ case VAR:
+ cursor->set_key(cursor, start->keyno);
+ break;
+ case ROW:
+ key_gen(key, start->keyno);
+ cursor->set_key(cursor, key);
+ break;
+ }
+ } else {
+ key->data = start->kdata;
+ key->size = start->ksize;
+ cursor->set_key(cursor, key);
+ }
+ if ((ret = cursor->search(cursor)) == 0) {
+ if (g.type == FIX) {
+ testutil_check(
+ cursor->get_value(cursor, &bitfield));
+ *(uint8_t *)(value->data) = bitfield;
+ value->size = 1;
+ } else
+ testutil_check(
+ cursor->get_value(cursor, value));
+ } else
+ if (ret != WT_NOTFOUND)
+ return (ret);
+
+ /* Check for simple matches. */
+ if (ret == 0 && !start->deleted &&
+ value->size == start->vsize &&
+ memcmp(value->data, start->vdata, value->size) == 0)
+ continue;
+ if (ret == WT_NOTFOUND && start->deleted)
+ continue;
+
+ /*
+ * In fixed length stores, zero values at the end of the key
+ * space are returned as not-found, and not-found row reads
+ * are saved as zero values. Map back-and-forth for simplicity.
+ */
+ if (g.type == FIX) {
+ if (ret == WT_NOTFOUND &&
+ start->vsize == 1 && *(uint8_t *)start->vdata == 0)
+ continue;
+ if (start->deleted &&
+ value->size == 1 && *(uint8_t *)value->data == 0)
+ continue;
+ }
+
+ /* Things went pear-shaped. */
+ switch (g.type) {
+ case FIX:
+ testutil_die(ret,
+ "snapshot-isolation: %" PRIu64 " search: "
+ "expected {0x%02x}, found {0x%02x}",
+ start->keyno,
+ start->deleted ? 0 : *(uint8_t *)start->vdata,
+ ret == WT_NOTFOUND ? 0 : *(uint8_t *)value->data);
+ /* NOTREACHED */
+ case ROW:
+ testutil_die(ret,
+ "snapshot-isolation: %.*s search: "
+ "expected {%.*s}, found {%.*s}",
+ (int)key->size, key->data,
+ start->deleted ?
+ (int)strlen("deleted") : (int)start->vsize,
+ start->deleted ? "deleted" : start->vdata,
+ ret == WT_NOTFOUND ?
+ (int)strlen("deleted") : (int)value->size,
+ ret == WT_NOTFOUND ? "deleted" : value->data);
+ /* NOTREACHED */
+ case VAR:
+ testutil_die(ret,
+ "snapshot-isolation: %" PRIu64 " search: "
+ "expected {%.*s}, found {%.*s}",
+ start->keyno,
+ start->deleted ?
+ (int)strlen("deleted") : (int)start->vsize,
+ start->deleted ? "deleted" : start->vdata,
+ ret == WT_NOTFOUND ?
+ (int)strlen("deleted") : (int)value->size,
+ ret == WT_NOTFOUND ? "deleted" : value->data);
+ /* NOTREACHED */
+ }
+ }
+ return (0);
+}
+
+/*
+ * ops --
+ * Per-thread operations.
+ */
static void *
ops(void *arg)
{
+ SNAP_OPS *snap, snap_list[64];
TINFO *tinfo;
WT_CONNECTION *conn;
WT_CURSOR *cursor, *cursor_insert;
+ WT_DECL_RET;
+ WT_ITEM *key, _key, *value, _value;
WT_SESSION *session;
- WT_ITEM key, value;
uint64_t keyno, ckpt_op, session_op;
- uint32_t op;
- uint8_t *keybuf, *valbuf;
- u_int np;
- int ckpt_available, dir, insert, intxn, notfound, readonly, ret;
+ uint32_t op, rnd;
+ u_int i;
+ int dir;
char *ckpt_config, ckpt_name[64];
+ bool ckpt_available, intxn, iso_snapshot, positioned, readonly;
tinfo = arg;
conn = g.wts_conn;
- keybuf = valbuf = NULL;
- readonly = 0; /* -Wconditional-uninitialized */
+ readonly = false; /* -Wconditional-uninitialized */
+
+ /* Initialize tracking of snapshot isolation transaction returns. */
+ snap = NULL;
+ iso_snapshot = false;
+ memset(snap_list, 0, sizeof(snap_list));
+
+ /* Initialize the per-thread random number generator. */
+ __wt_random_init(&tinfo->rnd);
/* Set up the default key and value buffers. */
- key_gen_setup(&keybuf);
- val_gen_setup(&tinfo->rnd, &valbuf);
+ key = &_key;
+ key_gen_setup(key);
+ value = &_value;
+ val_gen_setup(&tinfo->rnd, value);
/* Set the first operation where we'll create sessions and cursors. */
session_op = 0;
@@ -243,32 +424,29 @@ ops(void *arg)
/* Set the first operation where we'll perform checkpoint operations. */
ckpt_op = g.c_checkpoints ? mmrand(&tinfo->rnd, 100, 10000) : 0;
- ckpt_available = 0;
+ ckpt_available = false;
- for (intxn = 0; !tinfo->quit; ++tinfo->ops) {
+ for (intxn = false; !tinfo->quit; ++tinfo->ops) {
/*
* We can't checkpoint or swap sessions/cursors while in a
* transaction, resolve any running transaction.
*/
if (intxn &&
(tinfo->ops == ckpt_op || tinfo->ops == session_op)) {
- if ((ret = session->commit_transaction(
- session, NULL)) != 0)
- die(ret, "session.commit_transaction");
+ testutil_check(
+ session->commit_transaction(session, NULL));
++tinfo->commit;
- intxn = 0;
+ intxn = false;
}
/* Open up a new session and cursors. */
if (tinfo->ops == session_op ||
session == NULL || cursor == NULL) {
- if (session != NULL &&
- (ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ if (session != NULL)
+ testutil_check(session->close(session, NULL));
- if ((ret = conn->open_session(conn, NULL,
- ops_session_config(&tinfo->rnd), &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(
+ conn->open_session(conn, NULL, NULL, &session));
/*
* 10% of the time, perform some read-only operations
@@ -283,15 +461,14 @@ ops(void *arg)
*/
if (!SINGLETHREADED && !DATASOURCE("lsm") &&
ckpt_available && mmrand(&tinfo->rnd, 1, 10) == 1) {
- if ((ret = session->open_cursor(session,
- g.uri, NULL, ckpt_name, &cursor)) != 0)
- die(ret, "session.open_cursor");
+ testutil_check(session->open_cursor(session,
+ g.uri, NULL, ckpt_name, &cursor));
/* Pick the next session/cursor close/open. */
session_op += 250;
/* Checkpoints are read-only. */
- readonly = 1;
+ readonly = true;
} else {
/*
* Open two cursors: one for overwriting and one
@@ -306,33 +483,43 @@ ops(void *arg)
* want to have to specify the record number,
* which requires an append configuration.
*/
- if ((ret = session->open_cursor(session, g.uri,
- NULL, "overwrite", &cursor)) != 0)
- die(ret, "session.open_cursor");
- if ((g.type == FIX || g.type == VAR) &&
- (ret = session->open_cursor(session, g.uri,
- NULL, "append", &cursor_insert)) != 0)
- die(ret, "session.open_cursor");
+ testutil_check(session->open_cursor(session,
+ g.uri, NULL, "overwrite", &cursor));
+ if (g.type == FIX || g.type == VAR)
+ testutil_check(session->open_cursor(
+ session, g.uri,
+ NULL, "append", &cursor_insert));
/* Pick the next session/cursor close/open. */
- session_op += 100 * mmrand(&tinfo->rnd, 1, 50);
+ session_op += mmrand(&tinfo->rnd, 100, 5000);
/* Updates supported. */
- readonly = 0;
+ readonly = false;
}
}
/* Checkpoint the database. */
if (tinfo->ops == ckpt_op && g.c_checkpoints) {
/*
- * LSM and data-sources don't support named checkpoints,
+ * Checkpoints are single-threaded inside WiredTiger,
+ * skip our checkpoint if another thread is already
+ * doing one.
+ */
+ ret = pthread_rwlock_trywrlock(&g.checkpoint_lock);
+ if (ret == EBUSY)
+ goto skip_checkpoint;
+ testutil_check(ret);
+
+ /*
+ * LSM and data-sources don't support named checkpoints
* and we can't drop a named checkpoint while there's a
- * cursor open on it, otherwise 20% of the time name the
- * checkpoint.
+ * backup in progress, otherwise name the checkpoint 5%
+ * of the time.
*/
- if (DATASOURCE("helium") || DATASOURCE("kvsbdb") ||
- DATASOURCE("lsm") || readonly ||
- mmrand(&tinfo->rnd, 1, 5) == 1)
+ if (mmrand(&tinfo->rnd, 1, 20) != 1 ||
+ DATASOURCE("helium") ||
+ DATASOURCE("kvsbdb") || DATASOURCE("lsm") ||
+ pthread_rwlock_trywrlock(&g.backup_lock) == EBUSY)
ckpt_config = NULL;
else {
(void)snprintf(ckpt_name, sizeof(ckpt_name),
@@ -340,22 +527,22 @@ ops(void *arg)
ckpt_config = ckpt_name;
}
- /* Named checkpoints lock out backups */
- if (ckpt_config != NULL &&
- (ret = pthread_rwlock_wrlock(&g.backup_lock)) != 0)
- die(ret,
- "pthread_rwlock_wrlock: backup lock");
-
- if ((ret =
- session->checkpoint(session, ckpt_config)) != 0)
- die(ret, "session.checkpoint%s%s",
- ckpt_config == NULL ? "" : ": ",
+ ret = session->checkpoint(session, ckpt_config);
+ /*
+ * We may be trying to create a named checkpoint while
+ * we hold a cursor open to the previous checkpoint.
+ * Tolerate EBUSY.
+ */
+ if (ret != 0 && ret != EBUSY)
+ testutil_die(ret, "%s",
ckpt_config == NULL ? "" : ckpt_config);
+ ret = 0;
- if (ckpt_config != NULL &&
- (ret = pthread_rwlock_unlock(&g.backup_lock)) != 0)
- die(ret,
- "pthread_rwlock_wrlock: backup lock");
+ if (ckpt_config != NULL)
+ testutil_check(
+ pthread_rwlock_unlock(&g.backup_lock));
+ testutil_check(
+ pthread_rwlock_unlock(&g.checkpoint_lock));
/* Rephrase the checkpoint name for cursor open. */
if (ckpt_config == NULL)
@@ -364,29 +551,31 @@ ops(void *arg)
else
(void)snprintf(ckpt_name, sizeof(ckpt_name),
"checkpoint=thread-%d", tinfo->id);
- ckpt_available = 1;
+ ckpt_available = true;
- /* Pick the next checkpoint operation. */
- ckpt_op += 1000 * mmrand(&tinfo->rnd, 5, 20);
+skip_checkpoint: /* Pick the next checkpoint operation. */
+ ckpt_op += mmrand(&tinfo->rnd, 5000, 20000);
}
/*
- * If we're not single-threaded and we're not in a transaction,
- * start a transaction 20% of the time.
+ * If we're not single-threaded and not in a transaction, choose
+ * an isolation level and start a transaction some percentage of
+ * the time.
*/
- if (!SINGLETHREADED && !intxn &&
- mmrand(&tinfo->rnd, 1, 10) >= 8) {
- if ((ret =
- session->begin_transaction(session, NULL)) != 0)
- die(ret, "session.begin_transaction");
- intxn = 1;
+ if (!SINGLETHREADED &&
+ !intxn && mmrand(&tinfo->rnd, 1, 100) >= g.c_txn_freq) {
+ testutil_check(
+ session->reconfigure(session,
+ isolation_config(&tinfo->rnd, &iso_snapshot)));
+ testutil_check(
+ session->begin_transaction(session, NULL));
+
+ snap = iso_snapshot ? snap_list : NULL;
+ intxn = true;
}
- insert = notfound = 0;
-
- keyno = mmrand(&tinfo->rnd, 1, g.rows);
- key.data = keybuf;
- value.data = valbuf;
+ keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows);
+ positioned = false;
/*
* Perform some number of operations: the percentage of deletes,
@@ -395,32 +584,35 @@ ops(void *arg)
* of deletes will mean fewer inserts and writes. Modifications
* are always followed by a read to confirm it worked.
*/
- op = readonly ? UINT32_MAX : (uint32_t)(rng(&tinfo->rnd) % 100);
+ op = readonly ? UINT32_MAX : mmrand(&tinfo->rnd, 1, 100);
if (op < g.c_delete_pct) {
++tinfo->remove;
switch (g.type) {
case ROW:
- /*
- * If deleting a non-existent record, the cursor
- * won't be positioned, and so can't do a next.
- */
- if (row_remove(cursor, &key, keyno, &notfound))
- goto deadlock;
+ ret = row_remove(cursor, key, keyno);
break;
case FIX:
case VAR:
- if (col_remove(cursor, &key, keyno, &notfound))
- goto deadlock;
+ ret = col_remove(cursor, key, keyno);
break;
}
+ if (ret == 0) {
+ positioned = true;
+ if (snap != NULL && (size_t)
+ (snap - snap_list) < WT_ELEMENTS(snap_list))
+ snap_track(snap++, keyno, NULL, NULL);
+ } else {
+ positioned = false;
+ if (ret == WT_ROLLBACK && intxn)
+ goto deadlock;
+ }
} else if (op < g.c_delete_pct + g.c_insert_pct) {
++tinfo->insert;
switch (g.type) {
case ROW:
- if (row_insert(
- tinfo, cursor, &key, &value, keyno))
- goto deadlock;
- insert = 1;
+ key_gen_insert(&tinfo->rnd, key, keyno);
+ val_gen(&tinfo->rnd, value, keyno);
+ ret = row_insert(cursor, key, value, keyno);
break;
case FIX:
case VAR:
@@ -433,38 +625,60 @@ ops(void *arg)
goto skip_insert;
/* Insert, then reset the insert cursor. */
- if (col_insert(tinfo,
- cursor_insert, &key, &value, &keyno))
- goto deadlock;
- if ((ret =
- cursor_insert->reset(cursor_insert)) != 0)
- die(ret, "cursor.reset");
-
- insert = 1;
+ val_gen(&tinfo->rnd, value, g.rows + 1);
+ ret = col_insert(
+ cursor_insert, key, value, &keyno);
+ testutil_check(
+ cursor_insert->reset(cursor_insert));
break;
}
+ positioned = false;
+ if (ret == 0) {
+ if (snap != NULL && (size_t)
+ (snap - snap_list) < WT_ELEMENTS(snap_list))
+ snap_track(snap++, keyno,
+ g.type == ROW ? key : NULL, value);
+ } else
+ if (ret == WT_ROLLBACK && intxn)
+ goto deadlock;
} else if (
op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) {
++tinfo->update;
switch (g.type) {
case ROW:
- if (row_update(
- tinfo, cursor, &key, &value, keyno))
- goto deadlock;
+ key_gen(key, keyno);
+ val_gen(&tinfo->rnd, value, keyno);
+ ret = row_update(cursor, key, value, keyno);
break;
case FIX:
case VAR:
-skip_insert: if (col_update(
- tinfo, cursor, &key, &value, keyno))
- goto deadlock;
+skip_insert: val_gen(&tinfo->rnd, value, keyno);
+ ret = col_update(cursor, key, value, keyno);
break;
}
+ if (ret == 0) {
+ positioned = true;
+ if (snap != NULL && (size_t)
+ (snap - snap_list) < WT_ELEMENTS(snap_list))
+ snap_track(snap++, keyno, NULL, value);
+ } else {
+ positioned = false;
+ if (ret == WT_ROLLBACK && intxn)
+ goto deadlock;
+ }
} else {
++tinfo->search;
- if (read_row(cursor, &key, keyno))
- if (intxn)
+ ret = read_row(cursor, key, value, keyno);
+ if (ret == 0) {
+ positioned = true;
+ if (snap != NULL && (size_t)
+ (snap - snap_list) < WT_ELEMENTS(snap_list))
+ snap_track(snap++, keyno, NULL, value);
+ } else {
+ positioned = false;
+ if (ret == WT_ROLLBACK && intxn)
goto deadlock;
- continue;
+ }
}
/*
@@ -472,59 +686,64 @@ skip_insert: if (col_update(
* insert, do a small number of next/prev cursor operations in
* a random direction.
*/
- if (!insert) {
+ if (positioned) {
dir = (int)mmrand(&tinfo->rnd, 0, 1);
- for (np = 0; np < mmrand(&tinfo->rnd, 1, 8); ++np) {
- if (notfound)
- break;
- if (nextprev(cursor, dir, &notfound))
+ for (i = 0; i < mmrand(&tinfo->rnd, 1, 100); ++i) {
+ if ((ret = nextprev(cursor, dir)) == 0)
+ continue;
+ if (ret == WT_ROLLBACK && intxn)
goto deadlock;
+ break;
}
}
- /* Read to confirm the operation. */
- ++tinfo->search;
- if (read_row(cursor, &key, keyno))
- goto deadlock;
-
/* Reset the cursor: there is no reason to keep pages pinned. */
- if (cursor != NULL && (ret = cursor->reset(cursor)) != 0)
- die(ret, "cursor.reset");
+ testutil_check(cursor->reset(cursor));
/*
- * If we're in the transaction, commit 40% of the time and
+ * If we're in a transaction, commit 40% of the time and
* rollback 10% of the time.
*/
- if (intxn)
- switch (mmrand(&tinfo->rnd, 1, 10)) {
- case 1: case 2: case 3: case 4: /* 40% */
- if ((ret = session->commit_transaction(
- session, NULL)) != 0)
- die(ret, "session.commit_transaction");
- ++tinfo->commit;
- intxn = 0;
- break;
- case 5: /* 10% */
- if (0) {
-deadlock: ++tinfo->deadlock;
- }
- if ((ret = session->rollback_transaction(
- session, NULL)) != 0)
- die(ret,
- "session.rollback_transaction");
- ++tinfo->rollback;
- intxn = 0;
- break;
- default:
- break;
+ if (!intxn || (rnd = mmrand(&tinfo->rnd, 1, 10)) > 5)
+ continue;
+
+ /*
+ * Ending the transaction. If in snapshot isolation, repeat the
+ * operations and confirm they're unchanged.
+ */
+ if (snap != NULL && (ret = snap_check(
+ cursor, snap_list, snap, key, value)) == WT_ROLLBACK)
+ goto deadlock;
+
+ switch (rnd) {
+ case 1: case 2: case 3: case 4: /* 40% */
+ testutil_check(
+ session->commit_transaction(session, NULL));
+ ++tinfo->commit;
+ break;
+ case 5: /* 10% */
+ if (0) {
+deadlock: ++tinfo->deadlock;
}
+ testutil_check(
+ session->rollback_transaction(session, NULL));
+ ++tinfo->rollback;
+ break;
+ }
+
+ intxn = false;
+ snap = NULL;
}
- if (session != NULL && (ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ if (session != NULL)
+ testutil_check(session->close(session, NULL));
- free(keybuf);
- free(valbuf);
+ for (i = 0; i < WT_ELEMENTS(snap_list); ++i) {
+ free(snap_list[i].kdata);
+ free(snap_list[i].vdata);
+ }
+ free(key->mem);
+ free(value->mem);
tinfo->state = TINFO_COMPLETE;
return (NULL);
@@ -539,56 +758,59 @@ wts_read_scan(void)
{
WT_CONNECTION *conn;
WT_CURSOR *cursor;
- WT_ITEM key;
+ WT_DECL_RET;
+ WT_ITEM key, value;
WT_SESSION *session;
- uint64_t cnt, last_cnt;
- uint8_t *keybuf;
- int ret;
+ uint64_t keyno, last_keyno;
conn = g.wts_conn;
- /* Set up the default key buffer. */
- key_gen_setup(&keybuf);
+ /* Set up the default key/value buffers. */
+ key_gen_setup(&key);
+ val_gen_setup(NULL, &value);
/* Open a session and cursor pair. */
- if ((ret = conn->open_session(
- conn, NULL, ops_session_config(NULL), &session)) != 0)
- die(ret, "connection.open_session");
- if ((ret = session->open_cursor(
- session, g.uri, NULL, NULL, &cursor)) != 0)
- die(ret, "session.open_cursor");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(
+ session->open_cursor(session, g.uri, NULL, NULL, &cursor));
/* Check a random subset of the records using the key. */
- for (last_cnt = cnt = 0; cnt < g.key_cnt;) {
- cnt += rng(NULL) % 17 + 1;
- if (cnt > g.rows)
- cnt = g.rows;
- if (cnt - last_cnt > 1000) {
- track("read row scan", cnt, NULL);
- last_cnt = cnt;
+ for (last_keyno = keyno = 0; keyno < g.key_cnt;) {
+ keyno += mmrand(NULL, 1, 17);
+ if (keyno > g.rows)
+ keyno = g.rows;
+ if (keyno - last_keyno > 1000) {
+ track("read row scan", keyno, NULL);
+ last_keyno = keyno;
}
- key.data = keybuf;
- if ((ret = read_row(cursor, &key, cnt)) != 0)
- die(ret, "read_scan");
+ switch (ret = read_row(cursor, &key, &value, keyno)) {
+ case 0:
+ case WT_NOTFOUND:
+ case WT_ROLLBACK:
+ break;
+ default:
+ testutil_die(
+ ret, "wts_read_scan: read row %" PRIu64, keyno);
+ }
}
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
- free(keybuf);
+ free(key.mem);
+ free(value.mem);
}
/*
* read_row --
* Read and verify a single element in a row- or column-store file.
*/
-static int
-read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno)
+int
+read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
{
- WT_ITEM bdb_value, value;
+ static int sn = 0;
WT_SESSION *session;
- int notfound, ret;
+ int exact, ret;
uint8_t bitfield;
session = cursor->session;
@@ -605,56 +827,74 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno)
cursor->set_key(cursor, keyno);
break;
case ROW:
- key_gen((uint8_t *)key->data, &key->size, keyno);
+ key_gen(key, keyno);
cursor->set_key(cursor, key);
break;
}
- if ((ret = cursor->search(cursor)) == 0) {
+ if (sn) {
+ ret = cursor->search_near(cursor, &exact);
+ if (ret == 0 && exact != 0)
+ ret = WT_NOTFOUND;
+ sn = 0;
+ } else {
+ ret = cursor->search(cursor);
+ sn = 1;
+ }
+ switch (ret) {
+ case 0:
if (g.type == FIX) {
- ret = cursor->get_value(cursor, &bitfield);
- value.data = &bitfield;
- value.size = 1;
- } else {
- ret = cursor->get_value(cursor, &value);
+ testutil_check(cursor->get_value(cursor, &bitfield));
+ *(uint8_t *)(value->data) = bitfield;
+ value->size = 1;
+ } else
+ testutil_check(cursor->get_value(cursor, value));
+ break;
+ case WT_NOTFOUND:
+ /*
+ * In fixed length stores, zero values at the end of the key
+ * space are returned as not found. Treat this the same as
+ * a zero value in the key space, to match BDB's behavior.
+ */
+ if (g.type == FIX) {
+ *(uint8_t *)(value->data) = 0;
+ value->size = 1;
+ ret = 0;
}
- }
- if (ret == WT_ROLLBACK)
+ break;
+ case WT_ROLLBACK:
return (WT_ROLLBACK);
- if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "read_row: read row %" PRIu64, keyno);
- /*
- * In fixed length stores, zero values at the end of the key space are
- * returned as not found. Treat this the same as a zero value in the
- * key space, to match BDB's behavior.
- */
- if (ret == WT_NOTFOUND && g.type == FIX) {
- bitfield = 0;
- value.data = &bitfield;
- value.size = 1;
- ret = 0;
+ default:
+ testutil_die(ret, "read_row: read row %" PRIu64, keyno);
}
+#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
- return (0);
+ return (ret);
/* Retrieve the BDB value. */
+ {
+ WT_ITEM bdb_value;
+ int notfound;
+
bdb_read(keyno, &bdb_value.data, &bdb_value.size, &notfound);
/* Check for not-found status. */
if (notfound_chk("read_row", ret, notfound, keyno))
- return (0);
+ return (ret);
/* Compare the two. */
- if (value.size != bdb_value.size ||
- memcmp(value.data, bdb_value.data, value.size) != 0) {
+ if (value->size != bdb_value.size ||
+ memcmp(value->data, bdb_value.data, value->size) != 0) {
fprintf(stderr,
- "read_row: read row value mismatch %" PRIu64 ":\n", keyno);
+ "read_row: value mismatch %" PRIu64 ":\n", keyno);
print_item("bdb", &bdb_value);
- print_item(" wt", &value);
- die(0, NULL);
+ print_item(" wt", value);
+ testutil_die(0, NULL);
}
- return (0);
+ }
+#endif
+ return (ret);
}
/*
@@ -662,24 +902,19 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno)
* Read and verify the next/prev element in a row- or column-store file.
*/
static int
-nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
+nextprev(WT_CURSOR *cursor, int next)
{
- WT_ITEM key, value, bdb_key, bdb_value;
- WT_SESSION *session;
+ WT_DECL_RET;
+ WT_ITEM key, value;
uint64_t keyno;
- int notfound, ret;
uint8_t bitfield;
const char *which;
- char *p;
- session = cursor->session;
+ keyno = 0;
which = next ? "next" : "prev";
- keyno = 0;
- ret = next ? cursor->next(cursor) : cursor->prev(cursor);
- if (ret == WT_ROLLBACK)
- return (WT_ROLLBACK);
- if (ret == 0)
+ switch (ret = (next ? cursor->next(cursor) : cursor->prev(cursor))) {
+ case 0:
switch (g.type) {
case FIX:
if ((ret = cursor->get_key(cursor, &keyno)) == 0 &&
@@ -697,19 +932,35 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
ret = cursor->get_value(cursor, &value);
break;
}
- if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "%s", which);
- *notfoundp = (ret == WT_NOTFOUND);
+ if (ret != 0)
+ testutil_die(ret, "nextprev: get_key/get_value");
+ break;
+ case WT_NOTFOUND:
+ break;
+ case WT_ROLLBACK:
+ return (WT_ROLLBACK);
+ default:
+ testutil_die(ret, "%s", which);
+ }
+#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
- return (0);
+ return (ret);
+
+ {
+ WT_ITEM bdb_key, bdb_value;
+ WT_SESSION *session;
+ int notfound;
+ char *p;
+
+ session = cursor->session;
/* Retrieve the BDB value. */
bdb_np(next, &bdb_key.data, &bdb_key.size,
&bdb_value.data, &bdb_value.size, &notfound);
if (notfound_chk(
next ? "nextprev(next)" : "nextprev(prev)", ret, notfound, keyno))
- return (0);
+ return (ret);
/* Compare the two. */
if (g.type == ROW) {
@@ -718,7 +969,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
fprintf(stderr, "nextprev: %s key mismatch:\n", which);
print_item("bdb-key", &bdb_key);
print_item(" wt-key", &key);
- die(0, NULL);
+ testutil_die(0, NULL);
}
} else {
if (keyno != (uint64_t)atoll(bdb_key.data)) {
@@ -728,7 +979,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
"nextprev: %s key mismatch: %.*s != %" PRIu64 "\n",
which,
(int)bdb_key.size, (char *)bdb_key.data, keyno);
- die(0, NULL);
+ testutil_die(0, NULL);
}
}
if (value.size != bdb_value.size ||
@@ -736,7 +987,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
fprintf(stderr, "nextprev: %s value mismatch:\n", which);
print_item("bdb-value", &bdb_value);
print_item(" wt-value", &value);
- die(0, NULL);
+ testutil_die(0, NULL);
}
if (g.logging == LOG_OPS)
@@ -758,7 +1009,9 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
keyno, (int)value.size, (char *)value.data);
break;
}
- return (0);
+ }
+#endif
+ return (ret);
}
/*
@@ -766,37 +1019,38 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
* Update a row in a row-store file.
*/
static int
-row_update(TINFO *tinfo,
- WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
+row_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
{
+ WT_DECL_RET;
WT_SESSION *session;
- int notfound, ret;
session = cursor->session;
- key_gen((uint8_t *)key->data, &key->size, keyno);
- val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, keyno);
-
/* Log the operation */
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s{%.*s}\n%-10s{%.*s}",
- "putK", (int)key->size, (char *)key->data,
- "putV", (int)value->size, (char *)value->data);
+ "%-10s{%.*s}, {%.*s}",
+ "put",
+ (int)key->size, key->data, (int)value->size, value->data);
cursor->set_key(cursor, key);
cursor->set_value(cursor, value);
- ret = cursor->update(cursor);
- if (ret == WT_ROLLBACK)
+ switch (ret = cursor->update(cursor)) {
+ case 0:
+ break;
+ case WT_ROLLBACK:
return (WT_ROLLBACK);
- if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "row_update: update row %" PRIu64 " by key", keyno);
+ default:
+ testutil_die(ret,
+ "row_update: update row %" PRIu64 " by key", keyno);
+ }
+#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- bdb_update(key->data, key->size, value->data, value->size, &notfound);
- (void)notfound_chk("row_update", ret, notfound, keyno);
+ bdb_update(key->data, key->size, value->data, value->size);
+#endif
return (0);
}
@@ -805,16 +1059,13 @@ row_update(TINFO *tinfo,
* Update a row in a column-store file.
*/
static int
-col_update(TINFO *tinfo,
- WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
+col_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
{
+ WT_DECL_RET;
WT_SESSION *session;
- int notfound, ret;
session = cursor->session;
- val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, keyno);
-
/* Log the operation */
if (g.logging == LOG_OPS) {
if (g.type == FIX)
@@ -834,18 +1085,24 @@ col_update(TINFO *tinfo,
cursor->set_value(cursor, *(uint8_t *)value->data);
else
cursor->set_value(cursor, value);
- ret = cursor->update(cursor);
- if (ret == WT_ROLLBACK)
+ switch (ret = cursor->update(cursor)) {
+ case 0:
+ break;
+ case WT_ROLLBACK:
return (WT_ROLLBACK);
- if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "col_update: %" PRIu64, keyno);
+ default:
+ testutil_die(ret, "col_update: %" PRIu64, keyno);
+ }
+#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- key_gen((uint8_t *)key->data, &key->size, keyno);
- bdb_update(key->data, key->size, value->data, value->size, &notfound);
- (void)notfound_chk("col_update", ret, notfound, keyno);
+ key_gen(key, keyno);
+ bdb_update(key->data, key->size, value->data, value->size);
+#else
+ (void)key; /* [-Wunused-variable] */
+#endif
return (0);
}
@@ -860,12 +1117,8 @@ table_append_init(void)
g.append_max = (size_t)g.c_threads * 10;
g.append_cnt = 0;
- if (g.append != NULL) {
- free(g.append);
- g.append = NULL;
- }
- if ((g.append = calloc(g.append_max, sizeof(uint64_t))) == NULL)
- die(errno, "calloc");
+ free(g.append);
+ g.append = dcalloc(g.append_max, sizeof(uint64_t));
}
/*
@@ -876,13 +1129,13 @@ static void
table_append(uint64_t keyno)
{
uint64_t *p, *ep;
- int done, ret;
+ int done;
ep = g.append + g.append_max;
/*
* We don't want to ignore records we append, which requires we update
- * the "last row" as we insert new records. Threads allocating record
+ * the "last row" as we insert new records. Threads allocating record
* numbers can race with other threads, so the thread allocating record
* N may return after the thread allocating N + 1. We can't update a
* record before it's been inserted, and so we can't leave gaps when the
@@ -900,15 +1153,14 @@ table_append(uint64_t keyno)
* to sleep (so the append table fills up), then N threads of control
* used the same g.append_cnt value to decide there was an available
* slot in the append table and both allocated new records, we could run
- * out of space in the table. It's unfortunately not even unlikely in
+ * out of space in the table. It's unfortunately not even unlikely in
* the case of a large number of threads all inserting as fast as they
* can and a single thread going to sleep for an unexpectedly long time.
* If it happens, sleep and retry until earlier records are resolved
* and we find a slot.
*/
for (done = 0;;) {
- if ((ret = pthread_rwlock_wrlock(&g.append_lock)) != 0)
- die(ret, "pthread_rwlock_wrlock: append_lock");
+ testutil_check(pthread_rwlock_wrlock(&g.append_lock));
/*
* If this is the thread we've been waiting for, and its record
@@ -945,8 +1197,7 @@ table_append(uint64_t keyno)
break;
}
- if ((ret = pthread_rwlock_unlock(&g.append_lock)) != 0)
- die(ret, "pthread_rwlock_unlock: append_lock");
+ testutil_check(pthread_rwlock_unlock(&g.append_lock));
if (done)
break;
@@ -959,37 +1210,38 @@ table_append(uint64_t keyno)
* Insert a row in a row-store file.
*/
static int
-row_insert(TINFO *tinfo,
- WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
+row_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
{
+ WT_DECL_RET;
WT_SESSION *session;
- int notfound, ret;
session = cursor->session;
- key_gen_insert(&tinfo->rnd, (uint8_t *)key->data, &key->size, keyno);
- val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, keyno);
-
/* Log the operation */
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s{%.*s}\n%-10s{%.*s}",
- "insertK", (int)key->size, (char *)key->data,
- "insertV", (int)value->size, (char *)value->data);
+ "%-10s{%.*s}, {%.*s}",
+ "insert",
+ (int)key->size, key->data, (int)value->size, value->data);
cursor->set_key(cursor, key);
cursor->set_value(cursor, value);
- ret = cursor->insert(cursor);
- if (ret == WT_ROLLBACK)
+ switch (ret = cursor->insert(cursor)) {
+ case 0:
+ break;
+ case WT_ROLLBACK:
return (WT_ROLLBACK);
- if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "row_insert: insert row %" PRIu64 " by key", keyno);
+ default:
+ testutil_die(ret,
+ "row_insert: insert row %" PRIu64 " by key", keyno);
+ }
+#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- bdb_update(key->data, key->size, value->data, value->size, &notfound);
- (void)notfound_chk("row_insert", ret, notfound, keyno);
+ bdb_update(key->data, key->size, value->data, value->size);
+#endif
return (0);
}
@@ -998,28 +1250,27 @@ row_insert(TINFO *tinfo,
* Insert an element in a column-store file.
*/
static int
-col_insert(TINFO *tinfo,
- WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop)
+col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop)
{
+ WT_DECL_RET;
WT_SESSION *session;
uint64_t keyno;
- int notfound, ret;
session = cursor->session;
- val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, g.rows + 1);
-
if (g.type == FIX)
cursor->set_value(cursor, *(uint8_t *)value->data);
else
cursor->set_value(cursor, value);
- if ((ret = cursor->insert(cursor)) != 0) {
- if (ret == WT_ROLLBACK)
- return (WT_ROLLBACK);
- die(ret, "cursor.insert");
+ switch (ret = cursor->insert(cursor)) {
+ case 0:
+ break;
+ case WT_ROLLBACK:
+ return (WT_ROLLBACK);
+ default:
+ testutil_die(ret, "cursor.insert");
}
- if ((ret = cursor->get_key(cursor, &keyno)) != 0)
- die(ret, "cursor.get_key");
+ testutil_check(cursor->get_key(cursor, &keyno));
*keynop = (uint32_t)keyno;
table_append(keyno); /* Extend the object. */
@@ -1037,11 +1288,15 @@ col_insert(TINFO *tinfo,
(int)value->size, (char *)value->data);
}
+#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- key_gen((uint8_t *)key->data, &key->size, keyno);
- bdb_update(key->data, key->size, value->data, value->size, &notfound);
+ key_gen(key, keyno);
+ bdb_update(key->data, key->size, value->data, value->size);
+#else
+ (void)key; /* [-Wunused-variable] */
+#endif
return (0);
}
@@ -1050,14 +1305,14 @@ col_insert(TINFO *tinfo,
* Remove an row from a row-store file.
*/
static int
-row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
+row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno)
{
+ WT_DECL_RET;
WT_SESSION *session;
- int notfound, ret;
session = cursor->session;
- key_gen((uint8_t *)key->data, &key->size, keyno);
+ key_gen(key, keyno);
/* Log the operation */
if (g.logging == LOG_OPS)
@@ -1068,18 +1323,31 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
/* We use the cursor in overwrite mode, check for existence. */
if ((ret = cursor->search(cursor)) == 0)
ret = cursor->remove(cursor);
- if (ret == WT_ROLLBACK)
+ switch (ret) {
+ case 0:
+ case WT_NOTFOUND:
+ break;
+ case WT_ROLLBACK:
return (WT_ROLLBACK);
- if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "row_remove: remove %" PRIu64 " by key", keyno);
- *notfoundp = (ret == WT_NOTFOUND);
+ default:
+ testutil_die(ret,
+ "row_remove: remove %" PRIu64 " by key", keyno);
+ }
+#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
- return (0);
+ return (ret);
+
+ {
+ int notfound;
bdb_remove(keyno, &notfound);
(void)notfound_chk("row_remove", ret, notfound, keyno);
- return (0);
+ }
+#else
+ (void)key; /* [-Wunused-variable] */
+#endif
+ return (ret);
}
/*
@@ -1087,10 +1355,10 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
* Remove a row from a column-store file.
*/
static int
-col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
+col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno)
{
+ WT_DECL_RET;
WT_SESSION *session;
- int notfound, ret;
session = cursor->session;
@@ -1103,28 +1371,41 @@ col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
/* We use the cursor in overwrite mode, check for existence. */
if ((ret = cursor->search(cursor)) == 0)
ret = cursor->remove(cursor);
- if (ret == WT_ROLLBACK)
+ switch (ret) {
+ case 0:
+ case WT_NOTFOUND:
+ break;
+ case WT_ROLLBACK:
return (WT_ROLLBACK);
- if (ret != 0 && ret != WT_NOTFOUND)
- die(ret, "col_remove: remove %" PRIu64 " by key", keyno);
- *notfoundp = (ret == WT_NOTFOUND);
+ default:
+ testutil_die(ret,
+ "col_remove: remove %" PRIu64 " by key", keyno);
+ }
+#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
- return (0);
+ return (ret);
/*
* Deleting a fixed-length item is the same as setting the bits to 0;
* do the same thing for the BDB store.
*/
if (g.type == FIX) {
- key_gen((uint8_t *)key->data, &key->size, keyno);
- bdb_update(key->data, key->size, "\0", 1, &notfound);
- } else
+ key_gen(key, keyno);
+ bdb_update(key->data, key->size, "\0", 1);
+ } else {
+ int notfound;
+
bdb_remove(keyno, &notfound);
- (void)notfound_chk("col_remove", ret, notfound, keyno);
- return (0);
+ (void)notfound_chk("col_remove", ret, notfound, keyno);
+ }
+#else
+ (void)key; /* [-Wunused-variable] */
+#endif
+ return (ret);
}
+#ifdef HAVE_BERKELEY_DB
/*
* notfound_chk --
* Compare notfound returns for consistency.
@@ -1142,7 +1423,7 @@ notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno)
fprintf(stderr, " row %" PRIu64 ":", keyno);
fprintf(stderr,
" not found in Berkeley DB, found in WiredTiger\n");
- die(0, NULL);
+ testutil_die(0, NULL);
}
if (wt_ret == WT_NOTFOUND) {
fprintf(stderr, "%s: %s:", g.progname, f);
@@ -1150,7 +1431,7 @@ notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno)
fprintf(stderr, " row %" PRIu64 ":", keyno);
fprintf(stderr,
" found in Berkeley DB, not found in WiredTiger\n");
- die(0, NULL);
+ testutil_die(0, NULL);
}
return (0);
}
@@ -1165,7 +1446,7 @@ print_item(const char *tag, WT_ITEM *item)
static const char hex[] = "0123456789abcdef";
const uint8_t *data;
size_t size;
- int ch;
+ u_char ch;
data = item->data;
size = item->size;
@@ -1185,3 +1466,4 @@ print_item(const char *tag, WT_ITEM *item)
}
fprintf(stderr, "}\n");
}
+#endif
diff --git a/src/third_party/wiredtiger/test/format/salvage.c b/src/third_party/wiredtiger/test/format/salvage.c
index ca28e4b60d8..69805fb1018 100644
--- a/src/third_party/wiredtiger/test/format/salvage.c
+++ b/src/third_party/wiredtiger/test/format/salvage.c
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2014-2016 MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -36,18 +36,16 @@ static void
salvage(void)
{
WT_CONNECTION *conn;
+ WT_DECL_RET;
WT_SESSION *session;
- int ret;
conn = g.wts_conn;
track("salvage", 0ULL, NULL);
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
if ((ret = session->salvage(session, g.uri, "force=true")) != 0)
- die(ret, "session.salvage: %s", g.uri);
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_die(ret, "session.salvage: %s", g.uri);
+ testutil_check(session->close(session, NULL));
}
/*
@@ -101,37 +99,37 @@ corrupt(void)
return (0);
found: if (fstat(fd, &sb) == -1)
- die(errno, "salvage-corrupt: fstat");
+ testutil_die(errno, "salvage-corrupt: fstat");
- offset = mmrand(NULL, 0, sb.st_size);
+ offset = mmrand(NULL, 0, (u_int)sb.st_size);
len = (size_t)(20 + (sb.st_size / 100) * 2);
(void)snprintf(buf, sizeof(buf), "%s/slvg.corrupt", g.home);
if ((fp = fopen(buf, "w")) == NULL)
- die(errno, "salvage-corrupt: open: %s", buf);
+ testutil_die(errno, "salvage-corrupt: open: %s", buf);
(void)fprintf(fp,
- "salvage-corrupt: offset %" PRIuMAX ", length " SIZET_FMT "\n",
+ "salvage-corrupt: offset %" PRIuMAX ", length %" WT_SIZET_FMT "\n",
(uintmax_t)offset, len);
- (void)fclose(fp);
+ fclose_and_clear(&fp);
if (lseek(fd, offset, SEEK_SET) == -1)
- die(errno, "salvage-corrupt: lseek");
+ testutil_die(errno, "salvage-corrupt: lseek");
memset(buf, 'z', sizeof(buf));
for (; len > 0; len -= nw) {
nw = (size_t)(len > sizeof(buf) ? sizeof(buf) : len);
if (write(fd, buf, nw) == -1)
- die(errno, "salvage-corrupt: write");
+ testutil_die(errno, "salvage-corrupt: write");
}
if (close(fd) == -1)
- die(errno, "salvage-corrupt: close");
+ testutil_die(errno, "salvage-corrupt: close");
/*
* Save a copy of the corrupted file so we can replay the salvage step
* as necessary.
*/
if ((ret = system(copycmd)) != 0)
- die(ret, "salvage corrupt copy step failed");
+ testutil_die(ret, "salvage corrupt copy step failed");
return (1);
}
@@ -143,21 +141,24 @@ found: if (fstat(fd, &sb) == -1)
void
wts_salvage(void)
{
- int ret;
+ WT_DECL_RET;
/* Some data-sources don't support salvage. */
if (DATASOURCE("helium") || DATASOURCE("kvsbdb"))
return;
+ if (g.c_salvage == 0)
+ return;
+
/*
* Save a copy of the interesting files so we can replay the salvage
* step as necessary.
*/
if ((ret = system(g.home_salvage_copy)) != 0)
- die(ret, "salvage copy step failed");
+ testutil_die(ret, "salvage copy step failed");
/* Salvage, then verify. */
- wts_open(g.home, 1, &g.wts_conn);
+ wts_open(g.home, true, &g.wts_conn);
salvage();
wts_verify("post-salvage verify");
wts_close();
@@ -173,7 +174,7 @@ wts_salvage(void)
/* Corrupt the file randomly, salvage, then verify. */
if (corrupt()) {
- wts_open(g.home, 1, &g.wts_conn);
+ wts_open(g.home, true, &g.wts_conn);
salvage();
wts_verify("post-corrupt-salvage verify");
wts_close();
diff --git a/src/third_party/wiredtiger/test/format/smoke.sh b/src/third_party/wiredtiger/test/format/smoke.sh
index fe53f64229f..0c86b5e57c6 100755
--- a/src/third_party/wiredtiger/test/format/smoke.sh
+++ b/src/third_party/wiredtiger/test/format/smoke.sh
@@ -1,9 +1,11 @@
#! /bin/sh
+set -e
+
# Smoke-test format as part of running "make check".
-args="-1 -c "." data_source=table ops=100000 rows=10000 threads=4 compression=none"
+args="-1 -c "." data_source=table ops=50000 rows=10000 threads=4 compression=none logging_compression=none"
-./t $args file_type=fix || exit 1
-./t $args file_type=row || exit 1
-./t $args file_type=row data_source=lsm || exit 1
-./t $args file_type=var || exit 1
+$TEST_WRAPPER ./t $args file_type=fix
+$TEST_WRAPPER ./t $args file_type=row
+$TEST_WRAPPER ./t $args file_type=row data_source=lsm
+$TEST_WRAPPER ./t $args file_type=var
diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c
index 21dc02ce522..da8a496f4ee 100644
--- a/src/third_party/wiredtiger/test/format/t.c
+++ b/src/third_party/wiredtiger/test/format/t.c
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2014-2016 MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -30,8 +30,10 @@
GLOBAL g;
+static void format_die(void);
static void startup(void);
-static void usage(void);
+static void usage(void)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
extern int __wt_optind;
extern char *__wt_optarg;
@@ -40,15 +42,21 @@ int
main(int argc, char *argv[])
{
time_t start;
- int ch, i, reps, ret;
+ int ch, onerun, reps;
const char *config, *home;
+ custom_die = format_die; /* Local death handler. */
+
config = NULL;
- if ((g.progname = strrchr(argv[0], '/')) == NULL)
+#ifdef _WIN32
+ g.progname = "t_format.exe";
+#else
+ if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL)
g.progname = argv[0];
else
++g.progname;
+#endif
#if 0
/* Configure the GNU malloc for debugging. */
@@ -60,15 +68,16 @@ main(int argc, char *argv[])
#endif
/* Track progress unless we're re-directing output to a file. */
- g.track = isatty(1) ? 1 : 0;
+ g.c_quiet = isatty(1) ? 0 : 1;
/* Set values from the command line. */
home = NULL;
+ onerun = 0;
while ((ch = __wt_getopt(
g.progname, argc, argv, "1C:c:H:h:Llqrt:")) != EOF)
switch (ch) {
case '1': /* One run */
- g.c_runs = 1;
+ onerun = 1;
break;
case 'C': /* wiredtiger_open config */
g.config_open = __wt_optarg;
@@ -94,7 +103,7 @@ main(int argc, char *argv[])
g.logging = LOG_OPS;
break;
case 'q': /* Quiet */
- g.track = 0;
+ g.c_quiet = 1;
break;
case 'r': /* Replay a run */
g.replay = 1;
@@ -105,14 +114,8 @@ main(int argc, char *argv[])
argc -= __wt_optind;
argv += __wt_optind;
- /*
- * Initialize the global RNG. Start with the standard seeds, and then
- * use seconds since the Epoch modulo a prime to run the RNG for some
- * number of steps, so we don't start with the same values every time.
- */
+ /* Initialize the global RNG. */
__wt_random_init(&g.rnd);
- for (i = (int)time(NULL) % 10007; i > 0; --i)
- (void)__wt_random(&g.rnd);
/* Set up paths. */
path_setup(home);
@@ -120,9 +123,9 @@ main(int argc, char *argv[])
/* If it's a replay, use the home directory's CONFIG file. */
if (g.replay) {
if (config != NULL)
- die(EINVAL, "-c incompatible with -r");
+ testutil_die(EINVAL, "-c incompatible with -r");
if (access(g.home_config, R_OK) != 0)
- die(ENOENT, "%s", g.home_config);
+ testutil_die(ENOENT, "%s", g.home_config);
config = g.home_config;
}
@@ -160,20 +163,21 @@ main(int argc, char *argv[])
if (g.replay && SINGLETHREADED)
g.c_runs = 1;
- /* Use line buffering on stdout so status updates aren't buffered. */
- (void)setvbuf(stdout, NULL, _IOLBF, 32);
-
/*
- * Initialize locks to single-thread named checkpoints and backups, and
- * to single-thread last-record updates.
+ * Let the command line -1 flag override runs configured from other
+ * sources.
*/
- if ((ret = pthread_rwlock_init(&g.append_lock, NULL)) != 0)
- die(ret, "pthread_rwlock_init: append lock");
- if ((ret = pthread_rwlock_init(&g.backup_lock, NULL)) != 0)
- die(ret, "pthread_rwlock_init: backup lock");
+ if (onerun)
+ g.c_runs = 1;
- /* Seed the random number generator. */
- srand((u_int)(0xdeadbeef ^ (u_int)time(NULL)));
+ /*
+ * Initialize locks to single-thread named checkpoints and backups, last
+ * last-record updates, and failures.
+ */
+ testutil_check(pthread_rwlock_init(&g.append_lock, NULL));
+ testutil_check(pthread_rwlock_init(&g.backup_lock, NULL));
+ testutil_check(pthread_rwlock_init(&g.checkpoint_lock, NULL));
+ testutil_check(pthread_rwlock_init(&g.death_lock, NULL));
printf("%s: process %" PRIdMAX "\n", g.progname, (intmax_t)getpid());
while (++g.run_cnt <= g.c_runs || g.c_runs == 0 ) {
@@ -186,10 +190,12 @@ main(int argc, char *argv[])
start = time(NULL);
track("starting up", 0ULL, NULL);
+#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED)
bdb_open(); /* Initial file config */
- wts_open(g.home, 1, &g.wts_conn);
- wts_create();
+#endif
+ wts_open(g.home, true, &g.wts_conn);
+ wts_init();
wts_load(); /* Load initial records */
wts_verify("post-bulk verify"); /* Verify */
@@ -225,8 +231,10 @@ main(int argc, char *argv[])
}
track("shutting down", 0ULL, NULL);
+#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED)
bdb_close();
+#endif
wts_close();
/*
@@ -242,25 +250,24 @@ main(int argc, char *argv[])
wts_salvage();
/* Overwrite the progress line with a completion line. */
- if (g.track)
+ if (!g.c_quiet)
printf("\r%78s\r", " ");
printf("%4d: %s, %s (%.0f seconds)\n",
g.run_cnt, g.c_data_source,
g.c_file_type, difftime(time(NULL), start));
+ fflush(stdout);
}
/* Flush/close any logging information. */
- if (g.logfp != NULL)
- (void)fclose(g.logfp);
- if (g.rand_log != NULL)
- (void)fclose(g.rand_log);
+ fclose_and_clear(&g.logfp);
+ fclose_and_clear(&g.randfp);
config_print(0);
- if ((ret = pthread_rwlock_destroy(&g.append_lock)) != 0)
- die(ret, "pthread_rwlock_destroy: append lock");
- if ((ret = pthread_rwlock_destroy(&g.backup_lock)) != 0)
- die(ret, "pthread_rwlock_destroy: backup lock");
+ testutil_check(pthread_rwlock_destroy(&g.append_lock));
+ testutil_check(pthread_rwlock_destroy(&g.backup_lock));
+ testutil_check(pthread_rwlock_destroy(&g.checkpoint_lock));
+ testutil_check(pthread_rwlock_destroy(&g.death_lock));
config_clear();
@@ -274,81 +281,51 @@ main(int argc, char *argv[])
static void
startup(void)
{
- int ret;
-
- /* Close the logging file. */
- if (g.logfp != NULL) {
- (void)fclose(g.logfp);
- g.logfp = NULL;
- }
-
- /* Close the random number logging file. */
- if (g.rand_log != NULL) {
- (void)fclose(g.rand_log);
- g.rand_log = NULL;
- }
+ WT_DECL_RET;
- /* Create home if it doesn't yet exist. */
- if (access(g.home, X_OK) != 0 && mkdir(g.home, 0777) != 0)
- die(errno, "mkdir: %s", g.home);
+ /* Flush/close any logging information. */
+ fclose_and_clear(&g.logfp);
+ fclose_and_clear(&g.randfp);
- /* Remove the run's files except for rand. */
+ /* Create or initialize the home and data-source directories. */
if ((ret = system(g.home_init)) != 0)
- die(ret, "home directory initialization failed");
+ testutil_die(ret, "home directory initialization failed");
- /* Create the data-source directory. */
- if (mkdir(g.home_kvs, 0777) != 0)
- die(errno, "mkdir: %s", g.home_kvs);
+ /* Open/truncate the logging file. */
+ if (g.logging != 0 && (g.logfp = fopen(g.home_log, "w")) == NULL)
+ testutil_die(errno, "fopen: %s", g.home_log);
- /*
- * Open/truncate the logging file; line buffer so we see up-to-date
- * information on error.
- */
- if (g.logging != 0) {
- if ((g.logfp = fopen(g.home_log, "w")) == NULL)
- die(errno, "fopen: %s", g.home_log);
- (void)setvbuf(g.logfp, NULL, _IOLBF, 0);
- }
-
- /*
- * Open/truncate the random number logging file; line buffer so we see
- * up-to-date information on error.
- */
- if ((g.rand_log = fopen(g.home_rand, g.replay ? "r" : "w")) == NULL)
- die(errno, "%s", g.home_rand);
- (void)setvbuf(g.rand_log, NULL, _IOLBF, 32);
+ /* Open/truncate the random number logging file. */
+ if ((g.randfp = fopen(g.home_rand, g.replay ? "r" : "w")) == NULL)
+ testutil_die(errno, "%s", g.home_rand);
}
/*
* die --
- * Report an error and quit, dumping the configuration.
+ * Report an error, dumping the configuration.
*/
-void
-die(int e, const char *fmt, ...)
+static void
+format_die(void)
{
- va_list ap;
-
- if (fmt != NULL) { /* Death message. */
- fprintf(stderr, "%s: ", g.progname);
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
- if (e != 0)
- fprintf(stderr, ": %s", wiredtiger_strerror(e));
+ /*
+ * Single-thread error handling, our caller exits after calling
+ * us - don't release the lock.
+ */
+ (void)pthread_rwlock_wrlock(&g.death_lock);
+
+ /* Try and turn off tracking so it doesn't obscure the error message. */
+ if (!g.c_quiet) {
+ g.c_quiet = 1;
fprintf(stderr, "\n");
}
/* Flush/close any logging information. */
- if (g.logfp != NULL)
- (void)fclose(g.logfp);
- if (g.rand_log != NULL)
- (void)fclose(g.rand_log);
+ fclose_and_clear(&g.logfp);
+ fclose_and_clear(&g.randfp);
/* Display the configuration that failed. */
if (g.run_cnt)
config_print(1);
-
- exit(EXIT_FAILURE);
}
/*
diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c
index dff5ab43571..4f5e6528908 100644
--- a/src/third_party/wiredtiger/test/format/util.c
+++ b/src/third_party/wiredtiger/test/format/util.c
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2014-2016 MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -32,28 +32,11 @@
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
-static inline uint32_t
-kv_len(WT_RAND_STATE *rnd, uint64_t keyno, uint32_t min, uint32_t max)
-{
- /*
- * Focus on relatively small key/value items, admitting the possibility
- * of larger items. Pick a size close to the minimum most of the time,
- * only create a larger item 1 in 20 times, and a really big item 1 in
- * 1000 times. (Configuration can force large key/value minimum sizes,
- * where every key/value item is an overflow.)
- */
- if (keyno % 1000 == 0 && max < KILOBYTE(80)) {
- min = KILOBYTE(80);
- max = KILOBYTE(100);
- } else if (keyno % 20 != 0 && max > min + 20)
- max = min + 20;
- return (mmrand(rnd, min, max));
-}
-
void
key_len_setup(void)
{
size_t i;
+ uint32_t max;
/*
* The key is a variable length item with a leading 10-digit value.
@@ -63,73 +46,123 @@ key_len_setup(void)
* the pre-loaded lengths.
*
* Fill in the random key lengths.
+ *
+ * Focus on relatively small items, admitting the possibility of larger
+ * items. Pick a size close to the minimum most of the time, only create
+ * a larger item 1 in 20 times.
*/
- for (i = 0; i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i)
- g.key_rand_len[i] =
- kv_len(NULL, (uint64_t)i, g.c_key_min, g.c_key_max);
+ for (i = 0;
+ i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i) {
+ max = g.c_key_max;
+ if (i % 20 != 0 && max > g.c_key_min + 20)
+ max = g.c_key_min + 20;
+ g.key_rand_len[i] = mmrand(NULL, g.c_key_min, max);
+ }
}
void
-key_gen_setup(uint8_t **keyp)
+key_gen_setup(WT_ITEM *key)
{
- uint8_t *key;
size_t i, len;
-
- *keyp = NULL;
+ char *p;
len = MAX(KILOBYTE(100), g.c_key_max);
- if ((key = malloc(len)) == NULL)
- die(errno, "malloc");
+ p = dmalloc(len);
for (i = 0; i < len; ++i)
- key[i] = (uint8_t)("abcdefghijklmnopqrstuvwxyz"[i % 26]);
- *keyp = key;
+ p[i] = "abcdefghijklmnopqrstuvwxyz"[i % 26];
+
+ key->mem = p;
+ key->memsize = len;
+ key->data = key->mem;
+ key->size = 0;
}
static void
-key_gen_common(uint8_t *key, size_t *sizep, uint64_t keyno, int suffix)
+key_gen_common(WT_ITEM *key, uint64_t keyno, const char * const suffix)
{
int len;
+ char *p;
+
+ p = key->mem;
/*
- * The key always starts with a 10-digit string (the specified cnt)
+ * The key always starts with a 10-digit string (the specified row)
* followed by two digits, a random number between 1 and 15 if it's
* an insert, otherwise 00.
*/
- len = sprintf((char *)key, "%010" PRIu64 ".%02d", keyno, suffix);
+ u64_to_string_zf(keyno, key->mem, 11);
+ p[10] = '.';
+ p[11] = suffix[0];
+ p[12] = suffix[1];
+ len = 13;
/*
- * In a column-store, the key is only used for BDB, and so it doesn't
- * need a random length.
+ * In a column-store, the key is only used for Berkeley DB inserts,
+ * and so it doesn't need a random length.
*/
if (g.type == ROW) {
- key[len] = '/';
- len = (int)g.key_rand_len[keyno %
- (sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]))];
+ p[len] = '/';
+
+ /*
+ * Because we're doing table lookup for key sizes, we weren't
+ * able to set really big keys sizes in the table, the table
+ * isn't big enough to keep our hash from selecting too many
+ * big keys and blowing out the cache. Handle that here, use a
+ * really big key 1 in 2500 times.
+ */
+ len = keyno % 2500 == 0 && g.c_key_max < KILOBYTE(80) ?
+ KILOBYTE(80) :
+ (int)g.key_rand_len[keyno % WT_ELEMENTS(g.key_rand_len)];
}
- *sizep = (size_t)len;
+
+ key->data = key->mem;
+ key->size = (size_t)len;
}
void
-key_gen(uint8_t *key, size_t *sizep, uint64_t keyno)
+key_gen(WT_ITEM *key, uint64_t keyno)
{
- key_gen_common(key, sizep, keyno, 0);
+ key_gen_common(key, keyno, "00");
}
void
-key_gen_insert(WT_RAND_STATE *rnd, uint8_t *key, size_t *sizep, uint64_t keyno)
+key_gen_insert(WT_RAND_STATE *rnd, WT_ITEM *key, uint64_t keyno)
{
- key_gen_common(key, sizep, keyno, (int)mmrand(rnd, 1, 15));
+ static const char * const suffix[15] = {
+ "01", "02", "03", "04", "05",
+ "06", "07", "08", "09", "10",
+ "11", "12", "13", "14", "15"
+ };
+
+ key_gen_common(key, keyno, suffix[mmrand(rnd, 1, 15) - 1]);
}
static uint32_t val_dup_data_len; /* Length of duplicate data items */
+static inline uint32_t
+value_len(WT_RAND_STATE *rnd, uint64_t keyno, uint32_t min, uint32_t max)
+{
+ /*
+ * Focus on relatively small items, admitting the possibility of larger
+ * items. Pick a size close to the minimum most of the time, only create
+ * a larger item 1 in 20 times, and a really big item 1 in somewhere
+ * around 2500 items.
+ */
+ if (keyno % 2500 == 0 && max < KILOBYTE(80)) {
+ min = KILOBYTE(80);
+ max = KILOBYTE(100);
+ } else if (keyno % 20 != 0 && max > min + 20)
+ max = min + 20;
+ return (mmrand(rnd, min, max));
+}
+
void
-val_gen_setup(WT_RAND_STATE *rnd, uint8_t **valp)
+val_gen_setup(WT_RAND_STATE *rnd, WT_ITEM *value)
{
- uint8_t *val;
size_t i, len;
+ char *p;
- *valp = NULL;
+ memset(value, 0, sizeof(WT_ITEM));
/*
* Set initial buffer contents to recognizable text.
@@ -139,36 +172,43 @@ val_gen_setup(WT_RAND_STATE *rnd, uint8_t **valp)
* data for column-store run-length encoded files.
*/
len = MAX(KILOBYTE(100), g.c_value_max) + 20;
- if ((val = malloc(len)) == NULL)
- die(errno, "malloc");
+ p = dmalloc(len);
for (i = 0; i < len; ++i)
- val[i] = (uint8_t)("ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26]);
+ p[i] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26];
- *valp = val;
+ value->mem = p;
+ value->memsize = len;
+ value->data = value->mem;
+ value->size = 0;
- val_dup_data_len = kv_len(rnd,
+ val_dup_data_len = value_len(rnd,
(uint64_t)mmrand(rnd, 1, 20), g.c_value_min, g.c_value_max);
}
void
-val_gen(WT_RAND_STATE *rnd, uint8_t *val, size_t *sizep, uint64_t keyno)
+val_gen(WT_RAND_STATE *rnd, WT_ITEM *value, uint64_t keyno)
{
+ char *p;
+
+ p = value->mem;
+ value->data = value->mem;
+
/*
* Fixed-length records: take the low N bits from the last digit of
* the record number.
*/
if (g.type == FIX) {
switch (g.c_bitcnt) {
- case 8: val[0] = mmrand(rnd, 1, 0xff); break;
- case 7: val[0] = mmrand(rnd, 1, 0x7f); break;
- case 6: val[0] = mmrand(rnd, 1, 0x3f); break;
- case 5: val[0] = mmrand(rnd, 1, 0x1f); break;
- case 4: val[0] = mmrand(rnd, 1, 0x0f); break;
- case 3: val[0] = mmrand(rnd, 1, 0x07); break;
- case 2: val[0] = mmrand(rnd, 1, 0x03); break;
- case 1: val[0] = 1; break;
+ case 8: p[0] = (char)mmrand(rnd, 1, 0xff); break;
+ case 7: p[0] = (char)mmrand(rnd, 1, 0x7f); break;
+ case 6: p[0] = (char)mmrand(rnd, 1, 0x3f); break;
+ case 5: p[0] = (char)mmrand(rnd, 1, 0x1f); break;
+ case 4: p[0] = (char)mmrand(rnd, 1, 0x0f); break;
+ case 3: p[0] = (char)mmrand(rnd, 1, 0x07); break;
+ case 2: p[0] = (char)mmrand(rnd, 1, 0x03); break;
+ case 1: p[0] = 1; break;
}
- *sizep = 1;
+ value->size = 1;
return;
}
@@ -177,29 +217,24 @@ val_gen(WT_RAND_STATE *rnd, uint8_t *val, size_t *sizep, uint64_t keyno)
* test that by inserting a zero-length data item every so often.
*/
if (keyno % 63 == 0) {
- val[0] = '\0';
- *sizep = 0;
+ p[0] = '\0';
+ value->size = 0;
return;
}
/*
- * Start the data with a 10-digit number.
- *
- * For row and non-repeated variable-length column-stores, change the
- * leading number to ensure every data item is unique. For repeated
- * variable-length column-stores (that is, to test run-length encoding),
- * use the same data value all the time.
+ * Data items have unique leading numbers by default and random lengths;
+ * variable-length column-stores use a duplicate data value to test RLE.
*/
- if ((g.type == ROW || g.type == VAR) &&
- g.c_repeat_data_pct != 0 &&
- mmrand(rnd, 1, 100) < g.c_repeat_data_pct) {
- (void)strcpy((char *)val, "DUPLICATEV");
- val[10] = '/';
- *sizep = val_dup_data_len;
+ if (g.type == VAR && mmrand(rnd, 1, 100) < g.c_repeat_data_pct) {
+ (void)strcpy(p, "DUPLICATEV");
+ p[10] = '/';
+ value->size = val_dup_data_len;
} else {
- (void)sprintf((char *)val, "%010" PRIu64, keyno);
- val[10] = '/';
- *sizep = kv_len(rnd, keyno, g.c_value_min, g.c_value_max);
+ u64_to_string_zf(keyno, p, 11);
+ p[10] = '/';
+ value->size =
+ value_len(rnd, keyno, g.c_value_min, g.c_value_max);
}
}
@@ -210,7 +245,7 @@ track(const char *tag, uint64_t cnt, TINFO *tinfo)
int len;
char msg[128];
- if (!g.track || tag == NULL)
+ if (g.c_quiet || tag == NULL)
return;
if (tinfo == NULL && cnt == 0)
@@ -242,9 +277,9 @@ track(const char *tag, uint64_t cnt, TINFO *tinfo)
lastlen = len;
if (printf("%s\r", msg) < 0)
- die(EIO, "printf");
+ testutil_die(EIO, "printf");
if (fflush(stdout) == EOF)
- die(errno, "fflush");
+ testutil_die(errno, "fflush");
}
/*
@@ -257,80 +292,82 @@ path_setup(const char *home)
size_t len;
/* Home directory. */
- if ((g.home = strdup(home == NULL ? "RUNDIR" : home)) == NULL)
- die(errno, "malloc");
+ g.home = dstrdup(home == NULL ? "RUNDIR" : home);
/* Log file. */
len = strlen(g.home) + strlen("log") + 2;
- if ((g.home_log = malloc(len)) == NULL)
- die(errno, "malloc");
+ g.home_log = dmalloc(len);
snprintf(g.home_log, len, "%s/%s", g.home, "log");
/* RNG log file. */
len = strlen(g.home) + strlen("rand") + 2;
- if ((g.home_rand = malloc(len)) == NULL)
- die(errno, "malloc");
+ g.home_rand = dmalloc(len);
snprintf(g.home_rand, len, "%s/%s", g.home, "rand");
/* Run file. */
len = strlen(g.home) + strlen("CONFIG") + 2;
- if ((g.home_config = malloc(len)) == NULL)
- die(errno, "malloc");
+ g.home_config = dmalloc(len);
snprintf(g.home_config, len, "%s/%s", g.home, "CONFIG");
/* Statistics file. */
len = strlen(g.home) + strlen("stats") + 2;
- if ((g.home_stats = malloc(len)) == NULL)
- die(errno, "malloc");
+ g.home_stats = dmalloc(len);
snprintf(g.home_stats, len, "%s/%s", g.home, "stats");
- /* Backup directory. */
- len = strlen(g.home) + strlen("BACKUP") + 2;
- if ((g.home_backup = malloc(len)) == NULL)
- die(errno, "malloc");
- snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP");
-
/* BDB directory. */
len = strlen(g.home) + strlen("bdb") + 2;
- if ((g.home_bdb = malloc(len)) == NULL)
- die(errno, "malloc");
+ g.home_bdb = dmalloc(len);
snprintf(g.home_bdb, len, "%s/%s", g.home, "bdb");
- /* KVS directory. */
- len = strlen(g.home) + strlen("KVS") + 2;
- if ((g.home_kvs = malloc(len)) == NULL)
- die(errno, "malloc");
- snprintf(g.home_kvs, len, "%s/%s", g.home, "KVS");
-
/*
- * Home directory initialize command: remove everything except the RNG
- * log file.
+ * Home directory initialize command: create the directory if it doesn't
+ * exist, else remove everything except the RNG log file, create the KVS
+ * sub-directory.
*
* Redirect the "cd" command to /dev/null so chatty cd implementations
* don't add the new working directory to our output.
*/
#undef CMD
#ifdef _WIN32
-#define CMD "cd %s && del /s /q * >:nul && rd /s /q KVS"
+#define CMD "del /q rand.copy & " \
+ "(IF EXIST %s\\rand copy /y %s\\rand rand.copy) & " \
+ "(IF EXIST %s rd /s /q %s) & mkdir %s & " \
+ "(IF EXIST rand.copy copy rand.copy %s\\rand) & " \
+ "cd %s & mkdir KVS"
+ len = strlen(g.home) * 7 + strlen(CMD) + 1;
+ g.home_init = dmalloc(len);
+ snprintf(g.home_init, len, CMD,
+ g.home, g.home, g.home, g.home, g.home, g.home, g.home);
#else
-#define CMD "cd %s > /dev/null && rm -rf `ls | sed /rand/d`"
+#define CMD "test -e %s || mkdir %s; " \
+ "cd %s > /dev/null && rm -rf `ls | sed /rand/d`; " \
+ "mkdir KVS"
+ len = strlen(g.home) * 3 + strlen(CMD) + 1;
+ g.home_init = dmalloc(len);
+ snprintf(g.home_init, len, CMD, g.home, g.home, g.home);
#endif
- len = strlen(g.home) + strlen(CMD) + 1;
- if ((g.home_init = malloc(len)) == NULL)
- die(errno, "malloc");
- snprintf(g.home_init, len, CMD, g.home);
- /* Backup directory initialize command, remove and re-create it. */
+ /* Primary backup directory. */
+ len = strlen(g.home) + strlen("BACKUP") + 2;
+ g.home_backup = dmalloc(len);
+ snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP");
+
+ /*
+ * Backup directory initialize command, remove and re-create the primary
+ * backup directory, plus a copy we maintain for recovery testing.
+ */
#undef CMD
#ifdef _WIN32
-#define CMD "del /s /q >:nul && mkdir %s"
+#define CMD "rd /s /q %s\\%s %s\\%s & mkdir %s\\%s %s\\%s"
#else
-#define CMD "rm -rf %s && mkdir %s"
+#define CMD "rm -rf %s/%s %s/%s && mkdir %s/%s %s/%s"
#endif
- len = strlen(g.home_backup) * 2 + strlen(CMD) + 1;
- if ((g.home_backup_init = malloc(len)) == NULL)
- die(errno, "malloc");
- snprintf(g.home_backup_init, len, CMD, g.home_backup, g.home_backup);
+ len = strlen(g.home) * 4 +
+ strlen("BACKUP") * 2 + strlen("BACKUP_COPY") * 2 + strlen(CMD) + 1;
+ g.home_backup_init = dmalloc(len);
+ snprintf(g.home_backup_init, len, CMD,
+ g.home, "BACKUP", g.home, "BACKUP_COPY",
+ g.home, "BACKUP", g.home, "BACKUP_COPY");
/*
* Salvage command, save the interesting files so we can replay the
@@ -352,8 +389,7 @@ path_setup(const char *home)
"cp WiredTiger* wt* slvg.copy/"
#endif
len = strlen(g.home) + strlen(CMD) + 1;
- if ((g.home_salvage_copy = malloc(len)) == NULL)
- die(errno, "malloc");
+ g.home_salvage_copy = dmalloc(len);
snprintf(g.home_salvage_copy, len, CMD, g.home);
}
@@ -375,12 +411,8 @@ rng(WT_RAND_STATE *rnd)
rnd = &g.rnd;
/*
- * We can entirely reproduce a run based on the random numbers used
- * in the initial run, plus the configuration files. It would be
- * nice to just log the initial RNG seed, rather than logging every
- * random number generated, but we'd have to include our own RNG,
- * Berkeley DB calls rand() internally, and that messes up the pattern
- * of random numbers.
+ * We can reproduce a single-threaded run based on the random numbers
+ * used in the initial run, plus the configuration files.
*
* Check g.replay and g.rand_log_stop: multithreaded runs log/replay
* until they get to the operations phase, then turn off log/replay,
@@ -390,14 +422,13 @@ rng(WT_RAND_STATE *rnd)
return (__wt_random(rnd));
if (g.replay) {
- if (fgets(buf, sizeof(buf), g.rand_log) == NULL) {
- if (feof(g.rand_log)) {
+ if (fgets(buf, sizeof(buf), g.randfp) == NULL) {
+ if (feof(g.randfp)) {
fprintf(stderr,
- "end of random number log reached, "
- "exiting\n");
+ "\n" "end of random number log reached\n");
exit(EXIT_SUCCESS);
}
- die(errno, "random number log");
+ testutil_die(errno, "random number log");
}
return ((uint32_t)strtoul(buf, NULL, 10));
@@ -405,6 +436,26 @@ rng(WT_RAND_STATE *rnd)
r = __wt_random(rnd);
- fprintf(g.rand_log, "%" PRIu32 "\n", r);
+ /* Save and flush the random number so we're up-to-date on error. */
+ (void)fprintf(g.randfp, "%" PRIu32 "\n", r);
+ (void)fflush(g.randfp);
+
return (r);
}
+
+/*
+ * fclose_and_clear --
+ * Close a file and clear the handle so we don't close twice.
+ */
+void
+fclose_and_clear(FILE **fpp)
+{
+ FILE *fp;
+
+ if ((fp = *fpp) == NULL)
+ return;
+ *fpp = NULL;
+ if (fclose(fp) != 0)
+ testutil_die(errno, "fclose");
+ return;
+}
diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c
index 986796b2fc6..bf0cb6c14a4 100644
--- a/src/third_party/wiredtiger/test/format/wts.c
+++ b/src/third_party/wiredtiger/test/format/wts.c
@@ -1,5 +1,5 @@
/*-
- * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2014-2016 MongoDB, Inc.
* Public Domain 2008-2014 WiredTiger, Inc.
*
* This is free and unencumbered software released into the public domain.
@@ -28,18 +28,53 @@
#include "format.h"
+/*
+ * compressor --
+ * Configure compression.
+ */
+static const char *
+compressor(uint32_t compress_flag)
+{
+ switch (compress_flag) {
+ case COMPRESS_NONE:
+ return ("none");
+ case COMPRESS_LZ4:
+ return ("lz4");
+ case COMPRESS_LZ4_NO_RAW:
+ return ("lz4-noraw");
+ case COMPRESS_LZO:
+ return ("LZO1B-6");
+ case COMPRESS_SNAPPY:
+ return ("snappy");
+ case COMPRESS_ZLIB:
+ return ("zlib");
+ case COMPRESS_ZLIB_NO_RAW:
+ return ("zlib-noraw");
+ default:
+ break;
+ }
+ testutil_die(EINVAL,
+ "illegal compression flag: %#" PRIx32, compress_flag);
+}
+
static int
handle_message(WT_EVENT_HANDLER *handler,
WT_SESSION *session, const char *message)
{
+ int out;
+
(void)(handler);
(void)(session);
- if (g.logfp != NULL)
- return (fprintf(
- g.logfp, "%p:%s\n", session, message) < 0 ? -1 : 0);
-
- return (printf("%p:%s\n", session, message) < 0 ? -1 : 0);
+ /* Write and flush the message so we're up-to-date on error. */
+ if (g.logfp == NULL) {
+ out = printf("%p:%s\n", (void *)session, message);
+ (void)fflush(stdout);
+ } else {
+ out = fprintf(g.logfp, "%p:%s\n", (void *)session, message);
+ (void)fflush(g.logfp);
+ }
+ return (out < 0 ? EIO : 0);
}
/*
@@ -72,26 +107,23 @@ static WT_EVENT_HANDLER event_handler = {
* Open a connection to a WiredTiger database.
*/
void
-wts_open(const char *home, int set_api, WT_CONNECTION **connp)
+wts_open(const char *home, bool set_api, WT_CONNECTION **connp)
{
WT_CONNECTION *conn;
- int ret;
- char config[4096], *end, *p;
+ WT_DECL_RET;
+ char *config, *end, *p, helium_config[1024];
*connp = NULL;
- p = config;
- end = config + sizeof(config);
+ config = p = g.wiredtiger_open_config;
+ end = config + sizeof(g.wiredtiger_open_config);
p += snprintf(p, REMAIN(p, end),
- "create,checkpoint_sync=false,cache_size=%" PRIu32 "MB",
- g.c_cache);
-
-#ifdef _WIN32
- p += snprintf(p, REMAIN(p, end), ",error_prefix=\"t_format.exe\"");
-#else
- p += snprintf(p, REMAIN(p, end), ",error_prefix=\"%s\"", g.progname);
-#endif
+ "create=true,"
+ "cache_size=%" PRIu32 "MB,"
+ "checkpoint_sync=false,"
+ "error_prefix=\"%s\"",
+ g.c_cache, g.progname);
/* LSM configuration. */
if (DATASOURCE("lsm"))
@@ -107,17 +139,22 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
/* Logging configuration. */
if (g.c_logging)
p += snprintf(p, REMAIN(p, end),
- ",log=(enabled=true,archive=%d,prealloc=%d)",
+ ",log=(enabled=true,archive=%d,prealloc=%d"
+ ",compressor=\"%s\")",
g.c_logging_archive ? 1 : 0,
- g.c_logging_prealloc ? 1 : 0);
+ g.c_logging_prealloc ? 1 : 0,
+ compressor(g.c_logging_compression_flag));
/* Miscellaneous. */
-#ifndef _WIN32
+#ifdef HAVE_POSIX_MEMALIGN
p += snprintf(p, REMAIN(p, end), ",buffer_alignment=512");
#endif
p += snprintf(p, REMAIN(p, end), ",mmap=%d", g.c_mmap ? 1 : 0);
+ if (g.c_direct_io)
+ p += snprintf(p, REMAIN(p, end), ",direct_io=(data)");
+
if (g.c_data_extend)
p += snprintf(p, REMAIN(p, end), ",file_extend=(data=8MB)");
@@ -141,9 +178,8 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
/* Extensions. */
p += snprintf(p, REMAIN(p, end),
",extensions=["
- "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],",
+ "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],",
g.c_reverse ? REVERSE_PATH : "",
- access(BZIP_PATH, R_OK) == 0 ? BZIP_PATH : "",
access(LZ4_PATH, R_OK) == 0 ? LZ4_PATH : "",
access(LZO_PATH, R_OK) == 0 ? LZO_PATH : "",
access(SNAPPY_PATH, R_OK) == 0 ? SNAPPY_PATH : "",
@@ -161,19 +197,20 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
p += snprintf(p, REMAIN(p, end), ",%s", g.config_open);
if (REMAIN(p, end) == 0)
- die(ENOMEM, "wiredtiger_open configuration buffer too small");
+ testutil_die(ENOMEM,
+ "wiredtiger_open configuration buffer too small");
/*
* Direct I/O may not work with backups, doing copies through the buffer
* cache after configuring direct I/O in Linux won't work. If direct
- * I/O is configured, turn off backups. This isn't a great place to do
+ * I/O is configured, turn off backups. This isn't a great place to do
* this check, but it's only here we have the configuration string.
*/
if (strstr(config, "direct_io") != NULL)
g.c_backups = 0;
- if ((ret = wiredtiger_open(home, &event_handler, config, &conn)) != 0)
- die(ret, "wiredtiger_open: %s", home);
+ testutil_checkfmt(
+ wiredtiger_open(home, &event_handler, config, &conn), "%s", home);
if (set_api)
g.wt_api = conn->get_extension_api(conn);
@@ -186,33 +223,48 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
*/
if (DATASOURCE("helium")) {
if (g.helium_mount == NULL)
- die(EINVAL, "no Helium mount point specified");
- (void)snprintf(config, sizeof(config),
+ testutil_die(EINVAL, "no Helium mount point specified");
+ (void)snprintf(helium_config, sizeof(helium_config),
"entry=wiredtiger_extension_init,config=["
"helium_verbose=0,"
"dev1=[helium_devices=\"he://./%s\","
"helium_o_volume_truncate=1]]",
g.helium_mount);
- if ((ret =
- conn->load_extension(conn, HELIUM_PATH, config)) != 0)
- die(ret,
+ if ((ret = conn->load_extension(
+ conn, HELIUM_PATH, helium_config)) != 0)
+ testutil_die(ret,
"WT_CONNECTION.load_extension: %s:%s",
- HELIUM_PATH, config);
+ HELIUM_PATH, helium_config);
}
*connp = conn;
}
/*
+ * wts_reopen --
+ * Re-open a connection to a WiredTiger database.
+ */
+void
+wts_reopen(void)
+{
+ WT_CONNECTION *conn;
+
+ testutil_checkfmt(wiredtiger_open(g.home, &event_handler,
+ g.wiredtiger_open_config, &conn), "%s", g.home);
+
+ g.wt_api = conn->get_extension_api(conn);
+ g.wts_conn = conn;
+}
+
+/*
* wts_create --
* Create the underlying store.
*/
void
-wts_create(void)
+wts_init(void)
{
WT_CONNECTION *conn;
WT_SESSION *session;
uint32_t maxintlpage, maxintlkey, maxleafpage, maxleafkey, maxleafvalue;
- int ret;
char config[4096], *end, *p;
conn = g.wts_conn;
@@ -222,15 +274,16 @@ wts_create(void)
/*
* Ensure that we can service at least one operation per-thread
- * concurrently without filling the cache with pinned pages. We
- * choose a multiplier of three because the max configurations control
- * on disk size and in memory pages are often significantly larger
- * than their disk counterparts.
+ * concurrently without filling the cache with pinned pages. We choose
+ * a multiplier of three because the max configurations control on disk
+ * size and in memory pages are often significantly larger than their
+ * disk counterparts. We also apply the default eviction_dirty_trigger
+ * of 20% so that workloads don't get stuck with dirty pages in cache.
*/
maxintlpage = 1U << g.c_intl_page_max;
maxleafpage = 1U << g.c_leaf_page_max;
while (3 * g.c_threads * (maxintlpage + maxleafpage) >
- g.c_cache << 20) {
+ (g.c_cache << 20) / 5) {
if (maxleafpage <= 512 && maxintlpage <= 512)
break;
if (maxintlpage > 512)
@@ -241,7 +294,7 @@ wts_create(void)
p += snprintf(p, REMAIN(p, end),
"key_format=%s,"
"allocation_size=512,%s"
- "internal_page_max=%d,leaf_page_max=%d",
+ "internal_page_max=%" PRIu32 ",leaf_page_max=%" PRIu32,
(g.type == ROW) ? "u" : "r",
g.c_firstfit ? "block_allocation=first," : "",
maxintlpage, maxleafpage);
@@ -253,15 +306,15 @@ wts_create(void)
maxintlkey = mmrand(NULL, maxintlpage / 50, maxintlpage / 40);
if (maxintlkey > 20)
p += snprintf(p, REMAIN(p, end),
- ",internal_key_max=%d", maxintlkey);
+ ",internal_key_max=%" PRIu32, maxintlkey);
maxleafkey = mmrand(NULL, maxleafpage / 50, maxleafpage / 40);
if (maxleafkey > 20)
p += snprintf(p, REMAIN(p, end),
- ",leaf_key_max=%d", maxleafkey);
+ ",leaf_key_max=%" PRIu32, maxleafkey);
maxleafvalue = mmrand(NULL, maxleafpage * 10, maxleafpage / 40);
if (maxleafvalue > 40 && maxleafvalue < 100 * 1024)
p += snprintf(p, REMAIN(p, end),
- ",leaf_value_max=%d", maxleafvalue);
+ ",leaf_value_max=%" PRIu32, maxleafvalue);
switch (g.type) {
case FIX:
@@ -289,7 +342,7 @@ wts_create(void)
",huffman_value=english");
if (g.c_dictionary)
p += snprintf(p, REMAIN(p, end),
- ",dictionary=%d", mmrand(NULL, 123, 517));
+ ",dictionary=%" PRIu32, mmrand(NULL, 123, 517));
break;
}
@@ -307,38 +360,9 @@ wts_create(void)
}
/* Configure compression. */
- switch (g.c_compression_flag) {
- case COMPRESS_NONE:
- break;
- case COMPRESS_BZIP:
- p += snprintf(p, REMAIN(p, end),
- ",block_compressor=\"bzip2\"");
- break;
- case COMPRESS_BZIP_RAW:
- p += snprintf(p, REMAIN(p, end),
- ",block_compressor=\"bzip2-raw-test\"");
- break;
- case COMPRESS_LZ4:
- p += snprintf(p, REMAIN(p, end),
- ",block_compressor=\"lz4\"");
- break;
- case COMPRESS_LZO:
- p += snprintf(p, REMAIN(p, end),
- ",block_compressor=\"LZO1B-6\"");
- break;
- case COMPRESS_SNAPPY:
- p += snprintf(p, REMAIN(p, end),
- ",block_compressor=\"snappy\"");
- break;
- case COMPRESS_ZLIB:
- p += snprintf(p, REMAIN(p, end),
- ",block_compressor=\"zlib\"");
- break;
- case COMPRESS_ZLIB_NO_RAW:
- p += snprintf(p, REMAIN(p, end),
- ",block_compressor=\"zlib-noraw\"");
- break;
- }
+ if (g.c_compression_flag != COMPRESS_NONE)
+ p += snprintf(p, REMAIN(p, end), ",block_compressor=\"%s\"",
+ compressor(g.c_compression_flag));
/* Configure Btree internal key truncation. */
p += snprintf(p, REMAIN(p, end), ",internal_key_truncate=%s",
@@ -385,51 +409,50 @@ wts_create(void)
}
if (REMAIN(p, end) == 0)
- die(ENOMEM, "WT_SESSION.create configuration buffer too small");
+ testutil_die(ENOMEM,
+ "WT_SESSION.create configuration buffer too small");
/*
* Create the underlying store.
*/
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
- if ((ret = session->create(session, g.uri, config)) != 0)
- die(ret, "session.create: %s", g.uri);
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_checkfmt(session->create(session, g.uri, config), "%s", g.uri);
+ testutil_check(session->close(session, NULL));
}
void
wts_close(void)
{
WT_CONNECTION *conn;
- int ret;
const char *config;
conn = g.wts_conn;
config = g.c_leak_memory ? "leak_memory" : NULL;
- if ((ret = conn->close(conn, config)) != 0)
- die(ret, "connection.close");
+ testutil_check(conn->close(conn, config));
+ g.wts_conn = NULL;
+ g.wt_api = NULL;
}
void
wts_dump(const char *tag, int dump_bdb)
{
+#ifdef HAVE_BERKELEY_DB
size_t len;
- int ret;
char *cmd;
- /* Some data-sources don't support dump through the wt utility. */
+ /*
+ * In-memory configurations and data-sources don't support dump through
+ * the wt utility.
+ */
if (DATASOURCE("helium") || DATASOURCE("kvsbdb"))
return;
-#ifndef _WIN32
track("dump files and compare", 0ULL, NULL);
len = strlen(g.home) + strlen(BERKELEY_DB_PATH) + strlen(g.uri) + 100;
- if ((cmd = malloc(len)) == NULL)
- die(errno, "malloc");
+ cmd = dmalloc(len);
(void)snprintf(cmd, len,
"sh s_dumpcmp -h %s %s %s %s %s %s",
g.home,
@@ -439,9 +462,11 @@ wts_dump(const char *tag, int dump_bdb)
g.uri == NULL ? "" : "-n",
g.uri == NULL ? "" : g.uri);
- if ((ret = system(cmd)) != 0)
- die(ret, "%s: dump comparison failed", tag);
+ testutil_checkfmt(system(cmd), "%s: dump comparison failed", tag);
free(cmd);
+#else
+ (void)tag; /* [-Wunused-variable] */
+ (void)dump_bdb; /* [-Wunused-variable] */
#endif
}
@@ -449,14 +474,16 @@ void
wts_verify(const char *tag)
{
WT_CONNECTION *conn;
+ WT_DECL_RET;
WT_SESSION *session;
- int ret;
+
+ if (g.c_verify == 0)
+ return;
conn = g.wts_conn;
track("verify", 0ULL, NULL);
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
if (g.logging != 0)
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== verify start ===============");
@@ -464,13 +491,12 @@ wts_verify(const char *tag)
/* Session operations for LSM can return EBUSY. */
ret = session->verify(session, g.uri, "strict");
if (ret != 0 && !(ret == EBUSY && DATASOURCE("lsm")))
- die(ret, "session.verify: %s: %s", g.uri, tag);
+ testutil_die(ret, "session.verify: %s: %s", g.uri, tag);
if (g.logging != 0)
(void)g.wt_api->msg_printf(g.wt_api, session,
"=============== verify stop ===============");
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
}
/*
@@ -482,12 +508,13 @@ wts_stats(void)
{
WT_CONNECTION *conn;
WT_CURSOR *cursor;
+ WT_DECL_RET;
WT_SESSION *session;
FILE *fp;
+ size_t len;
char *stat_name;
const char *pval, *desc;
uint64_t v;
- int ret;
/* Ignore statistics if they're not configured. */
if (g.c_statistics == 0)
@@ -500,52 +527,44 @@ wts_stats(void)
conn = g.wts_conn;
track("stat", 0ULL, NULL);
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- die(ret, "connection.open_session");
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
if ((fp = fopen(g.home_stats, "w")) == NULL)
- die(errno, "fopen: %s", g.home_stats);
+ testutil_die(errno, "fopen: %s", g.home_stats);
/* Connection statistics. */
fprintf(fp, "====== Connection statistics:\n");
- if ((ret = session->open_cursor(session,
- "statistics:", NULL, NULL, &cursor)) != 0)
- die(ret, "session.open_cursor");
+ testutil_check(session->open_cursor(
+ session, "statistics:", NULL, NULL, &cursor));
while ((ret = cursor->next(cursor)) == 0 &&
(ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
if (fprintf(fp, "%s=%s\n", desc, pval) < 0)
- die(errno, "fprintf");
+ testutil_die(errno, "fprintf");
if (ret != WT_NOTFOUND)
- die(ret, "cursor.next");
- if ((ret = cursor->close(cursor)) != 0)
- die(ret, "cursor.close");
+ testutil_die(ret, "cursor.next");
+ testutil_check(cursor->close(cursor));
/* Data source statistics. */
fprintf(fp, "\n\n====== Data source statistics:\n");
- if ((stat_name =
- malloc(strlen("statistics:") + strlen(g.uri) + 1)) == NULL)
- die(errno, "malloc");
- sprintf(stat_name, "statistics:%s", g.uri);
- if ((ret = session->open_cursor(
- session, stat_name, NULL, NULL, &cursor)) != 0)
- die(ret, "session.open_cursor");
+ len = strlen("statistics:") + strlen(g.uri) + 1;
+ stat_name = dmalloc(len);
+ snprintf(stat_name, len, "statistics:%s", g.uri);
+ testutil_check(session->open_cursor(
+ session, stat_name, NULL, NULL, &cursor));
free(stat_name);
while ((ret = cursor->next(cursor)) == 0 &&
(ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
if (fprintf(fp, "%s=%s\n", desc, pval) < 0)
- die(errno, "fprintf");
+ testutil_die(errno, "fprintf");
if (ret != WT_NOTFOUND)
- die(ret, "cursor.next");
- if ((ret = cursor->close(cursor)) != 0)
- die(ret, "cursor.close");
+ testutil_die(ret, "cursor.next");
+ testutil_check(cursor->close(cursor));
- if ((ret = fclose(fp)) != 0)
- die(ret, "fclose");
+ fclose_and_clear(&fp);
- if ((ret = session->close(session, NULL)) != 0)
- die(ret, "session.close");
+ testutil_check(session->close(session, NULL));
}
diff --git a/src/third_party/wiredtiger/test/recovery/Makefile.am b/src/third_party/wiredtiger/test/recovery/Makefile.am
new file mode 100644
index 00000000000..688571ec5d8
--- /dev/null
+++ b/src/third_party/wiredtiger/test/recovery/Makefile.am
@@ -0,0 +1,18 @@
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
+
+noinst_PROGRAMS = random-abort truncated-log
+random_abort_SOURCES = random-abort.c
+random_abort_LDADD =$(top_builddir)/libwiredtiger.la
+random_abort_LDFLAGS = -static
+
+truncated_log_SOURCES = truncated-log.c
+truncated_log_LDADD =$(top_builddir)/libwiredtiger.la
+truncated_log_LDFLAGS = -static
+
+# Run this during a "make check" smoke test.
+TESTS = smoke.sh
+
+clean-local:
+ rm -rf WT_TEST.* *.core
diff --git a/src/third_party/wiredtiger/test/recovery/random-abort.c b/src/third_party/wiredtiger/test/recovery/random-abort.c
new file mode 100644
index 00000000000..31e4b0f30ff
--- /dev/null
+++ b/src/third_party/wiredtiger/test/recovery/random-abort.c
@@ -0,0 +1,428 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <wiredtiger.h>
+#include "wt_internal.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/wait.h>
+#include <signal.h>
+
+static char home[512]; /* Program working dir */
+static const char *progname; /* Program name */
+static const char * const uri = "table:main";
+static int inmem;
+
+#define MAX_TH 12
+#define MIN_TH 5
+#define MAX_TIME 40
+#define MIN_TIME 10
+#define RECORDS_FILE "records-%" PRIu32
+
+#define ENV_CONFIG_DEF \
+ "create,log=(file_max=10M,archive=false,enabled)," \
+ "transaction_sync=(enabled=false,method=none)"
+#define ENV_CONFIG_TXNSYNC \
+ "create,log=(file_max=10M,archive=false,enabled)," \
+ "transaction_sync=(enabled,method=none)"
+#define ENV_CONFIG_REC "log=(recover=on)"
+#define MAX_VAL 4096
+
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: %s [-h dir] [-T threads]\n", progname);
+ exit(EXIT_FAILURE);
+}
+
+static void
+die(int e, const char *m)
+{
+ fprintf(stderr, "%s: %s: %s\n", progname, m, wiredtiger_strerror(e));
+ exit(EXIT_FAILURE);
+}
+
+typedef struct {
+ WT_CONNECTION *conn;
+ uint64_t start;
+ uint32_t id;
+} WT_THREAD_DATA;
+
+static void *
+thread_run(void *arg)
+{
+ FILE *fp;
+ WT_CURSOR *cursor;
+ WT_ITEM data;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ WT_THREAD_DATA *td;
+ uint64_t i;
+ int ret;
+ size_t lsize;
+ char buf[MAX_VAL], kname[64], lgbuf[8];
+ char large[128*1024];
+
+ __wt_random_init(&rnd);
+ memset(buf, 0, sizeof(buf));
+ memset(kname, 0, sizeof(kname));
+ lsize = sizeof(large);
+ memset(large, 0, lsize);
+
+ td = (WT_THREAD_DATA *)arg;
+ /*
+ * The value is the name of the record file with our id appended.
+ */
+ snprintf(buf, sizeof(buf), RECORDS_FILE, td->id);
+ /*
+ * Set up a large value putting our id in it. Write it in there a
+ * bunch of times, but the rest of the buffer can just be zero.
+ */
+ snprintf(lgbuf, sizeof(lgbuf), "th-%" PRIu32, td->id);
+ for (i = 0; i < 128; i += strlen(lgbuf))
+ snprintf(&large[i], lsize - i, "%s", lgbuf);
+ /*
+ * Keep a separate file with the records we wrote for checking.
+ */
+ (void)unlink(buf);
+ if ((fp = fopen(buf, "w")) == NULL)
+ die(errno, "fopen");
+ /*
+ * Set to line buffering. But that is advisory only. We've seen
+ * cases where the result files end up with partial lines.
+ */
+ (void)setvbuf(fp, NULL, _IOLBF, 1024);
+ if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0)
+ die(ret, "WT_CONNECTION:open_session");
+ if ((ret =
+ session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
+ die(ret, "WT_SESSION.open_cursor");
+ data.data = buf;
+ data.size = sizeof(buf);
+ /*
+ * Write our portion of the key space until we're killed.
+ */
+ for (i = td->start; ; ++i) {
+ snprintf(kname, sizeof(kname), "%" PRIu64, i);
+ cursor->set_key(cursor, kname);
+ /*
+ * Every 30th record write a very large record that exceeds the
+ * log buffer size. This forces us to use the unbuffered path.
+ */
+ if (i % 30 == 0) {
+ data.size = 128 * 1024;
+ data.data = large;
+ } else {
+ data.size = __wt_random(&rnd) % MAX_VAL;
+ data.data = buf;
+ }
+ cursor->set_value(cursor, &data);
+ if ((ret = cursor->insert(cursor)) != 0)
+ die(ret, "WT_CURSOR.insert");
+ /*
+ * Save the key separately for checking later.
+ */
+ if (fprintf(fp, "%" PRIu64 "\n", i) == -1)
+ die(errno, "fprintf");
+ }
+ return (NULL);
+}
+
+/*
+ * Child process creates the database and table, and then creates worker
+ * threads to add data until it is killed by the parent.
+ */
+static void
+fill_db(uint32_t nth)
+{
+ pthread_t *thr;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ WT_THREAD_DATA *td;
+ uint32_t i;
+ int ret;
+ const char *envconf;
+
+ thr = calloc(nth, sizeof(pthread_t));
+ td = calloc(nth, sizeof(WT_THREAD_DATA));
+ if (chdir(home) != 0)
+ die(errno, "Child chdir");
+ if (inmem)
+ envconf = ENV_CONFIG_DEF;
+ else
+ envconf = ENV_CONFIG_TXNSYNC;
+ if ((ret = wiredtiger_open(NULL, NULL, envconf, &conn)) != 0)
+ die(ret, "wiredtiger_open");
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ die(ret, "WT_CONNECTION:open_session");
+ if ((ret = session->create(session,
+ uri, "key_format=S,value_format=u")) != 0)
+ die(ret, "WT_SESSION.create");
+ if ((ret = session->close(session, NULL)) != 0)
+ die(ret, "WT_SESSION:close");
+
+ printf("Create %" PRIu32 " writer threads\n", nth);
+ for (i = 0; i < nth; ++i) {
+ td[i].conn = conn;
+ td[i].start = (UINT64_MAX / nth) * i;
+ td[i].id = i;
+ if ((ret = pthread_create(
+ &thr[i], NULL, thread_run, &td[i])) != 0)
+ die(ret, "pthread_create");
+ }
+ printf("Spawned %" PRIu32 " writer threads\n", nth);
+ fflush(stdout);
+ /*
+ * The threads never exit, so the child will just wait here until
+ * it is killed.
+ */
+ for (i = 0; i < nth; ++i)
+ (void)pthread_join(thr[i], NULL);
+ /*
+ * NOTREACHED
+ */
+ free(thr);
+ free(td);
+ exit(EXIT_SUCCESS);
+}
+
+extern int __wt_optind;
+extern char *__wt_optarg;
+
+int
+main(int argc, char *argv[])
+{
+ FILE *fp;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ WT_RAND_STATE rnd;
+ uint64_t absent, count, key, last_key, middle;
+ uint32_t i, nth, timeout;
+ int ch, status, ret;
+ pid_t pid;
+ int fatal, rand_th, rand_time, verify_only;
+ const char *working_dir;
+ char cmd[1024], fname[64], kname[64];
+
+ if ((progname = strrchr(argv[0], '/')) == NULL)
+ progname = argv[0];
+ else
+ ++progname;
+
+ inmem = 0;
+ nth = MIN_TH;
+ rand_th = rand_time = 1;
+ timeout = MIN_TIME;
+ verify_only = 0;
+ working_dir = "WT_TEST.random-abort";
+
+ while ((ch = __wt_getopt(progname, argc, argv, "h:mT:t:v")) != EOF)
+ switch (ch) {
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ case 'm':
+ inmem = 1;
+ break;
+ case 'T':
+ rand_th = 0;
+ nth = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 't':
+ rand_time = 0;
+ timeout = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 'v':
+ verify_only = 1;
+ break;
+ default:
+ usage();
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+ if (argc != 0)
+ usage();
+
+ if ((strlen(working_dir) + 1) > 512) {
+ fprintf(stderr,
+ "Not enough memory in buffer for directory %s\n",
+ working_dir);
+ return (EXIT_FAILURE);
+ }
+ snprintf(home, 512, "%s", working_dir);
+ /*
+ * If the user wants to verify they need to tell us how many threads
+ * there were so we can find the old record files.
+ */
+ if (verify_only && rand_th) {
+ fprintf(stderr,
+ "Verify option requires specifying number of threads\n");
+ exit (EXIT_FAILURE);
+ }
+ if (!verify_only) {
+ snprintf(cmd, 1024, "rm -rf %s", home);
+ if ((ret = system(cmd)) != 0)
+ die(ret, "system rm");
+ snprintf(cmd, 1024, "mkdir %s", home);
+ if ((ret = system(cmd)) != 0)
+ die(ret, "system mkdir");
+
+ __wt_random_init(&rnd);
+ if (rand_time) {
+ timeout = __wt_random(&rnd) % MAX_TIME;
+ if (timeout < MIN_TIME)
+ timeout = MIN_TIME;
+ }
+ if (rand_th) {
+ nth = __wt_random(&rnd) % MAX_TH;
+ if (nth < MIN_TH)
+ nth = MIN_TH;
+ }
+ printf("Parent: Create %" PRIu32
+ " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
+ /*
+ * Fork a child to insert as many items. We will then randomly
+ * kill the child, run recovery and make sure all items we wrote
+ * exist after recovery runs.
+ */
+ if ((pid = fork()) < 0)
+ die(errno, "fork");
+
+ if (pid == 0) { /* child */
+ fill_db(nth);
+ return (EXIT_SUCCESS);
+ }
+
+ /* parent */
+ /*
+ * Sleep for the configured amount of time before killing
+ * the child.
+ */
+ sleep(timeout);
+
+ /*
+ * !!! It should be plenty long enough to make sure more than
+ * one log file exists. If wanted, that check would be added
+ * here.
+ */
+ printf("Kill child\n");
+ if (kill(pid, SIGKILL) != 0)
+ die(errno, "kill");
+ if (waitpid(pid, &status, 0) == -1)
+ die(errno, "waitpid");
+ }
+ /*
+ * !!! If we wanted to take a copy of the directory before recovery,
+ * this is the place to do it.
+ */
+ if (chdir(home) != 0)
+ die(errno, "parent chdir");
+ printf("Open database, run recovery and verify content\n");
+ if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0)
+ die(ret, "wiredtiger_open");
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ die(ret, "WT_CONNECTION:open_session");
+ if ((ret =
+ session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
+ die(ret, "WT_SESSION.open_cursor");
+
+ absent = count = 0;
+ fatal = 0;
+ for (i = 0; i < nth; ++i) {
+ middle = 0;
+ snprintf(fname, sizeof(fname), RECORDS_FILE, i);
+ if ((fp = fopen(fname, "r")) == NULL) {
+ fprintf(stderr,
+ "Failed to open %s. i %" PRIu32 "\n", fname, i);
+ die(errno, "fopen");
+ }
+
+ /*
+ * For every key in the saved file, verify that the key exists
+ * in the table after recovery. If we're doing in-memory
+ * log buffering we never expect a record missing in the middle,
+ * but records may be missing at the end. If we did
+ * write-no-sync, we expect every key to have been recovered.
+ */
+ for (last_key = UINT64_MAX;; ++count, last_key = key) {
+ ret = fscanf(fp, "%" SCNu64 "\n", &key);
+ if (ret != EOF && ret != 1)
+ die(errno, "fscanf");
+ if (ret == EOF)
+ break;
+ /*
+ * If we're unlucky, the last line may be a partially
+ * written key at the end that can result in a false
+ * negative error for a missing record. Detect it.
+ */
+ if (last_key != UINT64_MAX && key != last_key + 1) {
+ printf("%s: Ignore partial record %" PRIu64
+ " last valid key %" PRIu64 "\n",
+ fname, key, last_key);
+ break;
+ }
+ snprintf(kname, sizeof(kname), "%" PRIu64, key);
+ cursor->set_key(cursor, kname);
+ if ((ret = cursor->search(cursor)) != 0) {
+ if (ret != WT_NOTFOUND)
+ die(ret, "search");
+ if (!inmem)
+ printf("%s: no record with key %"
+ PRIu64 "\n", fname, key);
+ absent++;
+ middle = key;
+ } else if (middle != 0) {
+ /*
+ * We should never find an existing key after
+ * we have detected one missing.
+ */
+ printf("%s: after absent record at %" PRIu64
+ " key %" PRIu64 " exists\n",
+ fname, middle, key);
+ fatal = 1;
+ }
+ }
+ if (fclose(fp) != 0)
+ die(errno, "fclose");
+ }
+ if ((ret = conn->close(conn, NULL)) != 0)
+ die(ret, "WT_CONNECTION:close");
+ if (fatal)
+ return (EXIT_FAILURE);
+ if (!inmem && absent) {
+ printf("%" PRIu64 " record(s) absent from %" PRIu64 "\n",
+ absent, count);
+ return (EXIT_FAILURE);
+ }
+ printf("%" PRIu64 " records verified\n", count);
+ return (EXIT_SUCCESS);
+}
diff --git a/src/third_party/wiredtiger/test/recovery/smoke.sh b/src/third_party/wiredtiger/test/recovery/smoke.sh
new file mode 100755
index 00000000000..ce0662d3b2b
--- /dev/null
+++ b/src/third_party/wiredtiger/test/recovery/smoke.sh
@@ -0,0 +1,9 @@
+#! /bin/sh
+
+set -e
+
+# Smoke-test recovery as part of running "make check".
+
+$TEST_WRAPPER ./random-abort -t 10 -T 5
+$TEST_WRAPPER ./random-abort -m -t 10 -T 5
+$TEST_WRAPPER ./truncated-log
diff --git a/src/third_party/wiredtiger/test/recovery/truncated-log.c b/src/third_party/wiredtiger/test/recovery/truncated-log.c
new file mode 100644
index 00000000000..9a16fafa49d
--- /dev/null
+++ b/src/third_party/wiredtiger/test/recovery/truncated-log.c
@@ -0,0 +1,398 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <wiredtiger.h>
+#include "wt_internal.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/wait.h>
+
+#ifdef _WIN32
+/* snprintf is not supported on <= VS2013 */
+#define snprintf _snprintf
+#endif
+
+static char home[512]; /* Program working dir */
+static const char *progname; /* Program name */
+static const char * const uri = "table:main";
+
+#define RECORDS_FILE "records"
+
+#define ENV_CONFIG \
+ "create,log=(file_max=100K,archive=false,enabled)," \
+ "transaction_sync=(enabled,method=none)"
+#define ENV_CONFIG_REC "log=(recover=on)"
+
+#define LOG_FILE_1 "WiredTigerLog.0000000001"
+
+#define K_SIZE 16
+#define V_SIZE 256
+
+static void
+die(int e, const char *m)
+{
+ fprintf(stderr, "%s: %s: %s\n", progname, m, wiredtiger_strerror(e));
+ exit(EXIT_FAILURE);
+}
+
+/*
+ * Write a new log record into the log via log print, then open up a log
+ * cursor and walk the log to make sure we can read it. The reason for this
+ * test is that if there is a partial log record at the end of the previous
+ * log file and truncate does not exist, this tests that we can still read
+ * past that record.
+ */
+static void
+write_and_read_new(WT_SESSION *session)
+{
+ WT_CURSOR *logc;
+ WT_ITEM logrec_key, logrec_value;
+ uint64_t txnid;
+ uint32_t fileid, log_file, log_offset, opcount, optype, rectype;
+ int ret;
+ int saw_msg;
+
+ /*
+ * Write a log record and force it to disk so we can read it.
+ */
+ printf("Write log_printf record and verify.\n");
+ if ((ret = session->log_printf(session, "Test Log Record")) != 0)
+ die(ret, "log_printf");
+ if ((ret = session->log_flush(session, "sync=on")) != 0)
+ die(ret, "log_flush");
+ if ((ret = session->open_cursor(
+ session, "log:", NULL, NULL, &logc)) != 0)
+ die(ret, "open_cursor: log");
+ if ((ret = session->open_cursor(
+ session, "log:", NULL, NULL, &logc)) != 0)
+ die(ret, "open_cursor: log");
+ saw_msg = 0;
+ while ((ret = logc->next(logc)) == 0) {
+ /*
+ * We don't really need to get the key, but in case we want
+ * the LSN for some message, get it.
+ */
+ if ((ret = logc->get_key(logc,
+ &log_file, &log_offset, &opcount)) != 0)
+ die(errno, "get_key");
+ if ((ret = logc->get_value(logc, &txnid, &rectype,
+ &optype, &fileid, &logrec_key, &logrec_value)) != 0)
+ die(errno, "get_value");
+ /*
+ * We should never see a record from log file 2. We wrote
+ * a record there, but then the record in log file 1 was
+ * truncated to be a partial record, ending the log there.
+ * So everything after that, including everything in log
+ * file 2, is invalid until we get to log file 3 which is where
+ * the post-recovery records will be written.
+ */
+ if (log_file == 2)
+ die(EINVAL, "Found LSN in Log 2");
+#if 0
+ printf("LSN [%" PRIu32 "][%" PRIu32 "].%" PRIu32
+ ": record type %" PRIu32 " optype %" PRIu32
+ " txnid %" PRIu64 " fileid %" PRIu32 "\n",
+ log_file, log_offset, opcount,
+ rectype, optype, txnid, fileid);
+#endif
+ if (rectype == WT_LOGREC_MESSAGE) {
+ saw_msg = 1;
+ printf("Application Record: %s\n",
+ (char *)logrec_value.data);
+ break;
+ }
+ }
+ if ((ret = logc->close(logc)) != 0)
+ die(ret, "log cursor close");
+ if (!saw_msg)
+ die(EINVAL, "Did not traverse log printf record");
+}
+
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: %s [-h dir]\n", progname);
+ exit(EXIT_FAILURE);
+}
+
+/*
+ * Child process creates the database and table, and then writes data into
+ * the table until it is killed by the parent.
+ */
+static void
+fill_db(void)
+{
+ FILE *fp;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor, *logc;
+ WT_LSN lsn, save_lsn;
+ WT_SESSION *session;
+ uint32_t i, max_key, min_key, units, unused;
+ int ret;
+ int first;
+ char k[K_SIZE], v[V_SIZE];
+
+ /*
+ * Run in the home directory so that the records file is in there too.
+ */
+ if (chdir(home) != 0)
+ die(errno, "chdir");
+ if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0)
+ die(ret, "wiredtiger_open");
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ die(ret, "WT_CONNECTION:open_session");
+ if ((ret = session->create(session,
+ uri, "key_format=S,value_format=S")) != 0)
+ die(ret, "WT_SESSION.create");
+ if ((ret =
+ session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
+ die(ret, "WT_SESSION.open_cursor");
+
+ /*
+ * Keep a separate file with the records we wrote for checking.
+ */
+ (void)unlink(RECORDS_FILE);
+ if ((fp = fopen(RECORDS_FILE, "w")) == NULL)
+ die(errno, "fopen");
+ /*
+ * Set to no buffering.
+ */
+ (void)setvbuf(fp, NULL, _IONBF, 0);
+ save_lsn.file = 0;
+
+ /*
+ * Write data into the table until we move to log file 2.
+ * We do the calculation below so that we don't have to walk the
+ * log for every record.
+ *
+ * Calculate about how many records should fit in the log file.
+ * Subtract a bunch for metadata and file creation records.
+ * Then subtract out a few more records to be conservative.
+ */
+ units = (K_SIZE + V_SIZE) / 128 + 1;
+ min_key = 90000 / (units * 128) - 15;
+ max_key = min_key * 2;
+ first = 1;
+ for (i = 0; i < max_key; ++i) {
+ snprintf(k, sizeof(k), "key%03d", (int)i);
+ snprintf(v, sizeof(v), "value%0*d",
+ (int)(V_SIZE - strlen("value")), (int)i);
+ cursor->set_key(cursor, k);
+ cursor->set_value(cursor, v);
+ if ((ret = cursor->insert(cursor)) != 0)
+ die(ret, "WT_CURSOR.insert");
+
+ /*
+ * Walking the ever growing log can be slow, so only start
+ * looking for the cross into log file 2 after a minimum.
+ */
+ if (i > min_key) {
+ if ((ret = session->open_cursor(
+ session, "log:", NULL, NULL, &logc)) != 0)
+ die(ret, "open_cursor: log");
+ if (save_lsn.file != 0) {
+ logc->set_key(logc,
+ save_lsn.file, save_lsn.offset, 0);
+ if ((ret = logc->search(logc)) != 0)
+ die(ret, "search");
+ }
+ while ((ret = logc->next(logc)) == 0) {
+ if ((ret = logc->get_key(logc,
+ &lsn.file, &lsn.offset, &unused)) != 0)
+ die(ret, "get_key");
+ /*
+ * Save the LSN so that we know the offset
+ * of the last LSN in log file 1 later.
+ */
+ if (lsn.file < 2)
+ save_lsn = lsn;
+ else {
+ /*
+ * If this is the first time through
+ * that the key is larger than the
+ * minimum key and we're already in
+ * log file 2 then we did not calculate
+ * correctly and the test should fail.
+ */
+ if (first)
+ die(EINVAL,
+ "min_key too high");
+ if (fprintf(fp,
+ "%" PRIu64 " %" PRIu32 "\n",
+ save_lsn.offset, i - 1) == -1)
+ die(errno, "fprintf");
+ break;
+ }
+ }
+ first = 0;
+ if ((ret = logc->close(logc)) != 0)
+ die(ret, "log cursor close");
+ }
+ }
+ if (fclose(fp) != 0)
+ die(errno, "fclose");
+ abort();
+ /* NOTREACHED */
+}
+
+extern int __wt_optind;
+extern char *__wt_optarg;
+
+int
+main(int argc, char *argv[])
+{
+ FILE *fp;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uint64_t new_offset, offset;
+ uint32_t count, max_key;
+ int ch, status, ret;
+ pid_t pid;
+ const char *working_dir;
+ char cmd[1024];
+
+ if ((progname = strrchr(argv[0], '/')) == NULL)
+ progname = argv[0];
+ else
+ ++progname;
+
+ working_dir = "WT_TEST.truncated-log";
+ while ((ch = __wt_getopt(progname, argc, argv, "h:")) != EOF)
+ switch (ch) {
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ default:
+ usage();
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+ if (argc != 0)
+ usage();
+
+ if ((strlen(working_dir) + 1) > 512) {
+ fprintf(stderr,
+ "Not enough memory in buffer for directory %s\n",
+ working_dir);
+ return (EXIT_FAILURE);
+ }
+ snprintf(home, 512, "%s", working_dir);
+ snprintf(cmd, 1024, "rm -rf %s", home);
+ if ((ret = system(cmd)) != 0)
+ die(ret, "system rm");
+ snprintf(cmd, 1024, "mkdir %s", home);
+ if ((ret = system(cmd)) != 0)
+ die(ret, "system mkdir");
+
+ /*
+ * Fork a child to insert as many items. We will then randomly
+ * kill the child, run recovery and make sure all items we wrote
+ * exist after recovery runs.
+ */
+ if ((pid = fork()) < 0)
+ die(errno, "fork");
+
+ if (pid == 0) { /* child */
+ fill_db();
+ return (EXIT_SUCCESS);
+ }
+
+ /* parent */
+ /* Wait for child to kill itself. */
+ if (waitpid(pid, &status, 0) == -1)
+ die(errno, "waitpid");
+
+ /*
+ * !!! If we wanted to take a copy of the directory before recovery,
+ * this is the place to do it.
+ */
+ if (chdir(home) != 0)
+ die(errno, "chdir");
+
+ printf("Open database, run recovery and verify content\n");
+ if ((fp = fopen(RECORDS_FILE, "r")) == NULL)
+ die(errno, "fopen");
+ ret = fscanf(fp, "%" SCNu64 " %" SCNu32 "\n", &offset, &max_key);
+ if (ret != 2)
+ die(errno, "fscanf");
+ if (fclose(fp) != 0)
+ die(errno, "fclose");
+ /*
+ * The offset is the beginning of the last record. Truncate to
+ * the middle of that last record (i.e. ahead of that offset).
+ */
+ if (offset > UINT64_MAX - V_SIZE)
+ die(ERANGE, "offset");
+ new_offset = offset + V_SIZE;
+ printf("Parent: Log file 1: Key %" PRIu32 " at %" PRIu64 "\n",
+ max_key, offset);
+ printf("Parent: Truncate mid-record to %" PRIu64 "\n", new_offset);
+ if ((ret = truncate(LOG_FILE_1, (wt_off_t)new_offset)) != 0)
+ die(errno, "truncate");
+
+ if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0)
+ die(ret, "wiredtiger_open");
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ die(ret, "WT_CONNECTION:open_session");
+ if ((ret =
+ session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
+ die(ret, "WT_SESSION.open_cursor");
+
+ /*
+ * For every key in the saved file, verify that the key exists
+ * in the table after recovery. Since we did write-no-sync, we
+ * expect every key to have been recovered.
+ */
+ count = 0;
+ while ((ret = cursor->next(cursor)) == 0)
+ ++count;
+ /*
+ * The max key in the saved file is the key we truncated, but the
+ * key space starts at 0 and we're counting the records here, so we
+ * expect the max key number of records.
+ */
+ if (count > max_key) {
+ printf("expected %" PRIu32 " records found %" PRIu32 "\n",
+ max_key, count);
+ return (EXIT_FAILURE);
+ }
+ printf("%" PRIu32 " records verified\n", count);
+
+ /*
+ * Write a log record and then walk the log to make sure we can
+ * read that log record that is beyond the truncated record.
+ */
+ write_and_read_new(session);
+ if ((ret = conn->close(conn, NULL)) != 0)
+ die(ret, "WT_CONNECTION:close");
+ return (EXIT_SUCCESS);
+}
diff --git a/src/third_party/wiredtiger/test/utility/Makefile.am b/src/third_party/wiredtiger/test/utility/Makefile.am
new file mode 100644
index 00000000000..a2923eb41a8
--- /dev/null
+++ b/src/third_party/wiredtiger/test/utility/Makefile.am
@@ -0,0 +1,4 @@
+AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include
+
+libtest_util_la_SOURCES = misc.c parse_opts.c thread.c
+noinst_LTLIBRARIES = libtest_util.la
diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c
new file mode 100644
index 00000000000..1491c9a6938
--- /dev/null
+++ b/src/third_party/wiredtiger/test/utility/misc.c
@@ -0,0 +1,236 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "test_util.h"
+
+void (*custom_die)(void) = NULL;
+
+/*
+ * die --
+ * Report an error and quit.
+ */
+void
+testutil_die(int e, const char *fmt, ...)
+{
+ va_list ap;
+
+ /* Allow test programs to cleanup on fatal error. */
+ if (custom_die != NULL)
+ (*custom_die)();
+
+ if (fmt != NULL) {
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+ if (e != 0)
+ fprintf(stderr, ": %s", wiredtiger_strerror(e));
+ fprintf(stderr, "\n");
+
+ exit(EXIT_FAILURE);
+}
+
+/*
+ * testutil_work_dir_from_path --
+ * Takes a buffer, its size and the intended work directory.
+ * Creates the full intended work directory in buffer.
+ */
+void
+testutil_work_dir_from_path(char *buffer, size_t len, const char *dir)
+{
+ /* If no directory is provided, use the default. */
+ if (dir == NULL)
+ dir = DEFAULT_DIR;
+
+ if (len < strlen(dir) + 1)
+ testutil_die(ENOMEM,
+ "Not enough memory in buffer for directory %s", dir);
+
+ strcpy(buffer, dir);
+}
+
+/*
+ * testutil_clean_work_dir --
+ * Remove the work directory.
+ */
+void
+testutil_clean_work_dir(char *dir)
+{
+ size_t len;
+ int ret;
+ char *buf;
+
+#ifdef _WIN32
+ /* Additional bytes for the Windows rd command. */
+ len = 2 * strlen(dir) + strlen(RM_COMMAND) +
+ strlen(DIR_EXISTS_COMMAND) + 4;
+ if ((buf = malloc(len)) == NULL)
+ testutil_die(ENOMEM, "Failed to allocate memory");
+
+ snprintf(buf, len, "%s %s %s %s", DIR_EXISTS_COMMAND, dir,
+ RM_COMMAND, dir);
+#else
+ len = strlen(dir) + strlen(RM_COMMAND) + 1;
+ if ((buf = malloc(len)) == NULL)
+ testutil_die(ENOMEM, "Failed to allocate memory");
+
+ snprintf(buf, len, "%s%s", RM_COMMAND, dir);
+#endif
+
+ if ((ret = system(buf)) != 0 && ret != ENOENT)
+ testutil_die(ret, "%s", buf);
+ free(buf);
+}
+
+/*
+ * testutil_make_work_dir --
+ * Delete the existing work directory, then create a new one.
+ */
+void
+testutil_make_work_dir(char *dir)
+{
+ size_t len;
+ int ret;
+ char *buf;
+
+ testutil_clean_work_dir(dir);
+
+ /* Additional bytes for the mkdir command */
+ len = strlen(dir) + strlen(MKDIR_COMMAND) + 1;
+ if ((buf = malloc(len)) == NULL)
+ testutil_die(ENOMEM, "Failed to allocate memory");
+
+ /* mkdir shares syntax between Windows and Linux */
+ snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir);
+ if ((ret = system(buf)) != 0)
+ testutil_die(ret, "%s", buf);
+ free(buf);
+}
+
+/*
+ * testutil_cleanup --
+ * Delete the existing work directory and free the options structure.
+ */
+void
+testutil_cleanup(TEST_OPTS *opts)
+{
+ if (opts->conn != NULL)
+ testutil_check(opts->conn->close(opts->conn, NULL));
+
+ if (!opts->preserve)
+ testutil_clean_work_dir(opts->home);
+
+ free(opts->uri);
+ free(opts->home);
+}
+
+/*
+ * testutil_disable_long_tests --
+ * Return if TESTUTIL_DISABLE_LONG_TESTS is set.
+ */
+bool
+testutil_disable_long_tests(void)
+{
+ const char *res;
+
+ if (__wt_getenv(NULL,
+ "TESTUTIL_DISABLE_LONG_TESTS", &res) == WT_NOTFOUND)
+ return (false);
+
+ free((void *)res);
+ return (true);
+}
+
+/*
+ * dcalloc --
+ * Call calloc, dying on failure.
+ */
+void *
+dcalloc(size_t number, size_t size)
+{
+ void *p;
+
+ if ((p = calloc(number, size)) != NULL)
+ return (p);
+ testutil_die(errno, "calloc: %" WT_SIZET_FMT "B", number * size);
+}
+
+/*
+ * dmalloc --
+ * Call malloc, dying on failure.
+ */
+void *
+dmalloc(size_t len)
+{
+ void *p;
+
+ if ((p = malloc(len)) != NULL)
+ return (p);
+ testutil_die(errno, "malloc: %" WT_SIZET_FMT "B", len);
+}
+
+/*
+ * drealloc --
+ * Call realloc, dying on failure.
+ */
+void *
+drealloc(void *p, size_t len)
+{
+ void *t;
+ if ((t = realloc(p, len)) != NULL)
+ return (t);
+ testutil_die(errno, "realloc: %" WT_SIZET_FMT "B", len);
+}
+
+/*
+ * dstrdup --
+ * Call strdup, dying on failure.
+ */
+void *
+dstrdup(const void *str)
+{
+ char *p;
+
+ if ((p = strdup(str)) != NULL)
+ return (p);
+ testutil_die(errno, "strdup");
+}
+
+/*
+ * dstrndup --
+ * Call emulating strndup, dying on failure. Don't use actual strndup here
+ * as it is not supported within MSVC.
+ */
+void *
+dstrndup(const char *str, size_t len)
+{
+ char *p;
+
+ p = dcalloc(len + 1, sizeof(char));
+ memcpy(p, str, len);
+ return (p);
+}
diff --git a/src/third_party/wiredtiger/test/utility/parse_opts.c b/src/third_party/wiredtiger/test/utility/parse_opts.c
new file mode 100644
index 00000000000..74a1c021d5d
--- /dev/null
+++ b/src/third_party/wiredtiger/test/utility/parse_opts.c
@@ -0,0 +1,130 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "test_util.h"
+
+extern char *__wt_optarg; /* argument associated with option */
+
+/*
+ * testutil_parse_opts --
+ * Parse command line options for a test case.
+ */
+int
+testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts)
+{
+ int ch;
+ size_t len;
+
+ opts->preserve = false;
+ opts->running = true;
+ opts->verbose = false;
+
+ if ((opts->progname = strrchr(argv[0], DIR_DELIM)) == NULL)
+ opts->progname = argv[0];
+ else
+ ++opts->progname;
+
+ while ((ch = __wt_getopt(opts->progname,
+ argc, argv, "A:h:n:o:pR:T:t:vW:")) != EOF)
+ switch (ch) {
+ case 'A': /* Number of append threads */
+ opts->n_append_threads = (uint64_t)atoll(__wt_optarg);
+ break;
+ case 'h': /* Home directory */
+ opts->home = dstrdup(__wt_optarg);
+ break;
+ case 'n': /* Number of records */
+ opts->nrecords = (uint64_t)atoll(__wt_optarg);
+ break;
+ case 'o': /* Number of operations */
+ opts->nops = (uint64_t)atoll(__wt_optarg);
+ break;
+ case 'p': /* Preserve directory contents */
+ opts->preserve = true;
+ break;
+ case 'R': /* Number of reader threads */
+ opts->n_read_threads = (uint64_t)atoll(__wt_optarg);
+ break;
+ case 'T': /* Number of threads */
+ opts->nthreads = (uint64_t)atoll(__wt_optarg);
+ break;
+ case 't': /* Table type */
+ switch (__wt_optarg[0]) {
+ case 'C':
+ case 'c':
+ opts->table_type = TABLE_COL;
+ break;
+ case 'F':
+ case 'f':
+ opts->table_type = TABLE_FIX;
+ break;
+ case 'R':
+ case 'r':
+ opts->table_type = TABLE_ROW;
+ break;
+ }
+ break;
+ case 'v':
+ opts->verbose = true;
+ break;
+ case 'W': /* Number of writer threads */
+ opts->n_write_threads = (uint64_t)atoll(__wt_optarg);
+ break;
+ case '?':
+ default:
+ (void)fprintf(stderr, "usage: %s "
+ "[-A append thread count] "
+ "[-h home] "
+ "[-n record count] "
+ "[-o op count] "
+ "[-p] "
+ "[-R read thread count] "
+ "[-T thread count] "
+ "[-t c|f|r table type] "
+ "[-v] "
+ "[-W write thread count] ",
+ opts->progname);
+ return (1);
+ }
+
+ /*
+ * Setup the home directory if not explicitly specified. It needs to be
+ * unique for every test or the auto make parallel tester gets upset.
+ */
+ if (opts->home == NULL) {
+ len = strlen("WT_TEST.") + strlen(opts->progname) + 10;
+ opts->home = dmalloc(len);
+ snprintf(opts->home, len, "WT_TEST.%s", opts->progname);
+ }
+
+ /* Setup the default URI string */
+ len = strlen("table:") + strlen(opts->progname) + 10;
+ opts->uri = dmalloc(len);
+ snprintf(opts->uri, len, "table:%s", opts->progname);
+
+ return (0);
+}
diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h
new file mode 100644
index 00000000000..f6a9cd68e02
--- /dev/null
+++ b/src/third_party/wiredtiger/test/utility/test_util.h
@@ -0,0 +1,194 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "wt_internal.h" /* For __wt_XXX */
+
+#ifdef _WIN32
+ #define DIR_DELIM '\\'
+ #define DIR_DELIM_STR "\\"
+ #define DIR_EXISTS_COMMAND "IF EXIST "
+ #define RM_COMMAND "rd /s /q "
+#else
+ #define DIR_DELIM '/'
+ #define DIR_DELIM_STR "/"
+ #define RM_COMMAND "rm -rf "
+#endif
+
+#define DEFAULT_DIR "WT_TEST"
+#define MKDIR_COMMAND "mkdir "
+
+#ifdef _WIN32
+#include "windows_shim.h"
+#endif
+
+/* Generic option parsing structure shared by all test cases. */
+typedef struct {
+ char *home;
+ char *progname;
+ enum { TABLE_COL=1, /* Fixed-length column store */
+ TABLE_FIX=2, /* Variable-length column store */
+ TABLE_ROW=3 /* Row-store */
+ } table_type;
+ bool preserve; /* Don't remove files on exit */
+ bool verbose; /* Run in verbose mode */
+ uint64_t nrecords; /* Number of records */
+ uint64_t nops; /* Number of operations */
+ uint64_t nthreads; /* Number of threads */
+ uint64_t n_append_threads; /* Number of append threads */
+ uint64_t n_read_threads; /* Number of read threads */
+ uint64_t n_write_threads; /* Number of write threads */
+
+ /*
+ * Fields commonly shared within a test program. The test cleanup
+ * function will attempt to automatically free and close non-null
+ * resources.
+ */
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ bool running;
+ char *uri;
+ volatile uint64_t next_threadid;
+ uint64_t max_inserted_id;
+} TEST_OPTS;
+
+/*
+ * testutil_assert --
+ * Complain and quit if something isn't true.
+ */
+#define testutil_assert(a) do { \
+ if (!(a)) \
+ testutil_die(0, "%s/%d: %s", __func__, __LINE__, #a); \
+} while (0)
+
+/*
+ * testutil_assertfmt --
+ * Complain and quit if something isn't true.
+ */
+#define testutil_assertfmt(a, fmt, ...) do { \
+ if (!(a)) \
+ testutil_die(0, "%s/%d: %s: " fmt, \
+ __func__, __LINE__, #a, __VA_ARGS__); \
+} while (0)
+
+/*
+ * testutil_check --
+ * Complain and quit if a function call fails.
+ */
+#define testutil_check(call) do { \
+ int __r; \
+ if ((__r = (call)) != 0) \
+ testutil_die( \
+ __r, "%s/%d: %s", __func__, __LINE__, #call); \
+} while (0)
+
+/*
+ * testutil_checkfmt --
+ * Complain and quit if a function call fails, with additional arguments.
+ */
+#define testutil_checkfmt(call, fmt, ...) do { \
+ int __r; \
+ if ((__r = (call)) != 0) \
+ testutil_die(__r, "%s/%d: %s: " fmt, \
+ __func__, __LINE__, #call, __VA_ARGS__); \
+} while (0)
+
+/*
+ * u64_to_string --
+ * Convert a uint64_t to a text string.
+ *
+ * Algorithm from Andrei Alexandrescu's talk: "Three Optimization Tips for C++"
+ */
+static inline void
+u64_to_string(uint64_t n, char **pp)
+{
+ static const char hundred_lookup[201] =
+ "0001020304050607080910111213141516171819"
+ "2021222324252627282930313233343536373839"
+ "4041424344454647484950515253545556575859"
+ "6061626364656667686970717273747576777879"
+ "8081828384858687888990919293949596979899";
+ u_int i;
+ char *p;
+
+ /*
+ * The argument pointer references the last element of a buffer (which
+ * must be large enough to hold any possible value).
+ *
+ * Nul-terminate the buffer.
+ */
+ for (p = *pp, *p-- = '\0'; n >= 100; n /= 100) {
+ i = (n % 100) * 2;
+ *p-- = hundred_lookup[i + 1];
+ *p-- = hundred_lookup[i];
+ }
+
+ /* Handle the last two digits. */
+ i = (u_int)n * 2;
+ *p = hundred_lookup[i + 1];
+ if (n >= 10)
+ *--p = hundred_lookup[i];
+
+ /* Return a pointer to the first byte of the text string. */
+ *pp = p;
+}
+
+/*
+ * u64_to_string_zf --
+ * Convert a uint64_t to a text string, zero-filling the buffer.
+ */
+static inline void
+u64_to_string_zf(uint64_t n, char *buf, size_t len)
+{
+ char *p;
+
+ p = buf + (len - 1);
+ u64_to_string(n, &p);
+
+ while (p > buf)
+ *--p = '0';
+}
+
+/* Allow tests to add their own death handling. */
+extern void (*custom_die)(void);
+
+void testutil_die(int, const char *, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+
+void *dcalloc(size_t, size_t);
+void *dmalloc(size_t);
+void *drealloc(void *, size_t);
+void *dstrdup(const void *);
+void *dstrndup(const char *, size_t);
+void testutil_clean_work_dir(char *);
+void testutil_cleanup(TEST_OPTS *);
+bool testutil_disable_long_tests(void);
+void testutil_make_work_dir(char *);
+int testutil_parse_opts(int, char * const *, TEST_OPTS *);
+void testutil_work_dir_from_path(char *, size_t, const char *);
+void *thread_append(void *);
+void *thread_insert_append(void *);
+void *thread_prev(void *);
diff --git a/src/third_party/wiredtiger/test/utility/thread.c b/src/third_party/wiredtiger/test/utility/thread.c
new file mode 100644
index 00000000000..38465b2f02b
--- /dev/null
+++ b/src/third_party/wiredtiger/test/utility/thread.c
@@ -0,0 +1,141 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "test_util.h"
+
+/*
+ * A thread dedicated to appending records into a table. Works with fixed
+ * length column stores and variable length column stores.
+ * One thread (the first thread created by an application) checks for a
+ * terminating condition after each insert.
+ */
+void *
+thread_append(void *arg)
+{
+ TEST_OPTS *opts;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uint64_t id, recno;
+ char buf[64];
+
+ opts = (TEST_OPTS *)arg;
+ conn = opts->conn;
+
+ id = __wt_atomic_fetch_addv64(&opts->next_threadid, 1);
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(
+ session->open_cursor(session, opts->uri, NULL, "append", &cursor));
+
+ buf[0] = '\2';
+ for (recno = 1; opts->running; ++recno) {
+ if (opts->table_type == TABLE_FIX)
+ cursor->set_value(cursor, buf[0]);
+ else {
+ snprintf(buf, sizeof(buf),
+ "%" PRIu64 " VALUE ------", recno);
+ cursor->set_value(cursor, buf);
+ }
+ testutil_check(cursor->insert(cursor));
+ if (id == 0) {
+ testutil_check(
+ cursor->get_key(cursor, &opts->max_inserted_id));
+ if (opts->max_inserted_id >= opts->nrecords)
+ opts->running = false;
+ }
+ }
+
+ return (NULL);
+}
+
+/*
+ * Append into a row store table.
+ */
+void *
+thread_insert_append(void *arg)
+{
+ TEST_OPTS *opts;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ uint64_t i;
+ char kbuf[64];
+
+ opts = (TEST_OPTS *)arg;
+ conn = opts->conn;
+
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+ testutil_check(session->open_cursor(
+ session, opts->uri, NULL, NULL, &cursor));
+
+ for (i = 0; i < opts->nrecords; ++i) {
+ snprintf(kbuf, sizeof(kbuf), "%010d KEY------", (int)i);
+ cursor->set_key(cursor, kbuf);
+ cursor->set_value(cursor, "========== VALUE =======");
+ testutil_check(cursor->insert(cursor));
+ if (i % 100000 == 0) {
+ printf("insert: %" PRIu64 "\r", i);
+ fflush(stdout);
+ }
+ }
+ printf("\n");
+
+ opts->running = false;
+
+ return (NULL);
+}
+
+/*
+ * Repeatedly walk backwards through the records in a table.
+ */
+void *
+thread_prev(void *arg)
+{
+ TEST_OPTS *opts;
+ WT_CURSOR *cursor;
+ WT_SESSION *session;
+ int ret;
+
+ opts = (TEST_OPTS *)arg;
+ ret = 0;
+
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(
+ session->open_cursor(session, opts->uri, NULL, NULL, &cursor));
+ while (opts->running) {
+ while (opts->running && (ret = cursor->prev(cursor)) == 0)
+ ;
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+ testutil_check(ret);
+ }
+
+ testutil_check(session->close(session, NULL));
+ return (NULL);
+}