diff options
author | Alex Gorrod <alexander.gorrod@mongodb.com> | 2017-06-19 20:58:06 +0000 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2017-06-19 20:58:06 +0000 |
commit | 0513806e66fca4715c20ae5ea64e172a2bf1ee74 (patch) | |
tree | 4337ea4650af08f0343ad5a73e9cda6d88f6e535 | |
parent | a4bee00dcaf814493818c1a55f8d060e9195ea9e (diff) | |
download | mongo-0513806e66fca4715c20ae5ea64e172a2bf1ee74.tar.gz |
Import wiredtiger: d687cc19830b5b479d7474724ed609c416bfb1d2 from branch mongodb-3.0
ref: b1aab8db7d..d687cc1983
for: 3.0.16
WT-2321 Fix a race between eviction and worker threads on the eviction queue
WT-3000 Missing log records in recovery when crashing after a log file switch
WT-3008 Run wtperf stress testing against all maintained branches
WT-3010 shared-cache-stress can hang on mongodb-3.0 branch
WT-3038 WiredTigers mongodb-3.0 branch crashes and hangs when run against test format
WT-3049 Backport test/format to mongodb-3.0 branch
WT-3158 Fix structure layout on Windows.
WT-3277 [v3.0] Eviction of pages belonging to tables that have been force dropped
WT-3286 [v3.0] Improve search if an index hint is wrong
39 files changed, 3502 insertions, 1204 deletions
diff --git a/src/third_party/wiredtiger/bench/wtperf/stress/btree-split-stress.wtperf b/src/third_party/wiredtiger/bench/wtperf/stress/btree-split-stress.wtperf new file mode 100644 index 00000000000..e8f20cb1f53 --- /dev/null +++ b/src/third_party/wiredtiger/bench/wtperf/stress/btree-split-stress.wtperf @@ -0,0 +1,10 @@ +conn_config="cache_size=2GB,statistics=[fast,clear],statistics_log=(wait=10),eviction=(threads_max=4,threads_min=4)" +table_config="type=file,leaf_page_max=8k,internal_page_max=8k,memory_page_max=2MB,split_deepen_min_child=250" +icount=200000 +report_interval=5 +run_time=300 +#reopen_connection=false +populate_threads=2 +value_sz=256 +#read_range=100 +threads=((count=4,inserts=1,throttle=100000),(count=8,reads=1)) diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/shared-cache-stress.wtperf b/src/third_party/wiredtiger/bench/wtperf/stress/shared-cache-stress.wtperf index 87d14f4f5c1..87d14f4f5c1 100644 --- a/src/third_party/wiredtiger/bench/wtperf/runners/shared-cache-stress.wtperf +++ b/src/third_party/wiredtiger/bench/wtperf/stress/shared-cache-stress.wtperf diff --git a/src/third_party/wiredtiger/build_posix/Make.subdirs b/src/third_party/wiredtiger/build_posix/Make.subdirs index 8f8614a7016..37a8d9e1a66 100644 --- a/src/third_party/wiredtiger/build_posix/Make.subdirs +++ b/src/third_party/wiredtiger/build_posix/Make.subdirs @@ -20,11 +20,18 @@ examples/c lang/java JAVA examples/java JAVA lang/python PYTHON + +# Test/Benchmark support library. +test/utility + +# Test programs. test/bloom test/checkpoint test/fops -test/format HAVE_BERKELEY_DB +test/format test/huge test/packing +test/recovery test/salvage test/thread + diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 6b2ed50701d..657d20a55bd 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -16,6 +16,7 @@ ASYNC Addr Ailamaki Alakuijala +Alexandrescu's Alloc Async Athanassoulis @@ -331,6 +332,7 @@ Subtree Subtrees TAILQ TCMalloc +TESTUTIL TODO TOOSMALL TORTIOUS @@ -389,6 +391,7 @@ WiredTigerTxn WithSeeds Wmissing Wuninitialized +Wunused XP ZLIB Zlib @@ -416,6 +419,7 @@ arg argc args argv +assertfmt async asyncopp autockpt @@ -472,6 +476,8 @@ centric cfg cfkos change's +chdir +checkfmt checkfrag checkpointed checkpointer @@ -539,6 +545,7 @@ datasets datasource datastore dbc +dcalloc decile deciles decl @@ -569,12 +576,16 @@ dlh dll dlopen dlsym +dmalloc dmsg +drealloc ds dsk dsrc dst dstlen +dstrdup +dstrndup dsync dt dtype @@ -648,6 +659,7 @@ fread free'd freelist fs +fscanf fsm fstat fsync @@ -690,6 +702,7 @@ iSh ibackup icount idx +ifdef ifdef's ifndef ikey @@ -781,6 +794,7 @@ lsnappy lu lz lzo +mT madvise majorp malloc @@ -882,6 +896,7 @@ optype os ovfl ownp +pR packv parens parserp @@ -1117,6 +1132,7 @@ uid uint uintmax unbare +unbuffered uncompressing uncompresssed undef @@ -1150,6 +1166,7 @@ usr utf util uu +vW va valuep valuev @@ -1170,6 +1187,7 @@ vsnprintf vtype vunpack vupdate +waitpid walk's wb wiredtiger @@ -1189,6 +1207,7 @@ xxxx xxxxx xxxxxx zalloc +zf zfree zlib zu diff --git a/src/third_party/wiredtiger/dist/s_style b/src/third_party/wiredtiger/dist/s_style index b0a642a4e26..9c03f9b08b5 100755 --- a/src/third_party/wiredtiger/dist/s_style +++ b/src/third_party/wiredtiger/dist/s_style @@ -92,6 +92,16 @@ for f in \ cat $t } + # If we don't have matching pack-begin and pack-end calls, we don't get + # an error, we just get a Windows performance regression. Using awk and + # not wc to ensure there's no whitespace in the assignment. + egrep WT_PACKED_STRUCT $f > $t + cnt=`awk 'BEGIN { line = 0 } { ++line } END { print line }' < $t` + test `expr "$cnt" % 2` -ne 0 && { + echo "$f: mismatched WT_PACKED_STRUCT_BEGIN/END lines" + cat $t + } + # Direct calls to functions we're not supposed to use in the library. # We don't check for all of them, just a few of the common ones. if ! expr "$f" : 'bench/.*' > /dev/null && diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data new file mode 100644 index 00000000000..7deef8a0f3b --- /dev/null +++ b/src/third_party/wiredtiger/import.data @@ -0,0 +1,6 @@ +{ + "commit": "d687cc19830b5b479d7474724ed609c416bfb1d2", + "github": "wiredtiger/wiredtiger.git", + "vendor": "wiredtiger", + "branch": "mongodb-3.0" +} diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index 4affc9c243e..d533c805d3a 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -634,6 +634,8 @@ __debug_page_metadata(WT_DBG *ds, WT_PAGE *page) __dmsg(ds, ", evict-lru"); if (F_ISSET_ATOMIC(page, WT_PAGE_SCANNING)) __dmsg(ds, ", scanning"); + if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT)) + __dmsg(ds, ", split-insert"); if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_LOCKED)) __dmsg(ds, ", split locked"); diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c index 30e19147e12..57db820697f 100644 --- a/src/third_party/wiredtiger/src/btree/bt_discard.c +++ b/src/third_party/wiredtiger/src/btree/bt_discard.c @@ -411,6 +411,7 @@ __free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd) for (; upd != NULL; upd = next) { /* Everything we free should be visible to everyone. */ WT_ASSERT(session, + F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || F_ISSET(session, WT_SESSION_DISCARD_FORCE) || upd->txnid == WT_TXN_ABORTED || __wt_txn_visible_all(session, upd->txnid)); diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index 00452af549d..0907905bd43 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -624,11 +624,14 @@ __btree_page_sizes(WT_SESSION_IMPL *session) * it may not have been set. */ WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval)); - btree->maxmempage = - WT_MAX((uint64_t)cval.val, 50 * (uint64_t)btree->maxleafpage); + btree->maxmempage = (uint64_t)cval.val; cache_size = S2C(session)->cache_size; if (cache_size > 0) - btree->maxmempage = WT_MIN(btree->maxmempage, cache_size / 4); + btree->maxmempage = WT_MIN(btree->maxmempage, + cache_size / 10); + + /* Enforce a lower bound of a single disk leaf page */ + btree->maxmempage = WT_MAX(btree->maxmempage, btree->maxleafpage); /* * Get the split percentage (reconciliation splits pages into smaller diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c index c7d83d8dfff..530c1af2add 100644 --- a/src/third_party/wiredtiger/src/btree/bt_walk.c +++ b/src/third_party/wiredtiger/src/btree/bt_walk.c @@ -40,21 +40,15 @@ retry: WT_INTL_INDEX_GET(session, ref->home, pindex); * is slower. */ i = ref->pindex_hint; - if (i < pindex->entries && pindex->index[i]->page == ref->page) { - *pindexp = pindex; - *slotp = i; - return; - } + if (i < pindex->entries && pindex->index[i]->page == ref->page) + goto found; while (++i < pindex->entries) - if (pindex->index[i]->page == ref->page) { - *pindexp = pindex; - *slotp = ref->pindex_hint = i; - return; - } + if (pindex->index[i]->page == ref->page) + goto found; for (i = 0; i < pindex->entries; ++i) if (pindex->index[i]->page == ref->page) { - *pindexp = pindex; - *slotp = ref->pindex_hint = i; +found: *pindexp = pindex; + *slotp = i; return; } diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 1314f715907..dc16f3bf616 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -837,9 +837,8 @@ __evict_lru_walk(WT_SESSION_IMPL *session, uint32_t flags) { WT_CACHE *cache; WT_DECL_RET; - WT_EVICT_ENTRY *evict; uint64_t cutoff; - uint32_t candidates, entries, i; + uint32_t candidates, entries; cache = S2C(session)->cache; @@ -857,6 +856,14 @@ __evict_lru_walk(WT_SESSION_IMPL *session, uint32_t flags) while (entries > 0 && cache->evict[entries - 1].ref == NULL) --entries; + /* + * If we have more entries than the maximum tracked between walks, + * clear them. Do this before figuring out how many of the entries are + * candidates so we never end up with more candidates than entries. + */ + while (entries > WT_EVICT_WALK_BASE) + __evict_list_clear(session, &cache->evict[--entries]); + cache->evict_entries = entries; if (entries == 0) { @@ -900,15 +907,6 @@ __evict_lru_walk(WT_SESSION_IMPL *session, uint32_t flags) cache->evict_candidates = candidates; } - /* If we have more than the minimum number of entries, clear them. */ - if (cache->evict_entries > WT_EVICT_WALK_BASE) { - for (i = WT_EVICT_WALK_BASE, evict = cache->evict + i; - i < cache->evict_entries; - i++, evict++) - __evict_list_clear(session, evict); - cache->evict_entries = WT_EVICT_WALK_BASE; - } - cache->evict_current = cache->evict; __wt_spin_unlock(session, &cache->evict_lock); @@ -1155,7 +1153,7 @@ __evict_init_candidate( evict->ref = ref; evict->btree = S2BT(session); - /* Mark the page on the list */ + /* Mark the page on the list; set last to flush the other updates. */ F_SET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU); } diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 046d8bb3eba..c944b9b0e22 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -413,7 +413,8 @@ __evict_review( if (closing) FLD_SET(reconcile_flags, WT_SKIP_UPDATE_ERR); else if (!WT_PAGE_IS_INTERNAL(page) && - page->read_gen == WT_READGEN_OLDEST) + (page->read_gen == WT_READGEN_OLDEST || + !__wt_txn_visible_all(session, page->modify->update_txn))) FLD_SET(reconcile_flags, WT_SKIP_UPDATE_RESTORE); WT_RET(__wt_reconcile(session, ref, NULL, reconcile_flags)); WT_ASSERT(session, diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index fb497f64963..e46fb66930a 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -684,7 +684,7 @@ struct __wt_ref { * up our slot in the page's index structure. */ WT_PAGE * volatile home; /* Reference page */ - uint32_t pindex_hint; /* Reference page index hint */ + volatile uint32_t pindex_hint; /* Reference page index hint */ #define WT_REF_DISK 0 /* Page is on disk */ #define WT_REF_DELETED 1 /* Page is on disk, but deleted */ @@ -888,7 +888,7 @@ WT_PACKED_STRUCT_BEGIN(__wt_update) #define WT_UPDATE_MEMSIZE(upd) \ WT_ALIGN(sizeof(WT_UPDATE) + \ (WT_UPDATE_DELETED_ISSET(upd) ? 0 : (upd)->size), 32) -}; +WT_PACKED_STRUCT_END /* * WT_INSERT -- diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i index f63a07de44e..99e4ec847fe 100644 --- a/src/third_party/wiredtiger/src/include/cache.i +++ b/src/third_party/wiredtiger/src/include/cache.i @@ -195,6 +195,13 @@ __wt_cache_full_check(WT_SESSION_IMPL *session) return (0); /* + * If the connection is closing we do not need eviction from an + * application thread. The eviction subsystem is already closed. + */ + if (F_ISSET(S2C(session), WT_CONN_CLOSING)) + return (0); + + /* * Only wake the eviction server the first time through here (if the * cache is too full). * diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index 5a7c2cb1b20..d38db0aac1d 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -10,7 +10,7 @@ static int __log_decompress(WT_SESSION_IMPL *, WT_ITEM *, WT_ITEM **); static int __log_openfile( - WT_SESSION_IMPL *, bool, WT_FH **, const char *, uint32_t); + WT_SESSION_IMPL *, bool, WT_FH **, const char *, uint32_t); static int __log_read_internal(WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *, uint32_t); static int __log_write_internal(WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *, @@ -19,6 +19,112 @@ static int __log_write_internal(WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *, #define WT_LOG_COMPRESS_SKIP (offsetof(WT_LOG_RECORD, record)) /* + * __log_has_hole -- + * Determine if the current offset represents a hole in the log + * file (i.e. there is valid data somewhere after the hole), or + * if this is the end of this log file and the remainder of the + * file is zeroes. + */ +static int +__log_has_hole(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t log_size, wt_off_t offset, bool *hole) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_LOG *log; + wt_off_t off, remainder; + size_t bufsz, rdlen; + char *buf, *zerobuf; + + conn = S2C(session); + log = conn->log; + remainder = log_size - offset; + *hole = false; + + /* + * It can be very slow looking for the last real record in the log + * in very small chunks. Walk a megabyte at a time. If we find a + * part of the log that is not just zeroes we know this log file + * has a hole in it. + */ + buf = zerobuf = NULL; + if (log == NULL || log->allocsize < WT_MEGABYTE) + bufsz = WT_MEGABYTE; + else + bufsz = log->allocsize; + + if ((size_t)remainder < bufsz) + bufsz = (size_t)remainder; + WT_RET(__wt_calloc_def(session, bufsz, &buf)); + WT_ERR(__wt_calloc_def(session, bufsz, &zerobuf)); + + /* + * Read in a chunk starting at the given offset. + * Compare against a known zero byte chunk. + */ + for (off = offset; remainder > 0; + remainder -= (wt_off_t)rdlen, off += (wt_off_t)rdlen) { + rdlen = WT_MIN(bufsz, (size_t)remainder); + WT_ERR(__wt_read(session, fh, off, rdlen, buf)); + if (memcmp(buf, zerobuf, rdlen) != 0) { + *hole = true; + break; + } + } + +err: __wt_free(session, buf); + __wt_free(session, zerobuf); + return (ret); +} + +/* + * __log_wait_for_earlier_slot -- + * Wait for write_lsn to catch up to this slot. + */ +static int +__log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) +{ + WT_CONNECTION_IMPL *conn; + WT_LOG *log; + int yield_count; + + conn = S2C(session); + log = conn->log; + yield_count = 0; + + while (LOG_CMP(&log->write_lsn, &slot->slot_release_lsn) != 0) { + if (++yield_count < 1000) + __wt_yield(); + else + WT_RET(__wt_cond_wait( + session, log->log_write_cond, 200)); + } + return (0); +} + +/* + * __log_fs_write -- + * Wrapper when writing to a log file. If we're writing to a new log + * file for the first time wait for writes to the previous log file. + */ +static int +__log_fs_write(WT_SESSION_IMPL *session, + WT_LOGSLOT *slot, wt_off_t offset, size_t len, const void *buf) +{ + /* + * If we're writing into a new log file, we have to wait for all + * writes to the previous log file to complete otherwise there could + * be a hole at the end of the previous log file that we cannot detect. + */ + if (slot->slot_release_lsn.file < slot->slot_start_lsn.file) { + __log_wait_for_earlier_slot(session, slot); + WT_RET(__wt_log_force_sync(session, &slot->slot_release_lsn)); + (void)__wt_write(session, slot->slot_fh, offset, len, buf); + } + return (__wt_write(session, slot->slot_fh, offset, len, buf)); +} + +/* * __wt_log_ckpt -- * Record the given LSN as the checkpoint LSN and signal the archive * thread as needed. @@ -51,6 +157,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) conn = S2C(session); log = conn->log; + log_fh = NULL; /* * We need to wait for the previous log file to get written @@ -83,25 +190,20 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) * Sync the log file if needed. */ if (LOG_CMP(&log->sync_lsn, min_lsn) < 0) { - /* - * Get our own file handle to the log file. It is possible - * for the file handle in the log structure to change out - * from under us and either be NULL or point to a different - * file than we want. - */ - WT_ERR(__log_openfile(session, - false, &log_fh, WT_LOG_FILENAME, min_lsn->file)); + WT_ERR(__log_openfile(session, false, + &log_fh, WT_LOG_FILENAME, min_lsn->file)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "log_force_sync: sync %s to LSN %d/%lu", log_fh->name, min_lsn->file, min_lsn->offset)); WT_ERR(__wt_fsync(session, log_fh)); log->sync_lsn = *min_lsn; WT_STAT_FAST_CONN_INCR(session, log_sync); - WT_ERR(__wt_close(session, &log_fh)); WT_ERR(__wt_cond_signal(session, log->log_sync_cond)); } err: __wt_spin_unlock(session, &log->log_sync_lock); + if (log_fh != NULL) + WT_TRET(__wt_close(session, &log_fh)); return (ret); } @@ -467,13 +569,13 @@ __log_fill(WT_SESSION_IMPL *session, logrec = (WT_LOG_RECORD *)record->mem; /* - * Call __wt_write. For now the offset is the real byte offset. + * Call write. For now the offset is the real byte offset. * If the offset becomes a unit of WT_LOG_ALIGN this is where we would * multiply by WT_LOG_ALIGN to get the real file byte offset for * write(). */ if (direct) - WT_ERR(__wt_write(session, myslot->slot->slot_fh, + WT_ERR(__log_fs_write(session, myslot->slot, myslot->offset + myslot->slot->slot_start_offset, (size_t)logrec->len, (void *)logrec)); else @@ -936,94 +1038,6 @@ __wt_log_close(WT_SESSION_IMPL *session) } /* - * __log_filesize -- - * Returns an estimate of the real end of log file. - */ -static int -__log_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *eof) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_LOG *log; - wt_off_t log_size, off, off1; - uint32_t allocsize, bufsz; - char *buf, *zerobuf; - - conn = S2C(session); - log = conn->log; - if (eof == NULL) - return (0); - *eof = 0; - WT_RET(__wt_filesize(session, fh, &log_size)); - if (log == NULL) - allocsize = WT_LOG_ALIGN; - else - allocsize = log->allocsize; - - /* - * It can be very slow looking for the last real record in the log - * in very small chunks. Walk backward by a megabyte at a time. When - * we find a part of the log that is not just zeroes, walk to find - * the last record. - */ - buf = zerobuf = NULL; - if (allocsize < WT_MEGABYTE && log_size > WT_MEGABYTE) - bufsz = WT_MEGABYTE; - else - bufsz = allocsize; - WT_RET(__wt_calloc_def(session, bufsz, &buf)); - WT_ERR(__wt_calloc_def(session, bufsz, &zerobuf)); - - /* - * Read in a chunk starting at the end of the file. Keep going until - * we reach the beginning or we find a chunk that contains any non-zero - * bytes. Compare against a known zero byte chunk. - */ - for (off = log_size - (wt_off_t)bufsz; - off >= 0; - off -= (wt_off_t)bufsz) { - WT_ERR(__wt_read(session, fh, off, bufsz, buf)); - if (memcmp(buf, zerobuf, bufsz) != 0) - break; - } - - /* - * If we're walking by large amounts, now walk by the real allocsize - * to find the real end, if we found something. Otherwise we reached - * the beginning of the file. Offset can go negative if the log file - * size is not a multiple of a megabyte. The first chunk of the log - * file will always be non-zero. - */ - if (off < 0) - off = 0; - - /* - * We know all log records are aligned at log->allocsize. The first - * item in a log record is always a 32-bit length. Look for any - * non-zero length at the allocsize boundary. This may not be a true - * log record since it could be the middle of a large record. But we - * know no log record starts after it. Return an estimate of the log - * file size. - */ - for (off1 = bufsz - allocsize; - off1 > 0; off1 -= (wt_off_t)allocsize) - if (memcmp(buf + off1, zerobuf, sizeof(uint32_t)) != 0) - break; - off = off + off1; - - /* - * Set EOF to the last zero-filled record we saw. - */ - *eof = off + (wt_off_t)allocsize; -err: - if (buf != NULL) - __wt_free(session, buf); - if (zerobuf != NULL) - __wt_free(session, zerobuf); - return (ret); -} - -/* * __log_release -- * Release a log slot. */ @@ -1035,13 +1049,11 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) WT_LOG *log; WT_LSN sync_lsn; size_t write_size; - int yield_count; bool locked; WT_DECL_SPINLOCK_ID(id); /* Must appear last */ conn = S2C(session); log = conn->log; - yield_count = 0; locked = false; if (freep != NULL) *freep = true; @@ -1050,7 +1062,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) if (F_ISSET(slot, WT_SLOT_BUFFERED)) { write_size = (size_t) (slot->slot_end_lsn.offset - slot->slot_start_offset); - WT_ERR(__wt_write(session, slot->slot_fh, + WT_ERR(__log_fs_write(session, slot, slot->slot_start_offset, write_size, slot->slot_buf.mem)); } @@ -1079,13 +1091,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) * be holes in the log file. */ WT_STAT_FAST_CONN_INCR(session, log_release_write_lsn); - while (LOG_CMP(&log->write_lsn, &slot->slot_release_lsn) != 0) { - if (++yield_count < 1000) - __wt_yield(); - else - WT_ERR(__wt_cond_wait( - session, log->log_write_cond, 200)); - } + WT_ERR(__log_wait_for_earlier_slot(session, slot)); log->write_start_lsn = slot->slot_start_lsn; log->write_lsn = slot->slot_end_lsn; WT_ERR(__wt_cond_signal(session, log->log_write_cond)); @@ -1373,7 +1379,7 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, uint32_t allocsize, cksum, firstlog, lastlog, lognum, rdup_len, reclen; u_int i, logcount; int firstrecord; - bool eol; + bool eol, partial_record; char **logfiles; conn = S2C(session); @@ -1462,12 +1468,22 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, } WT_ERR(__log_openfile( session, false, &log_fh, WT_LOG_FILENAME, start_lsn.file)); - WT_ERR(__log_filesize(session, log_fh, &log_size)); + WT_ERR(__wt_filesize(session, log_fh, &log_size)); rd_lsn = start_lsn; WT_ERR(__wt_buf_initsize(session, &buf, WT_LOG_ALIGN)); for (;;) { if (rd_lsn.offset + allocsize > log_size) { advance: + if (rd_lsn.offset == log_size) + partial_record = false; + else + /* + * See if there is anything non-zero at the + * end of this log file. + */ + WT_ERR(__log_has_hole( + session, log_fh, log_size, + rd_lsn.offset, &partial_record)); /* * If we read the last record, go to the next file. */ @@ -1475,6 +1491,15 @@ advance: log_fh = NULL; eol = true; /* + * If we had a partial record, we'll want to break + * now after closing and truncating. Although for now + * log_truncate does not modify the LSN passed in, + * this code does not assume it is unmodified after that + * call which is why it uses the boolean set earlier. + */ + if (partial_record) + break; + /* * Truncate this log file before we move to the next. */ if (LF_ISSET(WT_LOGSCAN_RECOVER)) @@ -1490,7 +1515,7 @@ advance: break; WT_ERR(__log_openfile(session, false, &log_fh, WT_LOG_FILENAME, rd_lsn.file)); - WT_ERR(__log_filesize(session, log_fh, &log_size)); + WT_ERR(__wt_filesize(session, log_fh, &log_size)); eol = false; continue; } @@ -1515,9 +1540,14 @@ advance: * that may exist. */ if (reclen == 0) { - /* This LSN is the end. */ - eol = true; - break; + WT_ERR(__log_has_hole( + session, log_fh, log_size, rd_lsn.offset, &eol)); + if (eol) + /* Found a hole. This LSN is the end. */ + break; + else + /* Last record in log. Look for more. */ + goto advance; } rdup_len = __wt_rduppo2(reclen, allocsize); if (reclen > allocsize) { @@ -1862,7 +1892,8 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_ERR(__log_release(session, myslot.slot, &free_slot)); if (free_slot) WT_ERR(__wt_log_slot_free(session, myslot.slot)); - } else if (LF_ISSET(WT_LOG_FSYNC)) { + } + if (LF_ISSET(WT_LOG_FSYNC)) { /* Wait for our writes to reach disk */ while (LOG_CMP(&log->sync_lsn, &lsn) <= 0 && myslot.slot->slot_error == 0) diff --git a/src/third_party/wiredtiger/test/format/Makefile.am b/src/third_party/wiredtiger/test/format/Makefile.am index 421de768248..ee5ce256653 100644 --- a/src/third_party/wiredtiger/test/format/Makefile.am +++ b/src/third_party/wiredtiger/test/format/Makefile.am @@ -1,14 +1,27 @@ -AM_CPPFLAGS = -DBERKELEY_DB_PATH=\"$(BERKELEY_DB_PATH)\" \ - -I$(top_builddir) -I$(top_srcdir)/src/include \ - -I$(BERKELEY_DB_PATH)/include +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility +if HAVE_BERKELEY_DB +AM_CPPFLAGS +=-DHAVE_BERKELEY_DB +AM_CPPFLAGS +=-DBERKELEY_DB_PATH=\"$(BERKELEY_DB_PATH)\" +AM_CPPFLAGS +=-I$(BERKELEY_DB_PATH)/include +endif noinst_PROGRAMS = t noinst_SCRIPTS = s_dumpcmp t_SOURCES =\ - config.h format.h backup.c bdb.c bulk.c compact.c config.c ops.c \ - salvage.c t.c util.c wts.c - -t_LDADD = $(top_builddir)/libwiredtiger.la -L$(BERKELEY_DB_PATH)/lib -ldb + backup.c bulk.c compact.c config.c lrt.c ops.c salvage.c t.c util.c \ + wts.c + +if HAVE_BERKELEY_DB +t_SOURCES += bdb.c +endif + +t_LDADD = $(top_builddir)/test/utility/libtest_util.la +t_LDADD +=$(top_builddir)/libwiredtiger.la +if HAVE_BERKELEY_DB +t_LDADD += -L$(BERKELEY_DB_PATH)/lib -ldb +endif t_LDFLAGS = -static #noinst_LTLIBRARIES = lzo_compress.la diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c index 3b95ea92b5e..d7446df5b82 100644 --- a/src/third_party/wiredtiger/test/format/backup.c +++ b/src/third_party/wiredtiger/test/format/backup.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. @@ -37,41 +37,39 @@ check_copy(void) { WT_CONNECTION *conn; WT_SESSION *session; - int ret; - wts_open(g.home_backup, 0, &conn); + wts_open(g.home_backup, false, &conn); - if ((ret = conn->open_session( - conn, NULL, NULL, &session)) != 0) - die(ret, "connection.open_session: %s", g.home_backup); + testutil_checkfmt( + conn->open_session(conn, NULL, NULL, &session), + "%s", g.home_backup); - ret = session->verify(session, g.uri, NULL); - if (ret != 0) - die(ret, "session.verify: %s: %s", g.home_backup, g.uri); + testutil_checkfmt( + session->verify(session, g.uri, NULL), + "%s: %s", g.home_backup, g.uri); - if ((ret = conn->close(conn, NULL)) != 0) - die(ret, "connection.close: %s", g.home_backup); + testutil_checkfmt(conn->close(conn, NULL), "%s", g.home_backup); } /* * copy_file -- - * Copy a single file into the backup directory. + * Copy a single file into the backup directories. */ static void -copy_file(const char *name) +copy_file(WT_SESSION *session, const char *name) { size_t len; char *cmd; int ret; - len = strlen(g.home) + strlen(g.home_backup) + strlen(name) * 2 + 20; - if ((cmd = malloc(len)) == NULL) - die(errno, "malloc"); - (void)snprintf(cmd, len, - "cp %s/%s %s/%s", g.home, name, g.home_backup, name); - if ((ret = system(cmd)) != 0) - die(ret, "backup copy: %s", cmd); - free(cmd); + len = strlen(g.home) + strlen(g.home_backup) + strlen(name) * 2 + 20; + if ((cmd = malloc(len)) == NULL) + testutil_die(errno, "malloc"); + (void)snprintf(cmd, len, + "cp %s/%s %s/%s", g.home, name, g.home_backup, name); + if ((ret = system(cmd)) != 0) + testutil_die(ret, "backup copy: %s", cmd); + free(cmd); } /* @@ -83,10 +81,11 @@ backup(void *arg) { WT_CONNECTION *conn; WT_CURSOR *backup_cursor; + WT_DECL_RET; WT_SESSION *session; - u_int period; - int ret; - const char *key; + u_int incremental, period; + bool full; + const char *config, *key; (void)(arg); @@ -97,58 +96,90 @@ backup(void *arg) return (NULL); /* Open a session. */ - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) - die(ret, "connection.open_session"); + testutil_check(conn->open_session(conn, NULL, NULL, &session)); /* - * Perform a backup at somewhere under 10 seconds (so we get at - * least one done), and then at 45 second intervals. + * Perform a full backup at somewhere under 10 seconds (that way there's + * at least one), then at larger intervals, optionally do incremental + * backups between full backups. */ - for (period = mmrand(NULL, 1, 10);; period = 45) { + incremental = 0; + for (period = mmrand(NULL, 1, 10);; period = mmrand(NULL, 20, 45)) { /* Sleep for short periods so we don't make the run wait. */ while (period > 0 && !g.workers_finished) { --period; sleep(1); } - if (g.workers_finished) - break; - /* Lock out named checkpoints */ - if ((ret = pthread_rwlock_wrlock(&g.backup_lock)) != 0) - die(ret, "pthread_rwlock_wrlock: backup lock"); + /* + * We can't drop named checkpoints while there's a backup in + * progress, serialize backups with named checkpoints. Wait + * for the checkpoint to complete, otherwise backups might be + * starved out. + */ + testutil_check(pthread_rwlock_wrlock(&g.backup_lock)); + if (g.workers_finished) { + testutil_check(pthread_rwlock_unlock(&g.backup_lock)); + break; + } - /* Re-create the backup directory. */ - if ((ret = system(g.home_backup_init)) != 0) - die(ret, "backup directory creation failed"); + if (incremental) { + config = "target=(\"log:\")"; + full = false; + } else { + /* Re-create the backup directory. */ + testutil_checkfmt( + system(g.home_backup_init), + "%s", "backup directory creation failed"); + + config = NULL; + full = true; + } /* - * open_cursor can return EBUSY if a metadata operation is - * currently happening - retry in that case. + * open_cursor can return EBUSY if concurrent with a metadata + * operation, retry in that case. */ - while ((ret = session->open_cursor(session, - "backup:", NULL, NULL, &backup_cursor)) == EBUSY) - sleep(1); + while ((ret = session->open_cursor( + session, "backup:", NULL, config, &backup_cursor)) == EBUSY) + __wt_yield(); if (ret != 0) - die(ret, "session.open_cursor: backup"); + testutil_die(ret, "session.open_cursor: backup"); while ((ret = backup_cursor->next(backup_cursor)) == 0) { - if ((ret = - backup_cursor->get_key(backup_cursor, &key)) != 0) - die(ret, "cursor.get_key"); - copy_file(key); + testutil_check( + backup_cursor->get_key(backup_cursor, &key)); + copy_file(session, key); } + if (ret != WT_NOTFOUND) + testutil_die(ret, "backup-cursor"); - if ((ret = backup_cursor->close(backup_cursor)) != 0) - die(ret, "cursor.close"); + /* After an incremental backup, truncate the log files. */ + if (incremental) + testutil_check(session->truncate( + session, "log:", backup_cursor, NULL, NULL)); - if ((ret = pthread_rwlock_unlock(&g.backup_lock)) != 0) - die(ret, "pthread_rwlock_unlock: backup lock"); + testutil_check(backup_cursor->close(backup_cursor)); + testutil_check(pthread_rwlock_unlock(&g.backup_lock)); - check_copy(); + /* + * If automatic log archival isn't configured, optionally do + * incremental backups after each full backup. If we're not + * doing any more incremental, verify the backup (we can't + * verify intermediate states, once we perform recovery on the + * backup database, we can't do any more incremental backups). + */ + if (full) + incremental = + g.c_logging_archive ? 1 : mmrand(NULL, 1, 5); + if (--incremental == 0) + check_copy(); } - if ((ret = session->close(session, NULL)) != 0) - die(ret, "session.close"); + if (incremental != 0) + check_copy(); + + testutil_check(session->close(session, NULL)); return (NULL); } diff --git a/src/third_party/wiredtiger/test/format/bdb.c b/src/third_party/wiredtiger/test/format/bdb.c index fec23112549..8b61573fdf9 100644 --- a/src/third_party/wiredtiger/test/format/bdb.c +++ b/src/third_party/wiredtiger/test/format/bdb.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. @@ -30,7 +30,7 @@ #include "format.h" static DBT key, value; -static uint8_t *keybuf; +static WT_ITEM keyitem; static int bdb_compare_reverse(DB *dbp, const DBT *k1, const DBT *k2 @@ -78,7 +78,7 @@ bdb_open(void) assert(db->cursor(db, NULL, &dbc, 0) == 0); g.dbc = dbc; - key_gen_setup(&keybuf); + key_gen_setup(&keyitem); } void @@ -95,8 +95,7 @@ bdb_close(void) assert(db->close(db, 0) == 0); assert(dbenv->close(dbenv, 0) == 0); - free(keybuf); - keybuf = NULL; + free(keyitem.mem); } void @@ -107,9 +106,9 @@ bdb_insert( DBC *dbc; key.data = (void *)key_data; - key.size = (uint32_t)key_size; + key.size = (u_int32_t)key_size; value.data = (void *)value_data; - value.size = (uint32_t)value_size; + value.size = (u_int32_t)value_size; dbc = g.dbc; @@ -128,7 +127,7 @@ bdb_np(int next, if ((ret = dbc->get(dbc, &key, &value, next ? DB_NEXT : DB_PREV)) != 0) { if (ret != DB_NOTFOUND) - die(ret, "dbc.get: %s: {%.*s}", + testutil_die(ret, "dbc.get: %s: {%.*s}", next ? "DB_NEXT" : "DB_PREV", (int)key.size, (char *)key.data); *notfoundp = 1; @@ -144,17 +143,16 @@ void bdb_read(uint64_t keyno, void *valuep, size_t *valuesizep, int *notfoundp) { DBC *dbc = g.dbc; - size_t size; int ret; - key_gen(keybuf, &size, keyno); - key.data = keybuf; - key.size = (uint32_t)size; + key_gen(&keyitem, keyno); + key.data = (void *)keyitem.data; + key.size = (u_int32_t)keyitem.size; *notfoundp = 0; if ((ret = dbc->get(dbc, &key, &value, DB_SET)) != 0) { if (ret != DB_NOTFOUND) - die(ret, "dbc.get: DB_SET: {%.*s}", + testutil_die(ret, "dbc.get: DB_SET: {%.*s}", (int)key.size, (char *)key.data); *notfoundp = 1; } else { @@ -165,25 +163,20 @@ bdb_read(uint64_t keyno, void *valuep, size_t *valuesizep, int *notfoundp) void bdb_update(const void *arg_key, size_t arg_key_size, - const void *arg_value, size_t arg_value_size, int *notfoundp) + const void *arg_value, size_t arg_value_size) { DBC *dbc = g.dbc; int ret; key.data = (void *)arg_key; - key.size = (uint32_t)arg_key_size; + key.size = (u_int32_t)arg_key_size; value.data = (void *)arg_value; - value.size = (uint32_t)arg_value_size; + value.size = (u_int32_t)arg_value_size; - *notfoundp = 0; - if ((ret = dbc->put(dbc, &key, &value, DB_KEYFIRST)) != 0) { - if (ret != DB_NOTFOUND) { - die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}", - (int)key.size, (char *)key.data, - (int)value.size, (char *)value.data); - } - *notfoundp = 1; - } + if ((ret = dbc->put(dbc, &key, &value, DB_KEYFIRST)) != 0) + testutil_die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}", + (int)key.size, (char *)key.data, + (int)value.size, (char *)value.data); } void @@ -193,18 +186,19 @@ bdb_remove(uint64_t keyno, int *notfoundp) size_t size; int ret; - key_gen(keybuf, &size, keyno); - key.data = keybuf; - key.size = (uint32_t)size; + size = 0; + key_gen(&keyitem, keyno); + key.data = (void *)keyitem.data; + key.size = (u_int32_t)keyitem.size; bdb_read(keyno, &value.data, &size, notfoundp); - value.size = (uint32_t)size; + value.size = (u_int32_t)size; if (*notfoundp) return; if ((ret = dbc->del(dbc, 0)) != 0) { if (ret != DB_NOTFOUND) - die(ret, "dbc.del: {%.*s}", + testutil_die(ret, "dbc.del: {%.*s}", (int)key.size, (char *)key.data); *notfoundp = 1; } diff --git a/src/third_party/wiredtiger/test/format/bulk.c b/src/third_party/wiredtiger/test/format/bulk.c index ad9bc180e5e..d59b3100686 100644 --- a/src/third_party/wiredtiger/test/format/bulk.c +++ b/src/third_party/wiredtiger/test/format/bulk.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. @@ -33,34 +33,37 @@ wts_load(void) { WT_CONNECTION *conn; WT_CURSOR *cursor; + WT_DECL_RET; WT_ITEM key, value; WT_SESSION *session; - uint8_t *keybuf, *valbuf; - int is_bulk, ret; + bool is_bulk; conn = g.wts_conn; - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) - die(ret, "connection.open_session"); + testutil_check(conn->open_session(conn, NULL, NULL, &session)); if (g.logging != 0) (void)g.wt_api->msg_printf(g.wt_api, session, "=============== bulk load start ==============="); /* - * Avoid bulk load with KVS (there's no bulk load support for a - * data-source); avoid bulk load with a custom collator, because - * the order of insertion will not match the collation order. + * No bulk load with data-sources. + * + * No bulk load with custom collators, the order of insertion will not + * match the collation order. */ - is_bulk = !g.c_reverse && - !DATASOURCE("kvsbdb") && !DATASOURCE("helium"); - if ((ret = session->open_cursor(session, g.uri, NULL, - is_bulk ? "bulk" : NULL, &cursor)) != 0) - die(ret, "session.open_cursor"); + is_bulk = true; + if (DATASOURCE("kvsbdb") && DATASOURCE("helium")) + is_bulk = false; + if (g.c_reverse) + is_bulk = false; - /* Set up the default key buffer. */ - key_gen_setup(&keybuf); - val_gen_setup(NULL, &valbuf); + testutil_check(session->open_cursor(session, g.uri, NULL, + is_bulk ? "bulk,append" : NULL, &cursor)); + + /* Set up the key/value buffers. */ + key_gen_setup(&key); + val_gen_setup(NULL, &value); for (;;) { if (++g.key_cnt > g.c_rows) { @@ -69,13 +72,11 @@ wts_load(void) } /* Report on progress every 100 inserts. */ - if (g.key_cnt % 100 == 0) + if (g.key_cnt % 1000 == 0) track("bulk load", g.key_cnt, NULL); - key_gen(keybuf, &key.size, (uint64_t)g.key_cnt); - key.data = keybuf; - val_gen(NULL, valbuf, &value.size, (uint64_t)g.key_cnt); - value.data = valbuf; + key_gen(&key, g.key_cnt); + val_gen(NULL, &value, g.key_cnt); switch (g.type) { case FIX: @@ -84,7 +85,7 @@ wts_load(void) cursor->set_value(cursor, *(uint8_t *)value.data); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s %" PRIu32 " {0x%02" PRIx8 "}", + "%-10s %" PRIu64 " {0x%02" PRIx8 "}", "bulk V", g.key_cnt, ((uint8_t *)value.data)[0]); break; @@ -94,7 +95,7 @@ wts_load(void) cursor->set_value(cursor, &value); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s %" PRIu32 " {%.*s}", "bulk V", + "%-10s %" PRIu64 " {%.*s}", "bulk V", g.key_cnt, (int)value.size, (char *)value.data); break; @@ -102,37 +103,53 @@ wts_load(void) cursor->set_key(cursor, &key); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s %" PRIu32 " {%.*s}", "bulk K", + "%-10s %" PRIu64 " {%.*s}", "bulk K", g.key_cnt, (int)key.size, (char *)key.data); cursor->set_value(cursor, &value); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s %" PRIu32 " {%.*s}", "bulk V", + "%-10s %" PRIu64 " {%.*s}", "bulk V", g.key_cnt, (int)value.size, (char *)value.data); break; } - if ((ret = cursor->insert(cursor)) != 0) - die(ret, "cursor.insert"); - - if (!SINGLETHREADED) - continue; + /* + * We don't want to size the cache to ensure the initial data + * set can load in the in-memory case, guaranteeing the load + * succeeds probably means future updates are also guaranteed + * to succeed, which isn't what we want. If we run out of space + * in the initial load, reset the row counter and continue. + * + * Decrease inserts, they can't be successful if we're at the + * cache limit, and increase the delete percentage to get some + * extra space once the run starts. + */ + if ((ret = cursor->insert(cursor)) != 0) { + g.rows = --g.key_cnt; + g.c_rows = (uint32_t)g.key_cnt; + + if (g.c_insert_pct > 5) + g.c_insert_pct = 5; + if (g.c_delete_pct < 20) + g.c_delete_pct += 20; + break; + } - /* Insert the item into BDB. */ - bdb_insert(key.data, key.size, value.data, value.size); +#ifdef HAVE_BERKELEY_DB + if (SINGLETHREADED) + bdb_insert(key.data, key.size, value.data, value.size); +#endif } - if ((ret = cursor->close(cursor)) != 0) - die(ret, "cursor.close"); + testutil_check(cursor->close(cursor)); if (g.logging != 0) (void)g.wt_api->msg_printf(g.wt_api, session, "=============== bulk load stop ==============="); - if ((ret = session->close(session, NULL)) != 0) - die(ret, "session.close"); + testutil_check(session->close(session, NULL)); - free(keybuf); - free(valbuf); + free(key.mem); + free(value.mem); } diff --git a/src/third_party/wiredtiger/test/format/compact.c b/src/third_party/wiredtiger/test/format/compact.c index ad603504023..240e5553697 100644 --- a/src/third_party/wiredtiger/test/format/compact.c +++ b/src/third_party/wiredtiger/test/format/compact.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. @@ -36,9 +36,9 @@ void * compact(void *arg) { WT_CONNECTION *conn; + WT_DECL_RET; WT_SESSION *session; u_int period; - int ret; (void)(arg); @@ -48,8 +48,7 @@ compact(void *arg) /* Open a session. */ conn = g.wts_conn; - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) - die(ret, "connection.open_session"); + testutil_check(conn->open_session(conn, NULL, NULL, &session)); /* * Perform compaction at somewhere under 15 seconds (so we get at @@ -66,11 +65,10 @@ compact(void *arg) if ((ret = session->compact( session, g.uri, NULL)) != 0 && ret != WT_ROLLBACK) - die(ret, "session.compact"); + testutil_die(ret, "session.compact"); } - if ((ret = session->close(session, NULL)) != 0) - die(ret, "session.close"); + testutil_check(session->close(session, NULL)); return (NULL); } diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c index bc05a422391..572aae9bad4 100644 --- a/src/third_party/wiredtiger/test/format/config.c +++ b/src/third_party/wiredtiger/test/format/config.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. @@ -30,15 +30,17 @@ #include "config.h" static void config_checksum(void); -static void config_compression(void); +static void config_compression(const char *); static const char *config_file_type(u_int); static CONFIG *config_find(const char *, size_t); static int config_is_perm(const char *); static void config_isolation(void); +static void config_lrt(void); static void config_map_checksum(const char *, u_int *); static void config_map_compression(const char *, u_int *); static void config_map_file_type(const char *, u_int *); static void config_map_isolation(const char *, u_int *); +static void config_reset(void); /* * config_setup -- @@ -50,7 +52,7 @@ config_setup(void) CONFIG *cp; /* Clear any temporary values. */ - config_clear(); + config_reset(); /* * Choose a data source type and a file type: they're interrelated (LSM @@ -100,8 +102,7 @@ config_setup(void) * our configuration, LSM or KVS devices are "tables", but files are * tested as well. */ - if ((g.uri = malloc(256)) == NULL) - die(errno, "malloc"); + g.uri = dmalloc(256); strcpy(g.uri, DATASOURCE("file") ? "file:" : "table:"); if (DATASOURCE("helium")) strcat(g.uri, "dev1/"); @@ -120,22 +121,19 @@ config_setup(void) if (F_ISSET(cp, C_BOOL)) *cp->v = mmrand(NULL, 1, 100) <= cp->min ? 1 : 0; else - *cp->v = CONF_RAND(cp); + *cp->v = mmrand(NULL, cp->min, cp->maxrand); } /* Required shared libraries. */ if (DATASOURCE("helium") && access(HELIUM_PATH, R_OK) != 0) - die(errno, "Levyx/helium shared library: %s", HELIUM_PATH); + testutil_die(errno, + "Levyx/helium shared library: %s", HELIUM_PATH); if (DATASOURCE("kvsbdb") && access(KVS_BDB_PATH, R_OK) != 0) - die(errno, "kvsbdb shared library: %s", KVS_BDB_PATH); + testutil_die(errno, "kvsbdb shared library: %s", KVS_BDB_PATH); /* Some data-sources don't support user-specified collations. */ if (DATASOURCE("helium") || DATASOURCE("kvsbdb")) - g.c_reverse = 0; - - config_checksum(); - config_compression(); - config_isolation(); + config_single("reverse=off", 0); /* * Periodically, run single-threaded so we can compare the results to @@ -145,13 +143,19 @@ config_setup(void) if (!g.replay && g.run_cnt % 20 == 19 && !config_is_perm("threads")) g.c_threads = 1; + config_checksum(); + config_compression("compression"); + config_compression("logging_compression"); + config_isolation(); + config_lrt(); + /* * Periodically, set the delete percentage to 0 so salvage gets run, * as long as the delete percentage isn't nailed down. * Don't do it on the first run, all our smoke tests would hit it. */ if (!g.replay && g.run_cnt % 10 == 9 && !config_is_perm("delete_pct")) - g.c_delete_pct = 0; + config_single("delete_pct=0", 0); /* * If this is an LSM run, set the cache size and crank up the insert @@ -165,9 +169,22 @@ config_setup(void) g.c_insert_pct = mmrand(NULL, 50, 85); } - /* Make the default maximum-run length 20 minutes. */ - if (!config_is_perm("timer")) - g.c_timer = 20; + /* Ensure there is at least 1MB of cache per thread. */ + if (!config_is_perm("cache") && g.c_cache < g.c_threads) + g.c_cache = g.c_threads; + + /* + * Run-length configured by a number of operations and a timer. If the + * operation count and the timer are both set by a configuration, there + * isn't anything to do. If only the operation count was configured, + * set a default maximum-run of 20 minutes. If only the timer is set, + * clear the operations count (which was set randomly). + */ + if (config_is_perm("timer")) { + if (!config_is_perm("ops")) + config_single("ops=0", 0); + } else + config_single("timer=20", 0); /* * Key/value minimum/maximum are related, correct unless specified by @@ -178,14 +195,15 @@ config_setup(void) if (!config_is_perm("key_max") && g.c_key_max < g.c_key_min) g.c_key_max = g.c_key_min; if (g.c_key_min > g.c_key_max) - die(EINVAL, "key_min may not be larger than key_max"); + testutil_die(EINVAL, "key_min may not be larger than key_max"); if (!config_is_perm("value_min") && g.c_value_min > g.c_value_max) g.c_value_min = g.c_value_max; if (!config_is_perm("value_max") && g.c_value_max < g.c_value_min) g.c_value_max = g.c_value_min; if (g.c_value_min > g.c_value_max) - die(EINVAL, "value_min may not be larger than value_max"); + testutil_die(EINVAL, + "value_min may not be larger than value_max"); /* Reset the key count. */ g.key_cnt = 0; @@ -218,39 +236,63 @@ config_checksum(void) * Compression configuration. */ static void -config_compression(void) +config_compression(const char *conf_name) { const char *cstr; + char confbuf[128]; + + /* Return if already specified. */ + if (config_is_perm(conf_name)) + return; /* - * Compression: choose something if compression wasn't specified, - * otherwise confirm the appropriate shared library is available. - * We used to verify that the libraries existed but that's no longer - * robust, since it's possible to build compression libraries into - * the WiredTiger library. + * Don't configure a compression engine for logging if logging isn't + * configured (it won't break, but it's confusing). */ - if (!config_is_perm("compression")) { - cstr = "compression=none"; - switch (mmrand(NULL, 1, 20)) { - case 1: case 2: case 3: case 4: /* 20% no compression */ - break; - case 8: case 9: case 10: case 11: /* 20% lz4 */ - cstr = "compression=lz4"; - break; - case 5: case 6: case 7: - case 12: case 13: case 14: case 15: /* 35% snappy */ - cstr = "compression=snappy"; - break; - case 16: case 17: case 18: case 19: /* 20% zlib */ - cstr = "compression=zlib"; - break; - case 20: /* 5% zlib-no-raw */ - cstr = "compression=zlib-noraw"; - break; - } + cstr = "none"; + if (strcmp(conf_name, "logging_compression") == 0 && g.c_logging == 0) { + (void)snprintf( + confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr); + config_single(confbuf, 0); + return; + } - config_single(cstr, 0); + /* + * Select a compression type from the list of built-in engines. + * + * Listed percentages are only correct if all of the possible engines + * are compiled in. + */ + switch (mmrand(NULL, 1, 20)) { +#ifdef HAVE_BUILTIN_EXTENSION_LZ4 + case 1: case 2: case 3: case 4: /* 20% lz4 */ + cstr = "lz4"; + break; + case 5: /* 5% lz4-no-raw */ + cstr = "lz4-noraw"; + break; +#endif +#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY + case 6: case 7: case 8: case 9: /* 30% snappy */ + case 10: case 11: + cstr = "snappy"; + break; +#endif +#ifdef HAVE_BUILTIN_EXTENSION_ZLIB + case 12: case 13: case 14: case 15: /* 20% zlib */ + cstr = "zlib"; + break; + case 16: /* 5% zlib-no-raw */ + cstr = "zlib-noraw"; + break; +#endif + case 17: case 18: case 19: case 20: /* 15% no compression */ + default: + break; } + + (void)snprintf(confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr); + config_single(confbuf, 0); } /* @@ -287,6 +329,22 @@ config_isolation(void) } /* + * config_lrt -- + * Long-running transaction configuration. + */ +static void +config_lrt(void) +{ + if (g.type == FIX) { + if (config_is_perm("long_running_txn")) + testutil_die(EINVAL, + "long_running_txn not supported with fixed-length " + "column store"); + config_single("long_running_txn=off", 0); + } +} + +/* * config_error -- * Display configuration information on error. */ @@ -320,7 +378,7 @@ config_print(int error_display) fp = stdout; else if ((fp = fopen(g.home_config, "w")) == NULL) - die(errno, "fopen: %s", g.home_config); + testutil_die(errno, "fopen: %s", g.home_config); fprintf(fp, "############################################\n"); fprintf(fp, "# RUN PARAMETERS\n"); @@ -335,8 +393,12 @@ config_print(int error_display) fprintf(fp, "%s=%" PRIu32 "\n", cp->name, *cp->v); fprintf(fp, "############################################\n"); + + /* Flush so we're up-to-date on error. */ + (void)fflush(fp); + if (fp != stdout) - (void)fclose(fp); + fclose_and_clear(&fp); } /* @@ -350,7 +412,7 @@ config_file(const char *name) char *p, buf[256]; if ((fp = fopen(name, "r")) == NULL) - die(errno, "fopen: %s", name); + testutil_die(errno, "fopen: %s", name); while (fgets(buf, sizeof(buf), fp) != NULL) { for (p = buf; *p != '\0' && *p != '\n'; ++p) ; @@ -359,24 +421,42 @@ config_file(const char *name) continue; config_single(buf, 1); } - (void)fclose(fp); + fclose_and_clear(&fp); } /* * config_clear -- - * Clear per-run values. + * Clear all configuration values. */ void config_clear(void) { CONFIG *cp; - /* Clear configuration data. */ + /* Clear all allocated configuration data. */ + for (cp = c; cp->name != NULL; ++cp) + if (cp->vstr != NULL) { + free((void *)*cp->vstr); + *cp->vstr = NULL; + } + free(g.uri); + g.uri = NULL; +} + +/* + * config_reset -- + * Clear per-run configuration values. + */ +static void +config_reset(void) +{ + CONFIG *cp; + + /* Clear temporary allocated configuration data. */ for (cp = c; cp->name != NULL; ++cp) { F_CLR(cp, C_TEMP); - if (!F_ISSET(cp, C_PERM) && - F_ISSET(cp, C_STRING) && cp->vstr != NULL) { - free(*cp->vstr); + if (!F_ISSET(cp, C_PERM) && cp->vstr != NULL) { + free((void *)*cp->vstr); *cp->vstr = NULL; } } @@ -392,7 +472,7 @@ void config_single(const char *s, int perm) { CONFIG *cp; - uint64_t v; + long v; char *p; const char *ep; @@ -418,35 +498,55 @@ config_single(const char *s, int perm) exit(EXIT_FAILURE); } + /* + * Free the previous setting if a configuration has been + * passed in twice. + */ + if (*cp->vstr != NULL) { + free(*cp->vstr); + *cp->vstr = NULL; + } + if (strncmp(s, "checksum", strlen("checksum")) == 0) { config_map_checksum(ep, &g.c_checksum_flag); - *cp->vstr = strdup(ep); + *cp->vstr = dstrdup(ep); } else if (strncmp( s, "compression", strlen("compression")) == 0) { config_map_compression(ep, &g.c_compression_flag); - *cp->vstr = strdup(ep); + *cp->vstr = dstrdup(ep); } else if (strncmp(s, "isolation", strlen("isolation")) == 0) { config_map_isolation(ep, &g.c_isolation_flag); - *cp->vstr = strdup(ep); + *cp->vstr = dstrdup(ep); } else if (strncmp(s, "file_type", strlen("file_type")) == 0) { config_map_file_type(ep, &g.type); - *cp->vstr = strdup(config_file_type(g.type)); + *cp->vstr = dstrdup(config_file_type(g.type)); + } else if (strncmp(s, "logging_compression", + strlen("logging_compression")) == 0) { + config_map_compression(ep, + &g.c_logging_compression_flag); + *cp->vstr = dstrdup(ep); } else { - if (*cp->vstr != NULL) - free(*cp->vstr); - *cp->vstr = strdup(ep); + free((void *)*cp->vstr); + *cp->vstr = dstrdup(ep); } - if (*cp->vstr == NULL) - die(errno, "malloc"); return; } - v = strtoul(ep, &p, 10); - if (*p != '\0') { - fprintf(stderr, "%s: %s: illegal numeric value\n", - g.progname, s); - exit(EXIT_FAILURE); + v = -1; + if (F_ISSET(cp, C_BOOL)) { + if (strncmp(ep, "off", strlen("off")) == 0) + v = 0; + else if (strncmp(ep, "on", strlen("on")) == 0) + v = 1; + } + if (v == -1) { + v = strtol(ep, &p, 10); + if (*p != '\0') { + fprintf(stderr, "%s: %s: illegal numeric value\n", + g.progname, s); + exit(EXIT_FAILURE); + } } if (F_ISSET(cp, C_BOOL)) { if (v != 0 && v != 1) { @@ -454,10 +554,10 @@ config_single(const char *s, int perm) g.progname, s); exit(EXIT_FAILURE); } - } else if ((uint32_t)v < cp->min || (uint32_t)v > cp->maxset) { - fprintf(stderr, "%s: %s: value of %" PRIu32 - " outside min/max values of %" PRIu32 "-%" PRIu32 "\n", - g.progname, s, *cp->v, cp->min, cp->maxset); + } else if (v < cp->min || v > cp->maxset) { + fprintf(stderr, "%s: %s: value outside min/max values of %" + PRIu32 "-%" PRIu32 "\n", + g.progname, s, cp->min, cp->maxset); exit(EXIT_FAILURE); } *cp->v = (uint32_t)v; @@ -480,7 +580,7 @@ config_map_file_type(const char *s, u_int *vp) strcmp(s, "row-store") == 0) *vp = ROW; else - die(EINVAL, "illegal file type configuration: %s", s); + testutil_die(EINVAL, "illegal file type configuration: %s", s); } /* @@ -497,7 +597,7 @@ config_map_checksum(const char *s, u_int *vp) else if (strcmp(s, "uncompressed") == 0) *vp = CHECKSUM_UNCOMPRESSED; else - die(EINVAL, "illegal checksum configuration: %s", s); + testutil_die(EINVAL, "illegal checksum configuration: %s", s); } /* @@ -509,12 +609,10 @@ config_map_compression(const char *s, u_int *vp) { if (strcmp(s, "none") == 0) *vp = COMPRESS_NONE; - else if (strcmp(s, "bzip") == 0) - *vp = COMPRESS_BZIP; - else if (strcmp(s, "bzip-raw") == 0) - *vp = COMPRESS_BZIP_RAW; else if (strcmp(s, "lz4") == 0) *vp = COMPRESS_LZ4; + else if (strcmp(s, "lz4-noraw") == 0) + *vp = COMPRESS_LZ4_NO_RAW; else if (strcmp(s, "lzo") == 0) *vp = COMPRESS_LZO; else if (strcmp(s, "snappy") == 0) @@ -524,7 +622,8 @@ config_map_compression(const char *s, u_int *vp) else if (strcmp(s, "zlib-noraw") == 0) *vp = COMPRESS_ZLIB_NO_RAW; else - die(EINVAL, "illegal compression configuration: %s", s); + testutil_die(EINVAL, + "illegal compression configuration: %s", s); } /* @@ -543,7 +642,7 @@ config_map_isolation(const char *s, u_int *vp) else if (strcmp(s, "snapshot") == 0) *vp = ISOLATION_SNAPSHOT; else - die(EINVAL, "illegal isolation configuration: %s", s); + testutil_die(EINVAL, "illegal isolation configuration: %s", s); } /* @@ -555,7 +654,7 @@ config_find(const char *s, size_t len) { CONFIG *cp; - for (cp = c; cp->name != NULL; ++cp) + for (cp = c; cp->name != NULL; ++cp) if (strncmp(s, cp->name, len) == 0 && cp->name[len] == '\0') return (cp); diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h index 2d2b4e03010..f8dc5a703e5 100644 --- a/src/third_party/wiredtiger/test/format/config.h +++ b/src/third_party/wiredtiger/test/format/config.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. @@ -37,7 +37,7 @@ typedef struct { /* Value is a boolean, yes if roll of 1-to-100 is <= CONFIG->min. */ #define C_BOOL 0x001 - /* Not a simple randomization, handle outside the main loop. */ + /* Not a simple randomization, handle outside the main loop. */ #define C_IGNORE 0x002 /* Value was set from command-line or file, ignore for all runs. */ @@ -57,11 +57,8 @@ typedef struct { char **vstr; /* Value for string options */ } CONFIG; -/* - * Get a random value between a config min/max pair (inclusive for both min - * and max). - */ -#define CONF_RAND(cp) mmrand(NULL, (cp)->min, (cp)->maxrand) +#define COMPRESSION_LIST \ + "(none | lz4 | lz4-noraw | snappy | zlib | zlib-noraw)" static CONFIG c[] = { { "abort", @@ -72,9 +69,9 @@ static CONFIG c[] = { "if LSM inserts are throttled", /* 90% */ C_BOOL, 90, 0, 0, &g.c_auto_throttle, NULL }, - { "firstfit", - "if allocation is firstfit", /* 10% */ - C_BOOL, 10, 0, 0, &g.c_firstfit, NULL }, + { "backups", + "if backups are enabled", /* 20% */ + C_BOOL, 20, 0, 0, &g.c_backups, NULL }, { "bitcnt", "number of bits for fixed-length column-store files", @@ -117,8 +114,7 @@ static CONFIG c[] = { C_BOOL, 10, 0, 0, &g.c_compact, NULL }, { "compression", - "type of compression " - "(none | bzip | bzip-raw | lz4 | lzo | snappy | zlib | zlib-noraw)", + "type of compression " COMPRESSION_LIST, C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_compression }, { "data_extend", @@ -127,7 +123,7 @@ static CONFIG c[] = { { "data_source", "data source (file | helium | kvsbdb | lsm | table)", - C_IGNORE | C_STRING, 0, 0, 0, NULL, &g.c_data_source }, + C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_data_source }, { "delete_pct", "percent operations that are deletes", @@ -137,6 +133,10 @@ static CONFIG c[] = { "if values are dictionary compressed", /* 20% */ C_BOOL, 20, 0, 0, &g.c_dictionary, NULL }, + { "direct_io", + "if direct I/O is configured for data objects", /* 0% */ + C_IGNORE, 0, 0, 1, &g.c_direct_io, NULL }, + { "evict_max", "the maximum number of eviction workers", 0x0, 0, 5, 100, &g.c_evict_max, NULL }, @@ -145,9 +145,9 @@ static CONFIG c[] = { "type of store to create (fix | var | row)", C_IGNORE|C_STRING, 1, 3, 3, NULL, &g.c_file_type }, - { "backups", - "if backups are enabled", /* 5% */ - C_BOOL, 5, 0, 0, &g.c_backups, NULL }, + { "firstfit", + "if allocation is firstfit", /* 10% */ + C_BOOL, 10, 0, 0, &g.c_firstfit, NULL }, { "huffman_key", "if keys are huffman encoded", /* 20% */ @@ -186,26 +186,34 @@ static CONFIG c[] = { "minimum size of keys", 0x0, 10, 32, 256, &g.c_key_min, NULL }, - { "leak_memory", - "if memory should be leaked on close", - C_BOOL, 0, 0, 0, &g.c_leak_memory, NULL }, - { "leaf_page_max", "maximum size of Btree leaf nodes", 0x0, 9, 17, 27, &g.c_leaf_page_max, NULL }, + { "leak_memory", + "if memory should be leaked on close", + C_BOOL, 0, 0, 0, &g.c_leak_memory, NULL }, + { "logging", - "if logging configured", /* 30% */ - C_BOOL, 30, 0, 0, &g.c_logging, NULL }, + "if logging configured", /* 50% */ + C_BOOL, 50, 0, 0, &g.c_logging, NULL }, { "logging_archive", "if log file archival configured", /* 50% */ C_BOOL, 50, 0, 0, &g.c_logging_archive, NULL }, + { "logging_compression", + "type of logging compression " COMPRESSION_LIST, + C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_logging_compression }, + { "logging_prealloc", "if log file pre-allocation configured", /* 50% */ C_BOOL, 50, 0, 0, &g.c_logging_prealloc, NULL }, + { "long_running_txn", + "if a long-running transaction configured", /* 0% */ + C_BOOL, 0, 0, 0, &g.c_long_running_txn, NULL }, + { "lsm_worker_threads", "the number of LSM worker threads", 0x0, 3, 4, 20, &g.c_lsm_worker_threads, NULL }, @@ -230,6 +238,10 @@ static CONFIG c[] = { "minimum gain before prefix compression is used", 0x0, 0, 8, 256, &g.c_prefix_compression_min, NULL }, + { "quiet", + "quiet run (same as -q)", + C_IGNORE|C_BOOL, 0, 0, 0, &g.c_quiet, NULL }, + { "repeat_data_pct", "percent duplicate values in row- or var-length column-stores", 0x0, 0, 90, 90, &g.c_repeat_data_pct, NULL }, @@ -246,6 +258,10 @@ static CONFIG c[] = { "the number of runs", C_IGNORE, 0, UINT_MAX, UINT_MAX, &g.c_runs, NULL }, + { "salvage", + "salvage testing", /* 100% */ + C_BOOL, 100, 1, 0, &g.c_salvage, NULL }, + { "split_pct", "page split size as a percentage of the maximum page size", 0x0, 40, 85, 85, &g.c_split_pct, NULL }, @@ -266,6 +282,10 @@ static CONFIG c[] = { "maximum time to run in minutes (default 20 minutes)", C_IGNORE, 0, UINT_MAX, UINT_MAX, &g.c_timer, NULL }, + { "transaction-frequency", + "percent operations done inside an explicit transaction", + 0x0, 1, 100, 100, &g.c_txn_freq, NULL }, + { "value_max", "maximum size of values", 0x0, 32, 4096, MEGABYTE(10), &g.c_value_max, NULL }, @@ -274,6 +294,10 @@ static CONFIG c[] = { "minimum size of values", 0x0, 0, 20, 4096, &g.c_value_min, NULL }, + { "verify", + "to regularly verify during a run", /* 100% */ + C_BOOL, 100, 1, 0, &g.c_verify, NULL }, + { "wiredtiger_config", "configuration string used to wiredtiger_open", C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_config_open }, diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h index 7cd75cd0475..bab9ef26f20 100644 --- a/src/third_party/wiredtiger/test/format/format.h +++ b/src/third_party/wiredtiger/test/format/format.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. @@ -26,52 +26,22 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include <sys/stat.h> -#ifndef _WIN32 -#include <sys/time.h> -#endif -#include <sys/types.h> - -#include <assert.h> -#include <ctype.h> -#include <errno.h> -#include <fcntl.h> -#include <inttypes.h> -#include <limits.h> -#ifndef _WIN32 -#include <pthread.h> -#endif -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifndef _WIN32 -#include <unistd.h> -#endif -#include <time.h> - -#ifdef _WIN32 -#include "windows_shim.h" -#endif - -#include <wt_internal.h> +#include "test_util.h" #ifdef BDB -#include <db.h> +/* + * Berkeley DB has an #ifdef we need to provide a value for, we'll see an + * undefined error if it's unset during a strict compile. + */ +#ifndef DB_DBM_HSEARCH +#define DB_DBM_HSEARCH 0 #endif - -#if defined(__GNUC__) -#define WT_GCC_ATTRIBUTE(x) __attribute__(x) -#else -#define WT_GCC_ATTRIBUTE(x) +#include <assert.h> +#include <db.h> #endif -extern WT_EXTENSION_API *wt_api; - #define EXTPATH "../../ext/" /* Extensions path */ -#define BZIP_PATH \ - EXTPATH "compressors/bzip2/.libs/libwiredtiger_bzip2.so" #define LZ4_PATH \ EXTPATH "compressors/lz4/.libs/libwiredtiger_lz4.so" #define SNAPPY_PATH \ @@ -95,8 +65,6 @@ extern WT_EXTENSION_API *wt_api; #define KILOBYTE(v) ((v) * 1024) #undef MEGABYTE #define MEGABYTE(v) ((v) * 1048576) -#undef GIGABYTE -#define GIGABYTE(v) ((v) * 1073741824ULL) #define WT_NAME "wt" /* Object name */ @@ -105,12 +73,6 @@ extern WT_EXTENSION_API *wt_api; #define FORMAT_OPERATION_REPS 3 /* 3 thread operations sets */ -#ifndef _WIN32 -#define SIZET_FMT "%zu" /* size_t format string */ -#else -#define SIZET_FMT "%Iu" /* size_t format string */ -#endif - typedef struct { char *progname; /* Program name */ @@ -120,7 +82,6 @@ typedef struct { char *home_bdb; /* BDB directory */ char *home_config; /* Run CONFIG file path */ char *home_init; /* Initialize home command */ - char *home_kvs; /* KVS directory */ char *home_log; /* Operation log file path */ char *home_rand; /* RNG log file path */ char *home_salvage_copy; /* Salvage copy command */ @@ -128,14 +89,18 @@ typedef struct { char *helium_mount; /* Helium volume */ + char wiredtiger_open_config[8 * 1024]; /* Database open config */ + +#ifdef HAVE_BERKELEY_DB void *bdb; /* BDB comparison handle */ void *dbc; /* BDB cursor handle */ +#endif WT_CONNECTION *wts_conn; WT_EXTENSION_API *wt_api; int rand_log_stop; /* Logging turned off */ - FILE *rand_log; /* Random number log */ + FILE *randfp; /* Random number log */ uint32_t run_cnt; /* Run counter */ @@ -146,10 +111,10 @@ typedef struct { FILE *logfp; /* Log file */ int replay; /* Replaying a run. */ - int track; /* Track progress */ int workers_finished; /* Operations completed */ - pthread_rwlock_t backup_lock; /* Hot backup running */ + pthread_rwlock_t backup_lock; /* Backup running */ + pthread_rwlock_t checkpoint_lock; /* Checkpoint running */ WT_RAND_STATE rnd; /* Global RNG state */ @@ -163,6 +128,8 @@ typedef struct { size_t append_cnt; /* Current unresolved records */ pthread_rwlock_t append_lock; /* Single-thread resolution */ + pthread_rwlock_t death_lock; /* Single-thread failure */ + char *uri; /* Object name */ char *config_open; /* Command-line configuration */ @@ -178,23 +145,24 @@ typedef struct { uint32_t c_cache; uint32_t c_compact; uint32_t c_checkpoints; - char *c_checksum; + char *c_checksum; uint32_t c_chunk_size; - char *c_compression; - char *c_config_open; + char *c_compression; + char *c_config_open; uint32_t c_data_extend; - char *c_data_source; + char *c_data_source; uint32_t c_delete_pct; uint32_t c_dictionary; + uint32_t c_direct_io; uint32_t c_evict_max; uint32_t c_firstfit; - char *c_file_type; + char *c_file_type; uint32_t c_huffman_key; uint32_t c_huffman_value; uint32_t c_insert_pct; uint32_t c_internal_key_truncation; uint32_t c_intl_page_max; - char *c_isolation; + char *c_isolation; uint32_t c_key_gap; uint32_t c_key_max; uint32_t c_key_min; @@ -202,27 +170,33 @@ typedef struct { uint32_t c_leak_memory; uint32_t c_logging; uint32_t c_logging_archive; + char *c_logging_compression; uint32_t c_logging_prealloc; + uint32_t c_long_running_txn; uint32_t c_lsm_worker_threads; uint32_t c_merge_max; uint32_t c_mmap; uint32_t c_ops; + uint32_t c_quiet; uint32_t c_prefix_compression; uint32_t c_prefix_compression_min; uint32_t c_repeat_data_pct; uint32_t c_reverse; uint32_t c_rows; uint32_t c_runs; + uint32_t c_salvage; uint32_t c_split_pct; uint32_t c_statistics; uint32_t c_statistics_server; uint32_t c_threads; uint32_t c_timer; + uint32_t c_txn_freq; uint32_t c_value_max; uint32_t c_value_min; + uint32_t c_verify; uint32_t c_write_pct; -#define FIX 1 +#define FIX 1 #define ROW 2 #define VAR 3 u_int type; /* File type's flag value */ @@ -233,14 +207,14 @@ typedef struct { u_int c_checksum_flag; /* Checksum flag value */ #define COMPRESS_NONE 1 -#define COMPRESS_BZIP 2 -#define COMPRESS_BZIP_RAW 3 -#define COMPRESS_LZ4 4 -#define COMPRESS_LZO 5 -#define COMPRESS_SNAPPY 6 -#define COMPRESS_ZLIB 7 -#define COMPRESS_ZLIB_NO_RAW 8 +#define COMPRESS_LZ4 2 +#define COMPRESS_LZ4_NO_RAW 3 +#define COMPRESS_LZO 4 +#define COMPRESS_SNAPPY 5 +#define COMPRESS_ZLIB 6 +#define COMPRESS_ZLIB_NO_RAW 7 u_int c_compression_flag; /* Compression flag value */ + u_int c_logging_compression_flag; /* Log compression flag value */ #define ISOLATION_RANDOM 1 #define ISOLATION_READ_UNCOMMITTED 2 @@ -255,7 +229,7 @@ typedef struct { } GLOBAL; extern GLOBAL g; -typedef struct { +typedef struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) { WT_RAND_STATE rnd; /* thread RNG state */ uint64_t search; /* operations */ @@ -277,15 +251,17 @@ typedef struct { #define TINFO_COMPLETE 2 /* Finished */ #define TINFO_JOINED 3 /* Resolved */ volatile int state; /* state */ -} TINFO WT_GCC_ATTRIBUTE((aligned(64))); +} TINFO; +#ifdef HAVE_BERKELEY_DB void bdb_close(void); void bdb_insert(const void *, size_t, const void *, size_t); void bdb_np(int, void *, size_t *, void *, size_t *, int *); void bdb_open(void); void bdb_read(uint64_t, void *, size_t *, int *); void bdb_remove(uint64_t, int *); -void bdb_update(const void *, size_t, const void *, size_t, int *); +void bdb_update(const void *, size_t, const void *, size_t); +#endif void *backup(void *); void *compact(void *); @@ -296,32 +272,29 @@ void config_print(int); void config_setup(void); void config_single(const char *, int); void fclose_and_clear(FILE **); -void key_gen(uint8_t *, size_t *, uint64_t); -void key_gen_insert(WT_RAND_STATE *, uint8_t *, size_t *, uint64_t); -void key_gen_setup(uint8_t **); +void key_gen(WT_ITEM *, uint64_t); +void key_gen_insert(WT_RAND_STATE *, WT_ITEM *, uint64_t); +void key_gen_setup(WT_ITEM *); void key_len_setup(void); +void *lrt(void *); void path_setup(const char *); +int read_row(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); uint32_t rng(WT_RAND_STATE *); void track(const char *, uint64_t, TINFO *); -void val_gen(WT_RAND_STATE *, uint8_t *, size_t *, uint64_t); -void val_gen_setup(WT_RAND_STATE *, uint8_t **); +void val_gen(WT_RAND_STATE *, WT_ITEM *, uint64_t); +void val_gen_setup(WT_RAND_STATE *, WT_ITEM *); void wts_close(void); -void wts_create(void); void wts_dump(const char *, int); +void wts_init(void); void wts_load(void); -void wts_open(const char *, int, WT_CONNECTION **); +void wts_open(const char *, bool, WT_CONNECTION **); void wts_ops(int); void wts_read_scan(void); +void wts_reopen(void); void wts_salvage(void); void wts_stats(void); void wts_verify(const char *); -void die(int, const char *, ...) -#if defined(__GNUC__) -__attribute__((__noreturn__)) -#endif -; - /* * mmrand -- * Return a random value between a min/max pair. diff --git a/src/third_party/wiredtiger/test/format/lrt.c b/src/third_party/wiredtiger/test/format/lrt.c new file mode 100644 index 00000000000..937525522fa --- /dev/null +++ b/src/third_party/wiredtiger/test/format/lrt.c @@ -0,0 +1,170 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "format.h" + +/* + * lrt -- + * Start a long-running transaction. + */ +void * +lrt(void *arg) +{ + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_ITEM key, value; + WT_SESSION *session; + size_t buf_len, buf_size; + uint64_t keyno, saved_keyno; + u_int period; + int pinned, ret; + uint8_t bitfield; + void *buf; + + (void)(arg); /* Unused parameter */ + + saved_keyno = 0; /* [-Werror=maybe-uninitialized] */ + + key_gen_setup(&key); + val_gen_setup(NULL, &value); + + buf = NULL; + buf_len = buf_size = 0; + + /* Open a session and cursor. */ + conn = g.wts_conn; + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_check(session->open_cursor( + session, g.uri, NULL, NULL, &cursor)); + + for (pinned = 0;;) { + if (pinned) { + /* Re-read the record at the end of the table. */ + while ((ret = read_row( + cursor, &key, &value, saved_keyno)) == WT_ROLLBACK) + ; + if (ret != 0) + testutil_die(ret, + "read_row %" PRIu64, saved_keyno); + + /* Compare the previous value with the current one. */ + if (g.type == FIX) { + ret = cursor->get_value(cursor, &bitfield); + value.data = &bitfield; + value.size = 1; + } else + ret = cursor->get_value(cursor, &value); + if (ret != 0) + testutil_die(ret, + "cursor.get_value: %" PRIu64, saved_keyno); + + if (buf_size != value.size || + memcmp(buf, value.data, value.size) != 0) + testutil_die(0, "mismatched start/stop values"); + + /* End the transaction. */ + testutil_check( + session->commit_transaction(session, NULL)); + + /* Reset the cursor, releasing our pin. */ + testutil_check(cursor->reset(cursor)); + pinned = 0; + } else { + /* + * Begin transaction: without an explicit transaction, + * the snapshot is only kept around while a cursor is + * positioned. As soon as the cursor loses its position + * a new snapshot will be allocated. + */ + testutil_check(session->begin_transaction( + session, "isolation=snapshot")); + + /* Read a record at the end of the table. */ + do { + saved_keyno = mmrand(NULL, + (u_int)(g.key_cnt - g.key_cnt / 10), + (u_int)g.key_cnt); + while ((ret = read_row(cursor, + &key, &value, saved_keyno)) == WT_ROLLBACK) + ; + } while (ret == WT_NOTFOUND); + if (ret != 0) + testutil_die(ret, + "read_row %" PRIu64, saved_keyno); + + /* Copy the cursor's value. */ + if (g.type == FIX) { + ret = cursor->get_value(cursor, &bitfield); + value.data = &bitfield; + value.size = 1; + } else + ret = cursor->get_value(cursor, &value); + if (ret != 0) + testutil_die(ret, + "cursor.get_value: %" PRIu64, saved_keyno); + if (buf_len < value.size) + buf = drealloc(buf, buf_len = value.size); + memcpy(buf, value.data, buf_size = value.size); + + /* + * Move the cursor to an early record in the table, + * hopefully allowing the page with the record just + * retrieved to be evicted from memory. + */ + do { + keyno = mmrand(NULL, 1, (u_int)g.key_cnt / 5); + while ((ret = read_row(cursor, + &key, &value, keyno)) == WT_ROLLBACK) + ; + } while (ret == WT_NOTFOUND); + if (ret != 0) + testutil_die(ret, "read_row %" PRIu64, keyno); + + pinned = 1; + } + + /* Sleep for some number of seconds. */ + period = mmrand(NULL, 1, 10); + + /* Sleep for short periods so we don't make the run wait. */ + while (period > 0 && !g.workers_finished) { + --period; + sleep(1); + } + if (g.workers_finished) + break; + } + + testutil_check(session->close(session, NULL)); + + free(key.mem); + free(value.mem); + free(buf); + + return (NULL); +} diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c index 2a2ff91fbf0..9b2aacaecff 100644 --- a/src/third_party/wiredtiger/test/format/ops.c +++ b/src/third_party/wiredtiger/test/format/ops.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. @@ -28,19 +28,21 @@ #include "format.h" -static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); -static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, int *); -static int col_update(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); -static int nextprev(WT_CURSOR *, int, int *); -static int notfound_chk(const char *, int, int, uint64_t); +static int col_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); +static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t); +static int col_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int nextprev(WT_CURSOR *, int); static void *ops(void *); -static void print_item(const char *, WT_ITEM *); -static int read_row(WT_CURSOR *, WT_ITEM *, uint64_t); -static int row_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); -static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, int *); -static int row_update(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int row_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t); +static int row_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); static void table_append_init(void); +#ifdef HAVE_BERKELEY_DB +static int notfound_chk(const char *, int, int, uint64_t); +static void print_item(const char *, WT_ITEM *); +#endif + /* * wts_ops -- * Perform a number of operations in a set of threads. @@ -51,23 +53,17 @@ wts_ops(int lastrun) TINFO *tinfo, total; WT_CONNECTION *conn; WT_SESSION *session; - pthread_t backup_tid, compact_tid; + pthread_t backup_tid, compact_tid, lrt_tid; int64_t fourths, thread_ops; uint32_t i; - int ret, running; + int running; conn = g.wts_conn; session = NULL; /* -Wconditional-uninitialized */ memset(&backup_tid, 0, sizeof(backup_tid)); memset(&compact_tid, 0, sizeof(compact_tid)); - - /* - * We support replay of threaded runs, but don't log random numbers - * after threaded operations start, there's no point. - */ - if (!SINGLETHREADED) - g.rand_log_stop = 1; + memset(&lrt_tid, 0, sizeof(lrt_tid)); /* * There are two mechanisms to specify the length of the run, a number @@ -87,37 +83,45 @@ wts_ops(int lastrun) if (g.c_timer == 0) fourths = -1; else - fourths = (g.c_timer * 4 * 60) / FORMAT_OPERATION_REPS; + fourths = ((int64_t)g.c_timer * 4 * 60) / FORMAT_OPERATION_REPS; /* Initialize the table extension code. */ table_append_init(); + /* + * We support replay of threaded runs, but don't log random numbers + * after threaded operations start, there's no point. + */ + if (!SINGLETHREADED) + g.rand_log_stop = 1; + /* Open a session. */ if (g.logging != 0) { - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) - die(ret, "connection.open_session"); + testutil_check(conn->open_session(conn, NULL, NULL, &session)); (void)g.wt_api->msg_printf(g.wt_api, session, "=============== thread ops start ==============="); } /* Create thread structure; start the worker threads. */ - if ((tinfo = calloc((size_t)g.c_threads, sizeof(*tinfo))) == NULL) - die(errno, "calloc"); + tinfo = dcalloc((size_t)g.c_threads, sizeof(*tinfo)); for (i = 0; i < g.c_threads; ++i) { tinfo[i].id = (int)i + 1; tinfo[i].state = TINFO_RUNNING; - if ((ret = - pthread_create(&tinfo[i].tid, NULL, ops, &tinfo[i])) != 0) - die(ret, "pthread_create"); + testutil_check( + pthread_create(&tinfo[i].tid, NULL, ops, &tinfo[i])); } - /* If a multi-threaded run, start backup and compaction threads. */ - if (g.c_backups && - (ret = pthread_create(&backup_tid, NULL, backup, NULL)) != 0) - die(ret, "pthread_create: backup"); - if (g.c_compact && - (ret = pthread_create(&compact_tid, NULL, compact, NULL)) != 0) - die(ret, "pthread_create: compaction"); + /* + * If a multi-threaded run, start optional backup, compaction and + * long-running reader threads. + */ + if (g.c_backups) + testutil_check(pthread_create(&backup_tid, NULL, backup, NULL)); + if (g.c_compact) + testutil_check( + pthread_create(&compact_tid, NULL, compact, NULL)); + if (!SINGLETHREADED && g.c_long_running_txn) + testutil_check(pthread_create(&lrt_tid, NULL, lrt, NULL)); /* Spin on the threads, calculating the totals. */ for (;;) { @@ -171,70 +175,247 @@ wts_ops(int lastrun) } free(tinfo); - /* Wait for the backup, compaction thread. */ + /* Wait for the backup, compaction, long-running reader threads. */ g.workers_finished = 1; if (g.c_backups) (void)pthread_join(backup_tid, NULL); if (g.c_compact) (void)pthread_join(compact_tid, NULL); + if (!SINGLETHREADED && g.c_long_running_txn) + (void)pthread_join(lrt_tid, NULL); + g.workers_finished = 0; if (g.logging != 0) { (void)g.wt_api->msg_printf(g.wt_api, session, "=============== thread ops stop ==============="); - if ((ret = session->close(session, NULL)) != 0) - die(ret, "session.close"); + testutil_check(session->close(session, NULL)); } } /* - * ops_session_config -- - * Return the current session configuration. + * isolation_config -- + * Return an isolation configuration. */ -static const char * -ops_session_config(WT_RAND_STATE *rnd) +static inline const char * +isolation_config(WT_RAND_STATE *rnd, bool *iso_snapshotp) { u_int v; - /* - * The only current session configuration is the isolation level. - */ if ((v = g.c_isolation_flag) == ISOLATION_RANDOM) v = mmrand(rnd, 2, 4); switch (v) { case ISOLATION_READ_UNCOMMITTED: + *iso_snapshotp = false; return ("isolation=read-uncommitted"); case ISOLATION_READ_COMMITTED: + *iso_snapshotp = false; return ("isolation=read-committed"); case ISOLATION_SNAPSHOT: default: + *iso_snapshotp = true; return ("isolation=snapshot"); } } +typedef struct { + uint64_t keyno; /* Row number */ + + void *kdata; /* If an insert, the generated key */ + size_t ksize; + size_t kmemsize; + + void *vdata; /* If not a delete, the value */ + size_t vsize; + size_t vmemsize; + + bool deleted; /* Delete operation */ + bool insert; /* Insert operation */ +} SNAP_OPS; + +/* + * snap_track -- + * Add a single snapshot isolation returned value to the list. + */ +static void +snap_track(SNAP_OPS *snap, uint64_t keyno, WT_ITEM *key, WT_ITEM *value) +{ + snap->keyno = keyno; + if (key == NULL) + snap->insert = false; + else { + snap->insert = true; + + if (snap->kmemsize < key->size) { + snap->kdata = drealloc(snap->kdata, key->size); + snap->kmemsize = key->size; + } + memcpy(snap->kdata, key->data, snap->ksize = key->size); + } + if (value == NULL) + snap->deleted = true; + else { + snap->deleted = false; + if (snap->vmemsize < value->size) { + snap->vdata = drealloc(snap->vdata, value->size); + snap->vmemsize = value->size; + } + memcpy(snap->vdata, value->data, snap->vsize = value->size); + } +} + +/* + * snap_check -- + * Check snapshot isolation operations are repeatable. + */ +static int +snap_check(WT_CURSOR *cursor, + SNAP_OPS *start, SNAP_OPS *stop, WT_ITEM *key, WT_ITEM *value) +{ + WT_DECL_RET; + SNAP_OPS *p; + uint8_t bitfield; + + for (; start < stop; ++start) { + /* Check for subsequent changes to this record. */ + for (p = start + 1; p < stop && p->keyno != start->keyno; ++p) + ; + if (p != stop) + continue; + + /* + * Retrieve the key/value pair by key. Row-store inserts have a + * unique generated key we saved, else generate the key from the + * key number. + */ + if (start->insert == 0) { + switch (g.type) { + case FIX: + case VAR: + cursor->set_key(cursor, start->keyno); + break; + case ROW: + key_gen(key, start->keyno); + cursor->set_key(cursor, key); + break; + } + } else { + key->data = start->kdata; + key->size = start->ksize; + cursor->set_key(cursor, key); + } + if ((ret = cursor->search(cursor)) == 0) { + if (g.type == FIX) { + testutil_check( + cursor->get_value(cursor, &bitfield)); + *(uint8_t *)(value->data) = bitfield; + value->size = 1; + } else + testutil_check( + cursor->get_value(cursor, value)); + } else + if (ret != WT_NOTFOUND) + return (ret); + + /* Check for simple matches. */ + if (ret == 0 && !start->deleted && + value->size == start->vsize && + memcmp(value->data, start->vdata, value->size) == 0) + continue; + if (ret == WT_NOTFOUND && start->deleted) + continue; + + /* + * In fixed length stores, zero values at the end of the key + * space are returned as not-found, and not-found row reads + * are saved as zero values. Map back-and-forth for simplicity. + */ + if (g.type == FIX) { + if (ret == WT_NOTFOUND && + start->vsize == 1 && *(uint8_t *)start->vdata == 0) + continue; + if (start->deleted && + value->size == 1 && *(uint8_t *)value->data == 0) + continue; + } + + /* Things went pear-shaped. */ + switch (g.type) { + case FIX: + testutil_die(ret, + "snapshot-isolation: %" PRIu64 " search: " + "expected {0x%02x}, found {0x%02x}", + start->keyno, + start->deleted ? 0 : *(uint8_t *)start->vdata, + ret == WT_NOTFOUND ? 0 : *(uint8_t *)value->data); + /* NOTREACHED */ + case ROW: + testutil_die(ret, + "snapshot-isolation: %.*s search: " + "expected {%.*s}, found {%.*s}", + (int)key->size, key->data, + start->deleted ? + (int)strlen("deleted") : (int)start->vsize, + start->deleted ? "deleted" : start->vdata, + ret == WT_NOTFOUND ? + (int)strlen("deleted") : (int)value->size, + ret == WT_NOTFOUND ? "deleted" : value->data); + /* NOTREACHED */ + case VAR: + testutil_die(ret, + "snapshot-isolation: %" PRIu64 " search: " + "expected {%.*s}, found {%.*s}", + start->keyno, + start->deleted ? + (int)strlen("deleted") : (int)start->vsize, + start->deleted ? "deleted" : start->vdata, + ret == WT_NOTFOUND ? + (int)strlen("deleted") : (int)value->size, + ret == WT_NOTFOUND ? "deleted" : value->data); + /* NOTREACHED */ + } + } + return (0); +} + +/* + * ops -- + * Per-thread operations. + */ static void * ops(void *arg) { + SNAP_OPS *snap, snap_list[64]; TINFO *tinfo; WT_CONNECTION *conn; WT_CURSOR *cursor, *cursor_insert; + WT_DECL_RET; + WT_ITEM *key, _key, *value, _value; WT_SESSION *session; - WT_ITEM key, value; uint64_t keyno, ckpt_op, session_op; - uint32_t op; - uint8_t *keybuf, *valbuf; - u_int np; - int ckpt_available, dir, insert, intxn, notfound, readonly, ret; + uint32_t op, rnd; + u_int i; + int dir; char *ckpt_config, ckpt_name[64]; + bool ckpt_available, intxn, iso_snapshot, positioned, readonly; tinfo = arg; conn = g.wts_conn; - keybuf = valbuf = NULL; - readonly = 0; /* -Wconditional-uninitialized */ + readonly = false; /* -Wconditional-uninitialized */ + + /* Initialize tracking of snapshot isolation transaction returns. */ + snap = NULL; + iso_snapshot = false; + memset(snap_list, 0, sizeof(snap_list)); + + /* Initialize the per-thread random number generator. */ + __wt_random_init(&tinfo->rnd); /* Set up the default key and value buffers. */ - key_gen_setup(&keybuf); - val_gen_setup(&tinfo->rnd, &valbuf); + key = &_key; + key_gen_setup(key); + value = &_value; + val_gen_setup(&tinfo->rnd, value); /* Set the first operation where we'll create sessions and cursors. */ session_op = 0; @@ -243,32 +424,29 @@ ops(void *arg) /* Set the first operation where we'll perform checkpoint operations. */ ckpt_op = g.c_checkpoints ? mmrand(&tinfo->rnd, 100, 10000) : 0; - ckpt_available = 0; + ckpt_available = false; - for (intxn = 0; !tinfo->quit; ++tinfo->ops) { + for (intxn = false; !tinfo->quit; ++tinfo->ops) { /* * We can't checkpoint or swap sessions/cursors while in a * transaction, resolve any running transaction. */ if (intxn && (tinfo->ops == ckpt_op || tinfo->ops == session_op)) { - if ((ret = session->commit_transaction( - session, NULL)) != 0) - die(ret, "session.commit_transaction"); + testutil_check( + session->commit_transaction(session, NULL)); ++tinfo->commit; - intxn = 0; + intxn = false; } /* Open up a new session and cursors. */ if (tinfo->ops == session_op || session == NULL || cursor == NULL) { - if (session != NULL && - (ret = session->close(session, NULL)) != 0) - die(ret, "session.close"); + if (session != NULL) + testutil_check(session->close(session, NULL)); - if ((ret = conn->open_session(conn, NULL, - ops_session_config(&tinfo->rnd), &session)) != 0) - die(ret, "connection.open_session"); + testutil_check( + conn->open_session(conn, NULL, NULL, &session)); /* * 10% of the time, perform some read-only operations @@ -283,15 +461,14 @@ ops(void *arg) */ if (!SINGLETHREADED && !DATASOURCE("lsm") && ckpt_available && mmrand(&tinfo->rnd, 1, 10) == 1) { - if ((ret = session->open_cursor(session, - g.uri, NULL, ckpt_name, &cursor)) != 0) - die(ret, "session.open_cursor"); + testutil_check(session->open_cursor(session, + g.uri, NULL, ckpt_name, &cursor)); /* Pick the next session/cursor close/open. */ session_op += 250; /* Checkpoints are read-only. */ - readonly = 1; + readonly = true; } else { /* * Open two cursors: one for overwriting and one @@ -306,33 +483,43 @@ ops(void *arg) * want to have to specify the record number, * which requires an append configuration. */ - if ((ret = session->open_cursor(session, g.uri, - NULL, "overwrite", &cursor)) != 0) - die(ret, "session.open_cursor"); - if ((g.type == FIX || g.type == VAR) && - (ret = session->open_cursor(session, g.uri, - NULL, "append", &cursor_insert)) != 0) - die(ret, "session.open_cursor"); + testutil_check(session->open_cursor(session, + g.uri, NULL, "overwrite", &cursor)); + if (g.type == FIX || g.type == VAR) + testutil_check(session->open_cursor( + session, g.uri, + NULL, "append", &cursor_insert)); /* Pick the next session/cursor close/open. */ - session_op += 100 * mmrand(&tinfo->rnd, 1, 50); + session_op += mmrand(&tinfo->rnd, 100, 5000); /* Updates supported. */ - readonly = 0; + readonly = false; } } /* Checkpoint the database. */ if (tinfo->ops == ckpt_op && g.c_checkpoints) { /* - * LSM and data-sources don't support named checkpoints, + * Checkpoints are single-threaded inside WiredTiger, + * skip our checkpoint if another thread is already + * doing one. + */ + ret = pthread_rwlock_trywrlock(&g.checkpoint_lock); + if (ret == EBUSY) + goto skip_checkpoint; + testutil_check(ret); + + /* + * LSM and data-sources don't support named checkpoints * and we can't drop a named checkpoint while there's a - * cursor open on it, otherwise 20% of the time name the - * checkpoint. + * backup in progress, otherwise name the checkpoint 5% + * of the time. */ - if (DATASOURCE("helium") || DATASOURCE("kvsbdb") || - DATASOURCE("lsm") || readonly || - mmrand(&tinfo->rnd, 1, 5) == 1) + if (mmrand(&tinfo->rnd, 1, 20) != 1 || + DATASOURCE("helium") || + DATASOURCE("kvsbdb") || DATASOURCE("lsm") || + pthread_rwlock_trywrlock(&g.backup_lock) == EBUSY) ckpt_config = NULL; else { (void)snprintf(ckpt_name, sizeof(ckpt_name), @@ -340,22 +527,22 @@ ops(void *arg) ckpt_config = ckpt_name; } - /* Named checkpoints lock out backups */ - if (ckpt_config != NULL && - (ret = pthread_rwlock_wrlock(&g.backup_lock)) != 0) - die(ret, - "pthread_rwlock_wrlock: backup lock"); - - if ((ret = - session->checkpoint(session, ckpt_config)) != 0) - die(ret, "session.checkpoint%s%s", - ckpt_config == NULL ? "" : ": ", + ret = session->checkpoint(session, ckpt_config); + /* + * We may be trying to create a named checkpoint while + * we hold a cursor open to the previous checkpoint. + * Tolerate EBUSY. + */ + if (ret != 0 && ret != EBUSY) + testutil_die(ret, "%s", ckpt_config == NULL ? "" : ckpt_config); + ret = 0; - if (ckpt_config != NULL && - (ret = pthread_rwlock_unlock(&g.backup_lock)) != 0) - die(ret, - "pthread_rwlock_wrlock: backup lock"); + if (ckpt_config != NULL) + testutil_check( + pthread_rwlock_unlock(&g.backup_lock)); + testutil_check( + pthread_rwlock_unlock(&g.checkpoint_lock)); /* Rephrase the checkpoint name for cursor open. */ if (ckpt_config == NULL) @@ -364,29 +551,31 @@ ops(void *arg) else (void)snprintf(ckpt_name, sizeof(ckpt_name), "checkpoint=thread-%d", tinfo->id); - ckpt_available = 1; + ckpt_available = true; - /* Pick the next checkpoint operation. */ - ckpt_op += 1000 * mmrand(&tinfo->rnd, 5, 20); +skip_checkpoint: /* Pick the next checkpoint operation. */ + ckpt_op += mmrand(&tinfo->rnd, 5000, 20000); } /* - * If we're not single-threaded and we're not in a transaction, - * start a transaction 20% of the time. + * If we're not single-threaded and not in a transaction, choose + * an isolation level and start a transaction some percentage of + * the time. */ - if (!SINGLETHREADED && !intxn && - mmrand(&tinfo->rnd, 1, 10) >= 8) { - if ((ret = - session->begin_transaction(session, NULL)) != 0) - die(ret, "session.begin_transaction"); - intxn = 1; + if (!SINGLETHREADED && + !intxn && mmrand(&tinfo->rnd, 1, 100) >= g.c_txn_freq) { + testutil_check( + session->reconfigure(session, + isolation_config(&tinfo->rnd, &iso_snapshot))); + testutil_check( + session->begin_transaction(session, NULL)); + + snap = iso_snapshot ? snap_list : NULL; + intxn = true; } - insert = notfound = 0; - - keyno = mmrand(&tinfo->rnd, 1, g.rows); - key.data = keybuf; - value.data = valbuf; + keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows); + positioned = false; /* * Perform some number of operations: the percentage of deletes, @@ -395,32 +584,35 @@ ops(void *arg) * of deletes will mean fewer inserts and writes. Modifications * are always followed by a read to confirm it worked. */ - op = readonly ? UINT32_MAX : (uint32_t)(rng(&tinfo->rnd) % 100); + op = readonly ? UINT32_MAX : mmrand(&tinfo->rnd, 1, 100); if (op < g.c_delete_pct) { ++tinfo->remove; switch (g.type) { case ROW: - /* - * If deleting a non-existent record, the cursor - * won't be positioned, and so can't do a next. - */ - if (row_remove(cursor, &key, keyno, ¬found)) - goto deadlock; + ret = row_remove(cursor, key, keyno); break; case FIX: case VAR: - if (col_remove(cursor, &key, keyno, ¬found)) - goto deadlock; + ret = col_remove(cursor, key, keyno); break; } + if (ret == 0) { + positioned = true; + if (snap != NULL && (size_t) + (snap - snap_list) < WT_ELEMENTS(snap_list)) + snap_track(snap++, keyno, NULL, NULL); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + } } else if (op < g.c_delete_pct + g.c_insert_pct) { ++tinfo->insert; switch (g.type) { case ROW: - if (row_insert( - tinfo, cursor, &key, &value, keyno)) - goto deadlock; - insert = 1; + key_gen_insert(&tinfo->rnd, key, keyno); + val_gen(&tinfo->rnd, value, keyno); + ret = row_insert(cursor, key, value, keyno); break; case FIX: case VAR: @@ -433,38 +625,60 @@ ops(void *arg) goto skip_insert; /* Insert, then reset the insert cursor. */ - if (col_insert(tinfo, - cursor_insert, &key, &value, &keyno)) - goto deadlock; - if ((ret = - cursor_insert->reset(cursor_insert)) != 0) - die(ret, "cursor.reset"); - - insert = 1; + val_gen(&tinfo->rnd, value, g.rows + 1); + ret = col_insert( + cursor_insert, key, value, &keyno); + testutil_check( + cursor_insert->reset(cursor_insert)); break; } + positioned = false; + if (ret == 0) { + if (snap != NULL && (size_t) + (snap - snap_list) < WT_ELEMENTS(snap_list)) + snap_track(snap++, keyno, + g.type == ROW ? key : NULL, value); + } else + if (ret == WT_ROLLBACK && intxn) + goto deadlock; } else if ( op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) { ++tinfo->update; switch (g.type) { case ROW: - if (row_update( - tinfo, cursor, &key, &value, keyno)) - goto deadlock; + key_gen(key, keyno); + val_gen(&tinfo->rnd, value, keyno); + ret = row_update(cursor, key, value, keyno); break; case FIX: case VAR: -skip_insert: if (col_update( - tinfo, cursor, &key, &value, keyno)) - goto deadlock; +skip_insert: val_gen(&tinfo->rnd, value, keyno); + ret = col_update(cursor, key, value, keyno); break; } + if (ret == 0) { + positioned = true; + if (snap != NULL && (size_t) + (snap - snap_list) < WT_ELEMENTS(snap_list)) + snap_track(snap++, keyno, NULL, value); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + } } else { ++tinfo->search; - if (read_row(cursor, &key, keyno)) - if (intxn) + ret = read_row(cursor, key, value, keyno); + if (ret == 0) { + positioned = true; + if (snap != NULL && (size_t) + (snap - snap_list) < WT_ELEMENTS(snap_list)) + snap_track(snap++, keyno, NULL, value); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) goto deadlock; - continue; + } } /* @@ -472,59 +686,64 @@ skip_insert: if (col_update( * insert, do a small number of next/prev cursor operations in * a random direction. */ - if (!insert) { + if (positioned) { dir = (int)mmrand(&tinfo->rnd, 0, 1); - for (np = 0; np < mmrand(&tinfo->rnd, 1, 8); ++np) { - if (notfound) - break; - if (nextprev(cursor, dir, ¬found)) + for (i = 0; i < mmrand(&tinfo->rnd, 1, 100); ++i) { + if ((ret = nextprev(cursor, dir)) == 0) + continue; + if (ret == WT_ROLLBACK && intxn) goto deadlock; + break; } } - /* Read to confirm the operation. */ - ++tinfo->search; - if (read_row(cursor, &key, keyno)) - goto deadlock; - /* Reset the cursor: there is no reason to keep pages pinned. */ - if (cursor != NULL && (ret = cursor->reset(cursor)) != 0) - die(ret, "cursor.reset"); + testutil_check(cursor->reset(cursor)); /* - * If we're in the transaction, commit 40% of the time and + * If we're in a transaction, commit 40% of the time and * rollback 10% of the time. */ - if (intxn) - switch (mmrand(&tinfo->rnd, 1, 10)) { - case 1: case 2: case 3: case 4: /* 40% */ - if ((ret = session->commit_transaction( - session, NULL)) != 0) - die(ret, "session.commit_transaction"); - ++tinfo->commit; - intxn = 0; - break; - case 5: /* 10% */ - if (0) { -deadlock: ++tinfo->deadlock; - } - if ((ret = session->rollback_transaction( - session, NULL)) != 0) - die(ret, - "session.rollback_transaction"); - ++tinfo->rollback; - intxn = 0; - break; - default: - break; + if (!intxn || (rnd = mmrand(&tinfo->rnd, 1, 10)) > 5) + continue; + + /* + * Ending the transaction. If in snapshot isolation, repeat the + * operations and confirm they're unchanged. + */ + if (snap != NULL && (ret = snap_check( + cursor, snap_list, snap, key, value)) == WT_ROLLBACK) + goto deadlock; + + switch (rnd) { + case 1: case 2: case 3: case 4: /* 40% */ + testutil_check( + session->commit_transaction(session, NULL)); + ++tinfo->commit; + break; + case 5: /* 10% */ + if (0) { +deadlock: ++tinfo->deadlock; } + testutil_check( + session->rollback_transaction(session, NULL)); + ++tinfo->rollback; + break; + } + + intxn = false; + snap = NULL; } - if (session != NULL && (ret = session->close(session, NULL)) != 0) - die(ret, "session.close"); + if (session != NULL) + testutil_check(session->close(session, NULL)); - free(keybuf); - free(valbuf); + for (i = 0; i < WT_ELEMENTS(snap_list); ++i) { + free(snap_list[i].kdata); + free(snap_list[i].vdata); + } + free(key->mem); + free(value->mem); tinfo->state = TINFO_COMPLETE; return (NULL); @@ -539,56 +758,59 @@ wts_read_scan(void) { WT_CONNECTION *conn; WT_CURSOR *cursor; - WT_ITEM key; + WT_DECL_RET; + WT_ITEM key, value; WT_SESSION *session; - uint64_t cnt, last_cnt; - uint8_t *keybuf; - int ret; + uint64_t keyno, last_keyno; conn = g.wts_conn; - /* Set up the default key buffer. */ - key_gen_setup(&keybuf); + /* Set up the default key/value buffers. */ + key_gen_setup(&key); + val_gen_setup(NULL, &value); /* Open a session and cursor pair. */ - if ((ret = conn->open_session( - conn, NULL, ops_session_config(NULL), &session)) != 0) - die(ret, "connection.open_session"); - if ((ret = session->open_cursor( - session, g.uri, NULL, NULL, &cursor)) != 0) - die(ret, "session.open_cursor"); + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_check( + session->open_cursor(session, g.uri, NULL, NULL, &cursor)); /* Check a random subset of the records using the key. */ - for (last_cnt = cnt = 0; cnt < g.key_cnt;) { - cnt += rng(NULL) % 17 + 1; - if (cnt > g.rows) - cnt = g.rows; - if (cnt - last_cnt > 1000) { - track("read row scan", cnt, NULL); - last_cnt = cnt; + for (last_keyno = keyno = 0; keyno < g.key_cnt;) { + keyno += mmrand(NULL, 1, 17); + if (keyno > g.rows) + keyno = g.rows; + if (keyno - last_keyno > 1000) { + track("read row scan", keyno, NULL); + last_keyno = keyno; } - key.data = keybuf; - if ((ret = read_row(cursor, &key, cnt)) != 0) - die(ret, "read_scan"); + switch (ret = read_row(cursor, &key, &value, keyno)) { + case 0: + case WT_NOTFOUND: + case WT_ROLLBACK: + break; + default: + testutil_die( + ret, "wts_read_scan: read row %" PRIu64, keyno); + } } - if ((ret = session->close(session, NULL)) != 0) - die(ret, "session.close"); + testutil_check(session->close(session, NULL)); - free(keybuf); + free(key.mem); + free(value.mem); } /* * read_row -- * Read and verify a single element in a row- or column-store file. */ -static int -read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) +int +read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { - WT_ITEM bdb_value, value; + static int sn = 0; WT_SESSION *session; - int notfound, ret; + int exact, ret; uint8_t bitfield; session = cursor->session; @@ -605,56 +827,74 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) cursor->set_key(cursor, keyno); break; case ROW: - key_gen((uint8_t *)key->data, &key->size, keyno); + key_gen(key, keyno); cursor->set_key(cursor, key); break; } - if ((ret = cursor->search(cursor)) == 0) { + if (sn) { + ret = cursor->search_near(cursor, &exact); + if (ret == 0 && exact != 0) + ret = WT_NOTFOUND; + sn = 0; + } else { + ret = cursor->search(cursor); + sn = 1; + } + switch (ret) { + case 0: if (g.type == FIX) { - ret = cursor->get_value(cursor, &bitfield); - value.data = &bitfield; - value.size = 1; - } else { - ret = cursor->get_value(cursor, &value); + testutil_check(cursor->get_value(cursor, &bitfield)); + *(uint8_t *)(value->data) = bitfield; + value->size = 1; + } else + testutil_check(cursor->get_value(cursor, value)); + break; + case WT_NOTFOUND: + /* + * In fixed length stores, zero values at the end of the key + * space are returned as not found. Treat this the same as + * a zero value in the key space, to match BDB's behavior. + */ + if (g.type == FIX) { + *(uint8_t *)(value->data) = 0; + value->size = 1; + ret = 0; } - } - if (ret == WT_ROLLBACK) + break; + case WT_ROLLBACK: return (WT_ROLLBACK); - if (ret != 0 && ret != WT_NOTFOUND) - die(ret, "read_row: read row %" PRIu64, keyno); - /* - * In fixed length stores, zero values at the end of the key space are - * returned as not found. Treat this the same as a zero value in the - * key space, to match BDB's behavior. - */ - if (ret == WT_NOTFOUND && g.type == FIX) { - bitfield = 0; - value.data = &bitfield; - value.size = 1; - ret = 0; + default: + testutil_die(ret, "read_row: read row %" PRIu64, keyno); } +#ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) - return (0); + return (ret); /* Retrieve the BDB value. */ + { + WT_ITEM bdb_value; + int notfound; + bdb_read(keyno, &bdb_value.data, &bdb_value.size, ¬found); /* Check for not-found status. */ if (notfound_chk("read_row", ret, notfound, keyno)) - return (0); + return (ret); /* Compare the two. */ - if (value.size != bdb_value.size || - memcmp(value.data, bdb_value.data, value.size) != 0) { + if (value->size != bdb_value.size || + memcmp(value->data, bdb_value.data, value->size) != 0) { fprintf(stderr, - "read_row: read row value mismatch %" PRIu64 ":\n", keyno); + "read_row: value mismatch %" PRIu64 ":\n", keyno); print_item("bdb", &bdb_value); - print_item(" wt", &value); - die(0, NULL); + print_item(" wt", value); + testutil_die(0, NULL); } - return (0); + } +#endif + return (ret); } /* @@ -662,24 +902,19 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) * Read and verify the next/prev element in a row- or column-store file. */ static int -nextprev(WT_CURSOR *cursor, int next, int *notfoundp) +nextprev(WT_CURSOR *cursor, int next) { - WT_ITEM key, value, bdb_key, bdb_value; - WT_SESSION *session; + WT_DECL_RET; + WT_ITEM key, value; uint64_t keyno; - int notfound, ret; uint8_t bitfield; const char *which; - char *p; - session = cursor->session; + keyno = 0; which = next ? "next" : "prev"; - keyno = 0; - ret = next ? cursor->next(cursor) : cursor->prev(cursor); - if (ret == WT_ROLLBACK) - return (WT_ROLLBACK); - if (ret == 0) + switch (ret = (next ? cursor->next(cursor) : cursor->prev(cursor))) { + case 0: switch (g.type) { case FIX: if ((ret = cursor->get_key(cursor, &keyno)) == 0 && @@ -697,19 +932,35 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp) ret = cursor->get_value(cursor, &value); break; } - if (ret != 0 && ret != WT_NOTFOUND) - die(ret, "%s", which); - *notfoundp = (ret == WT_NOTFOUND); + if (ret != 0) + testutil_die(ret, "nextprev: get_key/get_value"); + break; + case WT_NOTFOUND: + break; + case WT_ROLLBACK: + return (WT_ROLLBACK); + default: + testutil_die(ret, "%s", which); + } +#ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) - return (0); + return (ret); + + { + WT_ITEM bdb_key, bdb_value; + WT_SESSION *session; + int notfound; + char *p; + + session = cursor->session; /* Retrieve the BDB value. */ bdb_np(next, &bdb_key.data, &bdb_key.size, &bdb_value.data, &bdb_value.size, ¬found); if (notfound_chk( next ? "nextprev(next)" : "nextprev(prev)", ret, notfound, keyno)) - return (0); + return (ret); /* Compare the two. */ if (g.type == ROW) { @@ -718,7 +969,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp) fprintf(stderr, "nextprev: %s key mismatch:\n", which); print_item("bdb-key", &bdb_key); print_item(" wt-key", &key); - die(0, NULL); + testutil_die(0, NULL); } } else { if (keyno != (uint64_t)atoll(bdb_key.data)) { @@ -728,7 +979,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp) "nextprev: %s key mismatch: %.*s != %" PRIu64 "\n", which, (int)bdb_key.size, (char *)bdb_key.data, keyno); - die(0, NULL); + testutil_die(0, NULL); } } if (value.size != bdb_value.size || @@ -736,7 +987,7 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp) fprintf(stderr, "nextprev: %s value mismatch:\n", which); print_item("bdb-value", &bdb_value); print_item(" wt-value", &value); - die(0, NULL); + testutil_die(0, NULL); } if (g.logging == LOG_OPS) @@ -758,7 +1009,9 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp) keyno, (int)value.size, (char *)value.data); break; } - return (0); + } +#endif + return (ret); } /* @@ -766,37 +1019,38 @@ nextprev(WT_CURSOR *cursor, int next, int *notfoundp) * Update a row in a row-store file. */ static int -row_update(TINFO *tinfo, - WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +row_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { + WT_DECL_RET; WT_SESSION *session; - int notfound, ret; session = cursor->session; - key_gen((uint8_t *)key->data, &key->size, keyno); - val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, keyno); - /* Log the operation */ if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s{%.*s}\n%-10s{%.*s}", - "putK", (int)key->size, (char *)key->data, - "putV", (int)value->size, (char *)value->data); + "%-10s{%.*s}, {%.*s}", + "put", + (int)key->size, key->data, (int)value->size, value->data); cursor->set_key(cursor, key); cursor->set_value(cursor, value); - ret = cursor->update(cursor); - if (ret == WT_ROLLBACK) + switch (ret = cursor->update(cursor)) { + case 0: + break; + case WT_ROLLBACK: return (WT_ROLLBACK); - if (ret != 0 && ret != WT_NOTFOUND) - die(ret, "row_update: update row %" PRIu64 " by key", keyno); + default: + testutil_die(ret, + "row_update: update row %" PRIu64 " by key", keyno); + } +#ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - bdb_update(key->data, key->size, value->data, value->size, ¬found); - (void)notfound_chk("row_update", ret, notfound, keyno); + bdb_update(key->data, key->size, value->data, value->size); +#endif return (0); } @@ -805,16 +1059,13 @@ row_update(TINFO *tinfo, * Update a row in a column-store file. */ static int -col_update(TINFO *tinfo, - WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +col_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { + WT_DECL_RET; WT_SESSION *session; - int notfound, ret; session = cursor->session; - val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, keyno); - /* Log the operation */ if (g.logging == LOG_OPS) { if (g.type == FIX) @@ -834,18 +1085,24 @@ col_update(TINFO *tinfo, cursor->set_value(cursor, *(uint8_t *)value->data); else cursor->set_value(cursor, value); - ret = cursor->update(cursor); - if (ret == WT_ROLLBACK) + switch (ret = cursor->update(cursor)) { + case 0: + break; + case WT_ROLLBACK: return (WT_ROLLBACK); - if (ret != 0 && ret != WT_NOTFOUND) - die(ret, "col_update: %" PRIu64, keyno); + default: + testutil_die(ret, "col_update: %" PRIu64, keyno); + } +#ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - key_gen((uint8_t *)key->data, &key->size, keyno); - bdb_update(key->data, key->size, value->data, value->size, ¬found); - (void)notfound_chk("col_update", ret, notfound, keyno); + key_gen(key, keyno); + bdb_update(key->data, key->size, value->data, value->size); +#else + (void)key; /* [-Wunused-variable] */ +#endif return (0); } @@ -860,12 +1117,8 @@ table_append_init(void) g.append_max = (size_t)g.c_threads * 10; g.append_cnt = 0; - if (g.append != NULL) { - free(g.append); - g.append = NULL; - } - if ((g.append = calloc(g.append_max, sizeof(uint64_t))) == NULL) - die(errno, "calloc"); + free(g.append); + g.append = dcalloc(g.append_max, sizeof(uint64_t)); } /* @@ -876,13 +1129,13 @@ static void table_append(uint64_t keyno) { uint64_t *p, *ep; - int done, ret; + int done; ep = g.append + g.append_max; /* * We don't want to ignore records we append, which requires we update - * the "last row" as we insert new records. Threads allocating record + * the "last row" as we insert new records. Threads allocating record * numbers can race with other threads, so the thread allocating record * N may return after the thread allocating N + 1. We can't update a * record before it's been inserted, and so we can't leave gaps when the @@ -900,15 +1153,14 @@ table_append(uint64_t keyno) * to sleep (so the append table fills up), then N threads of control * used the same g.append_cnt value to decide there was an available * slot in the append table and both allocated new records, we could run - * out of space in the table. It's unfortunately not even unlikely in + * out of space in the table. It's unfortunately not even unlikely in * the case of a large number of threads all inserting as fast as they * can and a single thread going to sleep for an unexpectedly long time. * If it happens, sleep and retry until earlier records are resolved * and we find a slot. */ for (done = 0;;) { - if ((ret = pthread_rwlock_wrlock(&g.append_lock)) != 0) - die(ret, "pthread_rwlock_wrlock: append_lock"); + testutil_check(pthread_rwlock_wrlock(&g.append_lock)); /* * If this is the thread we've been waiting for, and its record @@ -945,8 +1197,7 @@ table_append(uint64_t keyno) break; } - if ((ret = pthread_rwlock_unlock(&g.append_lock)) != 0) - die(ret, "pthread_rwlock_unlock: append_lock"); + testutil_check(pthread_rwlock_unlock(&g.append_lock)); if (done) break; @@ -959,37 +1210,38 @@ table_append(uint64_t keyno) * Insert a row in a row-store file. */ static int -row_insert(TINFO *tinfo, - WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +row_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) { + WT_DECL_RET; WT_SESSION *session; - int notfound, ret; session = cursor->session; - key_gen_insert(&tinfo->rnd, (uint8_t *)key->data, &key->size, keyno); - val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, keyno); - /* Log the operation */ if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, session, - "%-10s{%.*s}\n%-10s{%.*s}", - "insertK", (int)key->size, (char *)key->data, - "insertV", (int)value->size, (char *)value->data); + "%-10s{%.*s}, {%.*s}", + "insert", + (int)key->size, key->data, (int)value->size, value->data); cursor->set_key(cursor, key); cursor->set_value(cursor, value); - ret = cursor->insert(cursor); - if (ret == WT_ROLLBACK) + switch (ret = cursor->insert(cursor)) { + case 0: + break; + case WT_ROLLBACK: return (WT_ROLLBACK); - if (ret != 0 && ret != WT_NOTFOUND) - die(ret, "row_insert: insert row %" PRIu64 " by key", keyno); + default: + testutil_die(ret, + "row_insert: insert row %" PRIu64 " by key", keyno); + } +#ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - bdb_update(key->data, key->size, value->data, value->size, ¬found); - (void)notfound_chk("row_insert", ret, notfound, keyno); + bdb_update(key->data, key->size, value->data, value->size); +#endif return (0); } @@ -998,28 +1250,27 @@ row_insert(TINFO *tinfo, * Insert an element in a column-store file. */ static int -col_insert(TINFO *tinfo, - WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) +col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) { + WT_DECL_RET; WT_SESSION *session; uint64_t keyno; - int notfound, ret; session = cursor->session; - val_gen(&tinfo->rnd, (uint8_t *)value->data, &value->size, g.rows + 1); - if (g.type == FIX) cursor->set_value(cursor, *(uint8_t *)value->data); else cursor->set_value(cursor, value); - if ((ret = cursor->insert(cursor)) != 0) { - if (ret == WT_ROLLBACK) - return (WT_ROLLBACK); - die(ret, "cursor.insert"); + switch (ret = cursor->insert(cursor)) { + case 0: + break; + case WT_ROLLBACK: + return (WT_ROLLBACK); + default: + testutil_die(ret, "cursor.insert"); } - if ((ret = cursor->get_key(cursor, &keyno)) != 0) - die(ret, "cursor.get_key"); + testutil_check(cursor->get_key(cursor, &keyno)); *keynop = (uint32_t)keyno; table_append(keyno); /* Extend the object. */ @@ -1037,11 +1288,15 @@ col_insert(TINFO *tinfo, (int)value->size, (char *)value->data); } +#ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - key_gen((uint8_t *)key->data, &key->size, keyno); - bdb_update(key->data, key->size, value->data, value->size, ¬found); + key_gen(key, keyno); + bdb_update(key->data, key->size, value->data, value->size); +#else + (void)key; /* [-Wunused-variable] */ +#endif return (0); } @@ -1050,14 +1305,14 @@ col_insert(TINFO *tinfo, * Remove an row from a row-store file. */ static int -row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) +row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) { + WT_DECL_RET; WT_SESSION *session; - int notfound, ret; session = cursor->session; - key_gen((uint8_t *)key->data, &key->size, keyno); + key_gen(key, keyno); /* Log the operation */ if (g.logging == LOG_OPS) @@ -1068,18 +1323,31 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) /* We use the cursor in overwrite mode, check for existence. */ if ((ret = cursor->search(cursor)) == 0) ret = cursor->remove(cursor); - if (ret == WT_ROLLBACK) + switch (ret) { + case 0: + case WT_NOTFOUND: + break; + case WT_ROLLBACK: return (WT_ROLLBACK); - if (ret != 0 && ret != WT_NOTFOUND) - die(ret, "row_remove: remove %" PRIu64 " by key", keyno); - *notfoundp = (ret == WT_NOTFOUND); + default: + testutil_die(ret, + "row_remove: remove %" PRIu64 " by key", keyno); + } +#ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) - return (0); + return (ret); + + { + int notfound; bdb_remove(keyno, ¬found); (void)notfound_chk("row_remove", ret, notfound, keyno); - return (0); + } +#else + (void)key; /* [-Wunused-variable] */ +#endif + return (ret); } /* @@ -1087,10 +1355,10 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) * Remove a row from a column-store file. */ static int -col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) +col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) { + WT_DECL_RET; WT_SESSION *session; - int notfound, ret; session = cursor->session; @@ -1103,28 +1371,41 @@ col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp) /* We use the cursor in overwrite mode, check for existence. */ if ((ret = cursor->search(cursor)) == 0) ret = cursor->remove(cursor); - if (ret == WT_ROLLBACK) + switch (ret) { + case 0: + case WT_NOTFOUND: + break; + case WT_ROLLBACK: return (WT_ROLLBACK); - if (ret != 0 && ret != WT_NOTFOUND) - die(ret, "col_remove: remove %" PRIu64 " by key", keyno); - *notfoundp = (ret == WT_NOTFOUND); + default: + testutil_die(ret, + "col_remove: remove %" PRIu64 " by key", keyno); + } +#ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) - return (0); + return (ret); /* * Deleting a fixed-length item is the same as setting the bits to 0; * do the same thing for the BDB store. */ if (g.type == FIX) { - key_gen((uint8_t *)key->data, &key->size, keyno); - bdb_update(key->data, key->size, "\0", 1, ¬found); - } else + key_gen(key, keyno); + bdb_update(key->data, key->size, "\0", 1); + } else { + int notfound; + bdb_remove(keyno, ¬found); - (void)notfound_chk("col_remove", ret, notfound, keyno); - return (0); + (void)notfound_chk("col_remove", ret, notfound, keyno); + } +#else + (void)key; /* [-Wunused-variable] */ +#endif + return (ret); } +#ifdef HAVE_BERKELEY_DB /* * notfound_chk -- * Compare notfound returns for consistency. @@ -1142,7 +1423,7 @@ notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno) fprintf(stderr, " row %" PRIu64 ":", keyno); fprintf(stderr, " not found in Berkeley DB, found in WiredTiger\n"); - die(0, NULL); + testutil_die(0, NULL); } if (wt_ret == WT_NOTFOUND) { fprintf(stderr, "%s: %s:", g.progname, f); @@ -1150,7 +1431,7 @@ notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno) fprintf(stderr, " row %" PRIu64 ":", keyno); fprintf(stderr, " found in Berkeley DB, not found in WiredTiger\n"); - die(0, NULL); + testutil_die(0, NULL); } return (0); } @@ -1165,7 +1446,7 @@ print_item(const char *tag, WT_ITEM *item) static const char hex[] = "0123456789abcdef"; const uint8_t *data; size_t size; - int ch; + u_char ch; data = item->data; size = item->size; @@ -1185,3 +1466,4 @@ print_item(const char *tag, WT_ITEM *item) } fprintf(stderr, "}\n"); } +#endif diff --git a/src/third_party/wiredtiger/test/format/salvage.c b/src/third_party/wiredtiger/test/format/salvage.c index ca28e4b60d8..69805fb1018 100644 --- a/src/third_party/wiredtiger/test/format/salvage.c +++ b/src/third_party/wiredtiger/test/format/salvage.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. @@ -36,18 +36,16 @@ static void salvage(void) { WT_CONNECTION *conn; + WT_DECL_RET; WT_SESSION *session; - int ret; conn = g.wts_conn; track("salvage", 0ULL, NULL); - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) - die(ret, "connection.open_session"); + testutil_check(conn->open_session(conn, NULL, NULL, &session)); if ((ret = session->salvage(session, g.uri, "force=true")) != 0) - die(ret, "session.salvage: %s", g.uri); - if ((ret = session->close(session, NULL)) != 0) - die(ret, "session.close"); + testutil_die(ret, "session.salvage: %s", g.uri); + testutil_check(session->close(session, NULL)); } /* @@ -101,37 +99,37 @@ corrupt(void) return (0); found: if (fstat(fd, &sb) == -1) - die(errno, "salvage-corrupt: fstat"); + testutil_die(errno, "salvage-corrupt: fstat"); - offset = mmrand(NULL, 0, sb.st_size); + offset = mmrand(NULL, 0, (u_int)sb.st_size); len = (size_t)(20 + (sb.st_size / 100) * 2); (void)snprintf(buf, sizeof(buf), "%s/slvg.corrupt", g.home); if ((fp = fopen(buf, "w")) == NULL) - die(errno, "salvage-corrupt: open: %s", buf); + testutil_die(errno, "salvage-corrupt: open: %s", buf); (void)fprintf(fp, - "salvage-corrupt: offset %" PRIuMAX ", length " SIZET_FMT "\n", + "salvage-corrupt: offset %" PRIuMAX ", length %" WT_SIZET_FMT "\n", (uintmax_t)offset, len); - (void)fclose(fp); + fclose_and_clear(&fp); if (lseek(fd, offset, SEEK_SET) == -1) - die(errno, "salvage-corrupt: lseek"); + testutil_die(errno, "salvage-corrupt: lseek"); memset(buf, 'z', sizeof(buf)); for (; len > 0; len -= nw) { nw = (size_t)(len > sizeof(buf) ? sizeof(buf) : len); if (write(fd, buf, nw) == -1) - die(errno, "salvage-corrupt: write"); + testutil_die(errno, "salvage-corrupt: write"); } if (close(fd) == -1) - die(errno, "salvage-corrupt: close"); + testutil_die(errno, "salvage-corrupt: close"); /* * Save a copy of the corrupted file so we can replay the salvage step * as necessary. */ if ((ret = system(copycmd)) != 0) - die(ret, "salvage corrupt copy step failed"); + testutil_die(ret, "salvage corrupt copy step failed"); return (1); } @@ -143,21 +141,24 @@ found: if (fstat(fd, &sb) == -1) void wts_salvage(void) { - int ret; + WT_DECL_RET; /* Some data-sources don't support salvage. */ if (DATASOURCE("helium") || DATASOURCE("kvsbdb")) return; + if (g.c_salvage == 0) + return; + /* * Save a copy of the interesting files so we can replay the salvage * step as necessary. */ if ((ret = system(g.home_salvage_copy)) != 0) - die(ret, "salvage copy step failed"); + testutil_die(ret, "salvage copy step failed"); /* Salvage, then verify. */ - wts_open(g.home, 1, &g.wts_conn); + wts_open(g.home, true, &g.wts_conn); salvage(); wts_verify("post-salvage verify"); wts_close(); @@ -173,7 +174,7 @@ wts_salvage(void) /* Corrupt the file randomly, salvage, then verify. */ if (corrupt()) { - wts_open(g.home, 1, &g.wts_conn); + wts_open(g.home, true, &g.wts_conn); salvage(); wts_verify("post-corrupt-salvage verify"); wts_close(); diff --git a/src/third_party/wiredtiger/test/format/smoke.sh b/src/third_party/wiredtiger/test/format/smoke.sh index fe53f64229f..0c86b5e57c6 100755 --- a/src/third_party/wiredtiger/test/format/smoke.sh +++ b/src/third_party/wiredtiger/test/format/smoke.sh @@ -1,9 +1,11 @@ #! /bin/sh +set -e + # Smoke-test format as part of running "make check". -args="-1 -c "." data_source=table ops=100000 rows=10000 threads=4 compression=none" +args="-1 -c "." data_source=table ops=50000 rows=10000 threads=4 compression=none logging_compression=none" -./t $args file_type=fix || exit 1 -./t $args file_type=row || exit 1 -./t $args file_type=row data_source=lsm || exit 1 -./t $args file_type=var || exit 1 +$TEST_WRAPPER ./t $args file_type=fix +$TEST_WRAPPER ./t $args file_type=row +$TEST_WRAPPER ./t $args file_type=row data_source=lsm +$TEST_WRAPPER ./t $args file_type=var diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c index 21dc02ce522..da8a496f4ee 100644 --- a/src/third_party/wiredtiger/test/format/t.c +++ b/src/third_party/wiredtiger/test/format/t.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. @@ -30,8 +30,10 @@ GLOBAL g; +static void format_die(void); static void startup(void); -static void usage(void); +static void usage(void) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern int __wt_optind; extern char *__wt_optarg; @@ -40,15 +42,21 @@ int main(int argc, char *argv[]) { time_t start; - int ch, i, reps, ret; + int ch, onerun, reps; const char *config, *home; + custom_die = format_die; /* Local death handler. */ + config = NULL; - if ((g.progname = strrchr(argv[0], '/')) == NULL) +#ifdef _WIN32 + g.progname = "t_format.exe"; +#else + if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL) g.progname = argv[0]; else ++g.progname; +#endif #if 0 /* Configure the GNU malloc for debugging. */ @@ -60,15 +68,16 @@ main(int argc, char *argv[]) #endif /* Track progress unless we're re-directing output to a file. */ - g.track = isatty(1) ? 1 : 0; + g.c_quiet = isatty(1) ? 0 : 1; /* Set values from the command line. */ home = NULL; + onerun = 0; while ((ch = __wt_getopt( g.progname, argc, argv, "1C:c:H:h:Llqrt:")) != EOF) switch (ch) { case '1': /* One run */ - g.c_runs = 1; + onerun = 1; break; case 'C': /* wiredtiger_open config */ g.config_open = __wt_optarg; @@ -94,7 +103,7 @@ main(int argc, char *argv[]) g.logging = LOG_OPS; break; case 'q': /* Quiet */ - g.track = 0; + g.c_quiet = 1; break; case 'r': /* Replay a run */ g.replay = 1; @@ -105,14 +114,8 @@ main(int argc, char *argv[]) argc -= __wt_optind; argv += __wt_optind; - /* - * Initialize the global RNG. Start with the standard seeds, and then - * use seconds since the Epoch modulo a prime to run the RNG for some - * number of steps, so we don't start with the same values every time. - */ + /* Initialize the global RNG. */ __wt_random_init(&g.rnd); - for (i = (int)time(NULL) % 10007; i > 0; --i) - (void)__wt_random(&g.rnd); /* Set up paths. */ path_setup(home); @@ -120,9 +123,9 @@ main(int argc, char *argv[]) /* If it's a replay, use the home directory's CONFIG file. */ if (g.replay) { if (config != NULL) - die(EINVAL, "-c incompatible with -r"); + testutil_die(EINVAL, "-c incompatible with -r"); if (access(g.home_config, R_OK) != 0) - die(ENOENT, "%s", g.home_config); + testutil_die(ENOENT, "%s", g.home_config); config = g.home_config; } @@ -160,20 +163,21 @@ main(int argc, char *argv[]) if (g.replay && SINGLETHREADED) g.c_runs = 1; - /* Use line buffering on stdout so status updates aren't buffered. */ - (void)setvbuf(stdout, NULL, _IOLBF, 32); - /* - * Initialize locks to single-thread named checkpoints and backups, and - * to single-thread last-record updates. + * Let the command line -1 flag override runs configured from other + * sources. */ - if ((ret = pthread_rwlock_init(&g.append_lock, NULL)) != 0) - die(ret, "pthread_rwlock_init: append lock"); - if ((ret = pthread_rwlock_init(&g.backup_lock, NULL)) != 0) - die(ret, "pthread_rwlock_init: backup lock"); + if (onerun) + g.c_runs = 1; - /* Seed the random number generator. */ - srand((u_int)(0xdeadbeef ^ (u_int)time(NULL))); + /* + * Initialize locks to single-thread named checkpoints and backups, last + * last-record updates, and failures. + */ + testutil_check(pthread_rwlock_init(&g.append_lock, NULL)); + testutil_check(pthread_rwlock_init(&g.backup_lock, NULL)); + testutil_check(pthread_rwlock_init(&g.checkpoint_lock, NULL)); + testutil_check(pthread_rwlock_init(&g.death_lock, NULL)); printf("%s: process %" PRIdMAX "\n", g.progname, (intmax_t)getpid()); while (++g.run_cnt <= g.c_runs || g.c_runs == 0 ) { @@ -186,10 +190,12 @@ main(int argc, char *argv[]) start = time(NULL); track("starting up", 0ULL, NULL); +#ifdef HAVE_BERKELEY_DB if (SINGLETHREADED) bdb_open(); /* Initial file config */ - wts_open(g.home, 1, &g.wts_conn); - wts_create(); +#endif + wts_open(g.home, true, &g.wts_conn); + wts_init(); wts_load(); /* Load initial records */ wts_verify("post-bulk verify"); /* Verify */ @@ -225,8 +231,10 @@ main(int argc, char *argv[]) } track("shutting down", 0ULL, NULL); +#ifdef HAVE_BERKELEY_DB if (SINGLETHREADED) bdb_close(); +#endif wts_close(); /* @@ -242,25 +250,24 @@ main(int argc, char *argv[]) wts_salvage(); /* Overwrite the progress line with a completion line. */ - if (g.track) + if (!g.c_quiet) printf("\r%78s\r", " "); printf("%4d: %s, %s (%.0f seconds)\n", g.run_cnt, g.c_data_source, g.c_file_type, difftime(time(NULL), start)); + fflush(stdout); } /* Flush/close any logging information. */ - if (g.logfp != NULL) - (void)fclose(g.logfp); - if (g.rand_log != NULL) - (void)fclose(g.rand_log); + fclose_and_clear(&g.logfp); + fclose_and_clear(&g.randfp); config_print(0); - if ((ret = pthread_rwlock_destroy(&g.append_lock)) != 0) - die(ret, "pthread_rwlock_destroy: append lock"); - if ((ret = pthread_rwlock_destroy(&g.backup_lock)) != 0) - die(ret, "pthread_rwlock_destroy: backup lock"); + testutil_check(pthread_rwlock_destroy(&g.append_lock)); + testutil_check(pthread_rwlock_destroy(&g.backup_lock)); + testutil_check(pthread_rwlock_destroy(&g.checkpoint_lock)); + testutil_check(pthread_rwlock_destroy(&g.death_lock)); config_clear(); @@ -274,81 +281,51 @@ main(int argc, char *argv[]) static void startup(void) { - int ret; - - /* Close the logging file. */ - if (g.logfp != NULL) { - (void)fclose(g.logfp); - g.logfp = NULL; - } - - /* Close the random number logging file. */ - if (g.rand_log != NULL) { - (void)fclose(g.rand_log); - g.rand_log = NULL; - } + WT_DECL_RET; - /* Create home if it doesn't yet exist. */ - if (access(g.home, X_OK) != 0 && mkdir(g.home, 0777) != 0) - die(errno, "mkdir: %s", g.home); + /* Flush/close any logging information. */ + fclose_and_clear(&g.logfp); + fclose_and_clear(&g.randfp); - /* Remove the run's files except for rand. */ + /* Create or initialize the home and data-source directories. */ if ((ret = system(g.home_init)) != 0) - die(ret, "home directory initialization failed"); + testutil_die(ret, "home directory initialization failed"); - /* Create the data-source directory. */ - if (mkdir(g.home_kvs, 0777) != 0) - die(errno, "mkdir: %s", g.home_kvs); + /* Open/truncate the logging file. */ + if (g.logging != 0 && (g.logfp = fopen(g.home_log, "w")) == NULL) + testutil_die(errno, "fopen: %s", g.home_log); - /* - * Open/truncate the logging file; line buffer so we see up-to-date - * information on error. - */ - if (g.logging != 0) { - if ((g.logfp = fopen(g.home_log, "w")) == NULL) - die(errno, "fopen: %s", g.home_log); - (void)setvbuf(g.logfp, NULL, _IOLBF, 0); - } - - /* - * Open/truncate the random number logging file; line buffer so we see - * up-to-date information on error. - */ - if ((g.rand_log = fopen(g.home_rand, g.replay ? "r" : "w")) == NULL) - die(errno, "%s", g.home_rand); - (void)setvbuf(g.rand_log, NULL, _IOLBF, 32); + /* Open/truncate the random number logging file. */ + if ((g.randfp = fopen(g.home_rand, g.replay ? "r" : "w")) == NULL) + testutil_die(errno, "%s", g.home_rand); } /* * die -- - * Report an error and quit, dumping the configuration. + * Report an error, dumping the configuration. */ -void -die(int e, const char *fmt, ...) +static void +format_die(void) { - va_list ap; - - if (fmt != NULL) { /* Death message. */ - fprintf(stderr, "%s: ", g.progname); - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); - if (e != 0) - fprintf(stderr, ": %s", wiredtiger_strerror(e)); + /* + * Single-thread error handling, our caller exits after calling + * us - don't release the lock. + */ + (void)pthread_rwlock_wrlock(&g.death_lock); + + /* Try and turn off tracking so it doesn't obscure the error message. */ + if (!g.c_quiet) { + g.c_quiet = 1; fprintf(stderr, "\n"); } /* Flush/close any logging information. */ - if (g.logfp != NULL) - (void)fclose(g.logfp); - if (g.rand_log != NULL) - (void)fclose(g.rand_log); + fclose_and_clear(&g.logfp); + fclose_and_clear(&g.randfp); /* Display the configuration that failed. */ if (g.run_cnt) config_print(1); - - exit(EXIT_FAILURE); } /* diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c index dff5ab43571..4f5e6528908 100644 --- a/src/third_party/wiredtiger/test/format/util.c +++ b/src/third_party/wiredtiger/test/format/util.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. @@ -32,28 +32,11 @@ #define MAX(a, b) (((a) > (b)) ? (a) : (b)) #endif -static inline uint32_t -kv_len(WT_RAND_STATE *rnd, uint64_t keyno, uint32_t min, uint32_t max) -{ - /* - * Focus on relatively small key/value items, admitting the possibility - * of larger items. Pick a size close to the minimum most of the time, - * only create a larger item 1 in 20 times, and a really big item 1 in - * 1000 times. (Configuration can force large key/value minimum sizes, - * where every key/value item is an overflow.) - */ - if (keyno % 1000 == 0 && max < KILOBYTE(80)) { - min = KILOBYTE(80); - max = KILOBYTE(100); - } else if (keyno % 20 != 0 && max > min + 20) - max = min + 20; - return (mmrand(rnd, min, max)); -} - void key_len_setup(void) { size_t i; + uint32_t max; /* * The key is a variable length item with a leading 10-digit value. @@ -63,73 +46,123 @@ key_len_setup(void) * the pre-loaded lengths. * * Fill in the random key lengths. + * + * Focus on relatively small items, admitting the possibility of larger + * items. Pick a size close to the minimum most of the time, only create + * a larger item 1 in 20 times. */ - for (i = 0; i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i) - g.key_rand_len[i] = - kv_len(NULL, (uint64_t)i, g.c_key_min, g.c_key_max); + for (i = 0; + i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i) { + max = g.c_key_max; + if (i % 20 != 0 && max > g.c_key_min + 20) + max = g.c_key_min + 20; + g.key_rand_len[i] = mmrand(NULL, g.c_key_min, max); + } } void -key_gen_setup(uint8_t **keyp) +key_gen_setup(WT_ITEM *key) { - uint8_t *key; size_t i, len; - - *keyp = NULL; + char *p; len = MAX(KILOBYTE(100), g.c_key_max); - if ((key = malloc(len)) == NULL) - die(errno, "malloc"); + p = dmalloc(len); for (i = 0; i < len; ++i) - key[i] = (uint8_t)("abcdefghijklmnopqrstuvwxyz"[i % 26]); - *keyp = key; + p[i] = "abcdefghijklmnopqrstuvwxyz"[i % 26]; + + key->mem = p; + key->memsize = len; + key->data = key->mem; + key->size = 0; } static void -key_gen_common(uint8_t *key, size_t *sizep, uint64_t keyno, int suffix) +key_gen_common(WT_ITEM *key, uint64_t keyno, const char * const suffix) { int len; + char *p; + + p = key->mem; /* - * The key always starts with a 10-digit string (the specified cnt) + * The key always starts with a 10-digit string (the specified row) * followed by two digits, a random number between 1 and 15 if it's * an insert, otherwise 00. */ - len = sprintf((char *)key, "%010" PRIu64 ".%02d", keyno, suffix); + u64_to_string_zf(keyno, key->mem, 11); + p[10] = '.'; + p[11] = suffix[0]; + p[12] = suffix[1]; + len = 13; /* - * In a column-store, the key is only used for BDB, and so it doesn't - * need a random length. + * In a column-store, the key is only used for Berkeley DB inserts, + * and so it doesn't need a random length. */ if (g.type == ROW) { - key[len] = '/'; - len = (int)g.key_rand_len[keyno % - (sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]))]; + p[len] = '/'; + + /* + * Because we're doing table lookup for key sizes, we weren't + * able to set really big keys sizes in the table, the table + * isn't big enough to keep our hash from selecting too many + * big keys and blowing out the cache. Handle that here, use a + * really big key 1 in 2500 times. + */ + len = keyno % 2500 == 0 && g.c_key_max < KILOBYTE(80) ? + KILOBYTE(80) : + (int)g.key_rand_len[keyno % WT_ELEMENTS(g.key_rand_len)]; } - *sizep = (size_t)len; + + key->data = key->mem; + key->size = (size_t)len; } void -key_gen(uint8_t *key, size_t *sizep, uint64_t keyno) +key_gen(WT_ITEM *key, uint64_t keyno) { - key_gen_common(key, sizep, keyno, 0); + key_gen_common(key, keyno, "00"); } void -key_gen_insert(WT_RAND_STATE *rnd, uint8_t *key, size_t *sizep, uint64_t keyno) +key_gen_insert(WT_RAND_STATE *rnd, WT_ITEM *key, uint64_t keyno) { - key_gen_common(key, sizep, keyno, (int)mmrand(rnd, 1, 15)); + static const char * const suffix[15] = { + "01", "02", "03", "04", "05", + "06", "07", "08", "09", "10", + "11", "12", "13", "14", "15" + }; + + key_gen_common(key, keyno, suffix[mmrand(rnd, 1, 15) - 1]); } static uint32_t val_dup_data_len; /* Length of duplicate data items */ +static inline uint32_t +value_len(WT_RAND_STATE *rnd, uint64_t keyno, uint32_t min, uint32_t max) +{ + /* + * Focus on relatively small items, admitting the possibility of larger + * items. Pick a size close to the minimum most of the time, only create + * a larger item 1 in 20 times, and a really big item 1 in somewhere + * around 2500 items. + */ + if (keyno % 2500 == 0 && max < KILOBYTE(80)) { + min = KILOBYTE(80); + max = KILOBYTE(100); + } else if (keyno % 20 != 0 && max > min + 20) + max = min + 20; + return (mmrand(rnd, min, max)); +} + void -val_gen_setup(WT_RAND_STATE *rnd, uint8_t **valp) +val_gen_setup(WT_RAND_STATE *rnd, WT_ITEM *value) { - uint8_t *val; size_t i, len; + char *p; - *valp = NULL; + memset(value, 0, sizeof(WT_ITEM)); /* * Set initial buffer contents to recognizable text. @@ -139,36 +172,43 @@ val_gen_setup(WT_RAND_STATE *rnd, uint8_t **valp) * data for column-store run-length encoded files. */ len = MAX(KILOBYTE(100), g.c_value_max) + 20; - if ((val = malloc(len)) == NULL) - die(errno, "malloc"); + p = dmalloc(len); for (i = 0; i < len; ++i) - val[i] = (uint8_t)("ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26]); + p[i] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26]; - *valp = val; + value->mem = p; + value->memsize = len; + value->data = value->mem; + value->size = 0; - val_dup_data_len = kv_len(rnd, + val_dup_data_len = value_len(rnd, (uint64_t)mmrand(rnd, 1, 20), g.c_value_min, g.c_value_max); } void -val_gen(WT_RAND_STATE *rnd, uint8_t *val, size_t *sizep, uint64_t keyno) +val_gen(WT_RAND_STATE *rnd, WT_ITEM *value, uint64_t keyno) { + char *p; + + p = value->mem; + value->data = value->mem; + /* * Fixed-length records: take the low N bits from the last digit of * the record number. */ if (g.type == FIX) { switch (g.c_bitcnt) { - case 8: val[0] = mmrand(rnd, 1, 0xff); break; - case 7: val[0] = mmrand(rnd, 1, 0x7f); break; - case 6: val[0] = mmrand(rnd, 1, 0x3f); break; - case 5: val[0] = mmrand(rnd, 1, 0x1f); break; - case 4: val[0] = mmrand(rnd, 1, 0x0f); break; - case 3: val[0] = mmrand(rnd, 1, 0x07); break; - case 2: val[0] = mmrand(rnd, 1, 0x03); break; - case 1: val[0] = 1; break; + case 8: p[0] = (char)mmrand(rnd, 1, 0xff); break; + case 7: p[0] = (char)mmrand(rnd, 1, 0x7f); break; + case 6: p[0] = (char)mmrand(rnd, 1, 0x3f); break; + case 5: p[0] = (char)mmrand(rnd, 1, 0x1f); break; + case 4: p[0] = (char)mmrand(rnd, 1, 0x0f); break; + case 3: p[0] = (char)mmrand(rnd, 1, 0x07); break; + case 2: p[0] = (char)mmrand(rnd, 1, 0x03); break; + case 1: p[0] = 1; break; } - *sizep = 1; + value->size = 1; return; } @@ -177,29 +217,24 @@ val_gen(WT_RAND_STATE *rnd, uint8_t *val, size_t *sizep, uint64_t keyno) * test that by inserting a zero-length data item every so often. */ if (keyno % 63 == 0) { - val[0] = '\0'; - *sizep = 0; + p[0] = '\0'; + value->size = 0; return; } /* - * Start the data with a 10-digit number. - * - * For row and non-repeated variable-length column-stores, change the - * leading number to ensure every data item is unique. For repeated - * variable-length column-stores (that is, to test run-length encoding), - * use the same data value all the time. + * Data items have unique leading numbers by default and random lengths; + * variable-length column-stores use a duplicate data value to test RLE. */ - if ((g.type == ROW || g.type == VAR) && - g.c_repeat_data_pct != 0 && - mmrand(rnd, 1, 100) < g.c_repeat_data_pct) { - (void)strcpy((char *)val, "DUPLICATEV"); - val[10] = '/'; - *sizep = val_dup_data_len; + if (g.type == VAR && mmrand(rnd, 1, 100) < g.c_repeat_data_pct) { + (void)strcpy(p, "DUPLICATEV"); + p[10] = '/'; + value->size = val_dup_data_len; } else { - (void)sprintf((char *)val, "%010" PRIu64, keyno); - val[10] = '/'; - *sizep = kv_len(rnd, keyno, g.c_value_min, g.c_value_max); + u64_to_string_zf(keyno, p, 11); + p[10] = '/'; + value->size = + value_len(rnd, keyno, g.c_value_min, g.c_value_max); } } @@ -210,7 +245,7 @@ track(const char *tag, uint64_t cnt, TINFO *tinfo) int len; char msg[128]; - if (!g.track || tag == NULL) + if (g.c_quiet || tag == NULL) return; if (tinfo == NULL && cnt == 0) @@ -242,9 +277,9 @@ track(const char *tag, uint64_t cnt, TINFO *tinfo) lastlen = len; if (printf("%s\r", msg) < 0) - die(EIO, "printf"); + testutil_die(EIO, "printf"); if (fflush(stdout) == EOF) - die(errno, "fflush"); + testutil_die(errno, "fflush"); } /* @@ -257,80 +292,82 @@ path_setup(const char *home) size_t len; /* Home directory. */ - if ((g.home = strdup(home == NULL ? "RUNDIR" : home)) == NULL) - die(errno, "malloc"); + g.home = dstrdup(home == NULL ? "RUNDIR" : home); /* Log file. */ len = strlen(g.home) + strlen("log") + 2; - if ((g.home_log = malloc(len)) == NULL) - die(errno, "malloc"); + g.home_log = dmalloc(len); snprintf(g.home_log, len, "%s/%s", g.home, "log"); /* RNG log file. */ len = strlen(g.home) + strlen("rand") + 2; - if ((g.home_rand = malloc(len)) == NULL) - die(errno, "malloc"); + g.home_rand = dmalloc(len); snprintf(g.home_rand, len, "%s/%s", g.home, "rand"); /* Run file. */ len = strlen(g.home) + strlen("CONFIG") + 2; - if ((g.home_config = malloc(len)) == NULL) - die(errno, "malloc"); + g.home_config = dmalloc(len); snprintf(g.home_config, len, "%s/%s", g.home, "CONFIG"); /* Statistics file. */ len = strlen(g.home) + strlen("stats") + 2; - if ((g.home_stats = malloc(len)) == NULL) - die(errno, "malloc"); + g.home_stats = dmalloc(len); snprintf(g.home_stats, len, "%s/%s", g.home, "stats"); - /* Backup directory. */ - len = strlen(g.home) + strlen("BACKUP") + 2; - if ((g.home_backup = malloc(len)) == NULL) - die(errno, "malloc"); - snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP"); - /* BDB directory. */ len = strlen(g.home) + strlen("bdb") + 2; - if ((g.home_bdb = malloc(len)) == NULL) - die(errno, "malloc"); + g.home_bdb = dmalloc(len); snprintf(g.home_bdb, len, "%s/%s", g.home, "bdb"); - /* KVS directory. */ - len = strlen(g.home) + strlen("KVS") + 2; - if ((g.home_kvs = malloc(len)) == NULL) - die(errno, "malloc"); - snprintf(g.home_kvs, len, "%s/%s", g.home, "KVS"); - /* - * Home directory initialize command: remove everything except the RNG - * log file. + * Home directory initialize command: create the directory if it doesn't + * exist, else remove everything except the RNG log file, create the KVS + * sub-directory. * * Redirect the "cd" command to /dev/null so chatty cd implementations * don't add the new working directory to our output. */ #undef CMD #ifdef _WIN32 -#define CMD "cd %s && del /s /q * >:nul && rd /s /q KVS" +#define CMD "del /q rand.copy & " \ + "(IF EXIST %s\\rand copy /y %s\\rand rand.copy) & " \ + "(IF EXIST %s rd /s /q %s) & mkdir %s & " \ + "(IF EXIST rand.copy copy rand.copy %s\\rand) & " \ + "cd %s & mkdir KVS" + len = strlen(g.home) * 7 + strlen(CMD) + 1; + g.home_init = dmalloc(len); + snprintf(g.home_init, len, CMD, + g.home, g.home, g.home, g.home, g.home, g.home, g.home); #else -#define CMD "cd %s > /dev/null && rm -rf `ls | sed /rand/d`" +#define CMD "test -e %s || mkdir %s; " \ + "cd %s > /dev/null && rm -rf `ls | sed /rand/d`; " \ + "mkdir KVS" + len = strlen(g.home) * 3 + strlen(CMD) + 1; + g.home_init = dmalloc(len); + snprintf(g.home_init, len, CMD, g.home, g.home, g.home); #endif - len = strlen(g.home) + strlen(CMD) + 1; - if ((g.home_init = malloc(len)) == NULL) - die(errno, "malloc"); - snprintf(g.home_init, len, CMD, g.home); - /* Backup directory initialize command, remove and re-create it. */ + /* Primary backup directory. */ + len = strlen(g.home) + strlen("BACKUP") + 2; + g.home_backup = dmalloc(len); + snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP"); + + /* + * Backup directory initialize command, remove and re-create the primary + * backup directory, plus a copy we maintain for recovery testing. + */ #undef CMD #ifdef _WIN32 -#define CMD "del /s /q >:nul && mkdir %s" +#define CMD "rd /s /q %s\\%s %s\\%s & mkdir %s\\%s %s\\%s" #else -#define CMD "rm -rf %s && mkdir %s" +#define CMD "rm -rf %s/%s %s/%s && mkdir %s/%s %s/%s" #endif - len = strlen(g.home_backup) * 2 + strlen(CMD) + 1; - if ((g.home_backup_init = malloc(len)) == NULL) - die(errno, "malloc"); - snprintf(g.home_backup_init, len, CMD, g.home_backup, g.home_backup); + len = strlen(g.home) * 4 + + strlen("BACKUP") * 2 + strlen("BACKUP_COPY") * 2 + strlen(CMD) + 1; + g.home_backup_init = dmalloc(len); + snprintf(g.home_backup_init, len, CMD, + g.home, "BACKUP", g.home, "BACKUP_COPY", + g.home, "BACKUP", g.home, "BACKUP_COPY"); /* * Salvage command, save the interesting files so we can replay the @@ -352,8 +389,7 @@ path_setup(const char *home) "cp WiredTiger* wt* slvg.copy/" #endif len = strlen(g.home) + strlen(CMD) + 1; - if ((g.home_salvage_copy = malloc(len)) == NULL) - die(errno, "malloc"); + g.home_salvage_copy = dmalloc(len); snprintf(g.home_salvage_copy, len, CMD, g.home); } @@ -375,12 +411,8 @@ rng(WT_RAND_STATE *rnd) rnd = &g.rnd; /* - * We can entirely reproduce a run based on the random numbers used - * in the initial run, plus the configuration files. It would be - * nice to just log the initial RNG seed, rather than logging every - * random number generated, but we'd have to include our own RNG, - * Berkeley DB calls rand() internally, and that messes up the pattern - * of random numbers. + * We can reproduce a single-threaded run based on the random numbers + * used in the initial run, plus the configuration files. * * Check g.replay and g.rand_log_stop: multithreaded runs log/replay * until they get to the operations phase, then turn off log/replay, @@ -390,14 +422,13 @@ rng(WT_RAND_STATE *rnd) return (__wt_random(rnd)); if (g.replay) { - if (fgets(buf, sizeof(buf), g.rand_log) == NULL) { - if (feof(g.rand_log)) { + if (fgets(buf, sizeof(buf), g.randfp) == NULL) { + if (feof(g.randfp)) { fprintf(stderr, - "end of random number log reached, " - "exiting\n"); + "\n" "end of random number log reached\n"); exit(EXIT_SUCCESS); } - die(errno, "random number log"); + testutil_die(errno, "random number log"); } return ((uint32_t)strtoul(buf, NULL, 10)); @@ -405,6 +436,26 @@ rng(WT_RAND_STATE *rnd) r = __wt_random(rnd); - fprintf(g.rand_log, "%" PRIu32 "\n", r); + /* Save and flush the random number so we're up-to-date on error. */ + (void)fprintf(g.randfp, "%" PRIu32 "\n", r); + (void)fflush(g.randfp); + return (r); } + +/* + * fclose_and_clear -- + * Close a file and clear the handle so we don't close twice. + */ +void +fclose_and_clear(FILE **fpp) +{ + FILE *fp; + + if ((fp = *fpp) == NULL) + return; + *fpp = NULL; + if (fclose(fp) != 0) + testutil_die(errno, "fclose"); + return; +} diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c index 986796b2fc6..bf0cb6c14a4 100644 --- a/src/third_party/wiredtiger/test/format/wts.c +++ b/src/third_party/wiredtiger/test/format/wts.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2016 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. @@ -28,18 +28,53 @@ #include "format.h" +/* + * compressor -- + * Configure compression. + */ +static const char * +compressor(uint32_t compress_flag) +{ + switch (compress_flag) { + case COMPRESS_NONE: + return ("none"); + case COMPRESS_LZ4: + return ("lz4"); + case COMPRESS_LZ4_NO_RAW: + return ("lz4-noraw"); + case COMPRESS_LZO: + return ("LZO1B-6"); + case COMPRESS_SNAPPY: + return ("snappy"); + case COMPRESS_ZLIB: + return ("zlib"); + case COMPRESS_ZLIB_NO_RAW: + return ("zlib-noraw"); + default: + break; + } + testutil_die(EINVAL, + "illegal compression flag: %#" PRIx32, compress_flag); +} + static int handle_message(WT_EVENT_HANDLER *handler, WT_SESSION *session, const char *message) { + int out; + (void)(handler); (void)(session); - if (g.logfp != NULL) - return (fprintf( - g.logfp, "%p:%s\n", session, message) < 0 ? -1 : 0); - - return (printf("%p:%s\n", session, message) < 0 ? -1 : 0); + /* Write and flush the message so we're up-to-date on error. */ + if (g.logfp == NULL) { + out = printf("%p:%s\n", (void *)session, message); + (void)fflush(stdout); + } else { + out = fprintf(g.logfp, "%p:%s\n", (void *)session, message); + (void)fflush(g.logfp); + } + return (out < 0 ? EIO : 0); } /* @@ -72,26 +107,23 @@ static WT_EVENT_HANDLER event_handler = { * Open a connection to a WiredTiger database. */ void -wts_open(const char *home, int set_api, WT_CONNECTION **connp) +wts_open(const char *home, bool set_api, WT_CONNECTION **connp) { WT_CONNECTION *conn; - int ret; - char config[4096], *end, *p; + WT_DECL_RET; + char *config, *end, *p, helium_config[1024]; *connp = NULL; - p = config; - end = config + sizeof(config); + config = p = g.wiredtiger_open_config; + end = config + sizeof(g.wiredtiger_open_config); p += snprintf(p, REMAIN(p, end), - "create,checkpoint_sync=false,cache_size=%" PRIu32 "MB", - g.c_cache); - -#ifdef _WIN32 - p += snprintf(p, REMAIN(p, end), ",error_prefix=\"t_format.exe\""); -#else - p += snprintf(p, REMAIN(p, end), ",error_prefix=\"%s\"", g.progname); -#endif + "create=true," + "cache_size=%" PRIu32 "MB," + "checkpoint_sync=false," + "error_prefix=\"%s\"", + g.c_cache, g.progname); /* LSM configuration. */ if (DATASOURCE("lsm")) @@ -107,17 +139,22 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp) /* Logging configuration. */ if (g.c_logging) p += snprintf(p, REMAIN(p, end), - ",log=(enabled=true,archive=%d,prealloc=%d)", + ",log=(enabled=true,archive=%d,prealloc=%d" + ",compressor=\"%s\")", g.c_logging_archive ? 1 : 0, - g.c_logging_prealloc ? 1 : 0); + g.c_logging_prealloc ? 1 : 0, + compressor(g.c_logging_compression_flag)); /* Miscellaneous. */ -#ifndef _WIN32 +#ifdef HAVE_POSIX_MEMALIGN p += snprintf(p, REMAIN(p, end), ",buffer_alignment=512"); #endif p += snprintf(p, REMAIN(p, end), ",mmap=%d", g.c_mmap ? 1 : 0); + if (g.c_direct_io) + p += snprintf(p, REMAIN(p, end), ",direct_io=(data)"); + if (g.c_data_extend) p += snprintf(p, REMAIN(p, end), ",file_extend=(data=8MB)"); @@ -141,9 +178,8 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp) /* Extensions. */ p += snprintf(p, REMAIN(p, end), ",extensions=[" - "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],", + "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],", g.c_reverse ? REVERSE_PATH : "", - access(BZIP_PATH, R_OK) == 0 ? BZIP_PATH : "", access(LZ4_PATH, R_OK) == 0 ? LZ4_PATH : "", access(LZO_PATH, R_OK) == 0 ? LZO_PATH : "", access(SNAPPY_PATH, R_OK) == 0 ? SNAPPY_PATH : "", @@ -161,19 +197,20 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp) p += snprintf(p, REMAIN(p, end), ",%s", g.config_open); if (REMAIN(p, end) == 0) - die(ENOMEM, "wiredtiger_open configuration buffer too small"); + testutil_die(ENOMEM, + "wiredtiger_open configuration buffer too small"); /* * Direct I/O may not work with backups, doing copies through the buffer * cache after configuring direct I/O in Linux won't work. If direct - * I/O is configured, turn off backups. This isn't a great place to do + * I/O is configured, turn off backups. This isn't a great place to do * this check, but it's only here we have the configuration string. */ if (strstr(config, "direct_io") != NULL) g.c_backups = 0; - if ((ret = wiredtiger_open(home, &event_handler, config, &conn)) != 0) - die(ret, "wiredtiger_open: %s", home); + testutil_checkfmt( + wiredtiger_open(home, &event_handler, config, &conn), "%s", home); if (set_api) g.wt_api = conn->get_extension_api(conn); @@ -186,33 +223,48 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp) */ if (DATASOURCE("helium")) { if (g.helium_mount == NULL) - die(EINVAL, "no Helium mount point specified"); - (void)snprintf(config, sizeof(config), + testutil_die(EINVAL, "no Helium mount point specified"); + (void)snprintf(helium_config, sizeof(helium_config), "entry=wiredtiger_extension_init,config=[" "helium_verbose=0," "dev1=[helium_devices=\"he://./%s\"," "helium_o_volume_truncate=1]]", g.helium_mount); - if ((ret = - conn->load_extension(conn, HELIUM_PATH, config)) != 0) - die(ret, + if ((ret = conn->load_extension( + conn, HELIUM_PATH, helium_config)) != 0) + testutil_die(ret, "WT_CONNECTION.load_extension: %s:%s", - HELIUM_PATH, config); + HELIUM_PATH, helium_config); } *connp = conn; } /* + * wts_reopen -- + * Re-open a connection to a WiredTiger database. + */ +void +wts_reopen(void) +{ + WT_CONNECTION *conn; + + testutil_checkfmt(wiredtiger_open(g.home, &event_handler, + g.wiredtiger_open_config, &conn), "%s", g.home); + + g.wt_api = conn->get_extension_api(conn); + g.wts_conn = conn; +} + +/* * wts_create -- * Create the underlying store. */ void -wts_create(void) +wts_init(void) { WT_CONNECTION *conn; WT_SESSION *session; uint32_t maxintlpage, maxintlkey, maxleafpage, maxleafkey, maxleafvalue; - int ret; char config[4096], *end, *p; conn = g.wts_conn; @@ -222,15 +274,16 @@ wts_create(void) /* * Ensure that we can service at least one operation per-thread - * concurrently without filling the cache with pinned pages. We - * choose a multiplier of three because the max configurations control - * on disk size and in memory pages are often significantly larger - * than their disk counterparts. + * concurrently without filling the cache with pinned pages. We choose + * a multiplier of three because the max configurations control on disk + * size and in memory pages are often significantly larger than their + * disk counterparts. We also apply the default eviction_dirty_trigger + * of 20% so that workloads don't get stuck with dirty pages in cache. */ maxintlpage = 1U << g.c_intl_page_max; maxleafpage = 1U << g.c_leaf_page_max; while (3 * g.c_threads * (maxintlpage + maxleafpage) > - g.c_cache << 20) { + (g.c_cache << 20) / 5) { if (maxleafpage <= 512 && maxintlpage <= 512) break; if (maxintlpage > 512) @@ -241,7 +294,7 @@ wts_create(void) p += snprintf(p, REMAIN(p, end), "key_format=%s," "allocation_size=512,%s" - "internal_page_max=%d,leaf_page_max=%d", + "internal_page_max=%" PRIu32 ",leaf_page_max=%" PRIu32, (g.type == ROW) ? "u" : "r", g.c_firstfit ? "block_allocation=first," : "", maxintlpage, maxleafpage); @@ -253,15 +306,15 @@ wts_create(void) maxintlkey = mmrand(NULL, maxintlpage / 50, maxintlpage / 40); if (maxintlkey > 20) p += snprintf(p, REMAIN(p, end), - ",internal_key_max=%d", maxintlkey); + ",internal_key_max=%" PRIu32, maxintlkey); maxleafkey = mmrand(NULL, maxleafpage / 50, maxleafpage / 40); if (maxleafkey > 20) p += snprintf(p, REMAIN(p, end), - ",leaf_key_max=%d", maxleafkey); + ",leaf_key_max=%" PRIu32, maxleafkey); maxleafvalue = mmrand(NULL, maxleafpage * 10, maxleafpage / 40); if (maxleafvalue > 40 && maxleafvalue < 100 * 1024) p += snprintf(p, REMAIN(p, end), - ",leaf_value_max=%d", maxleafvalue); + ",leaf_value_max=%" PRIu32, maxleafvalue); switch (g.type) { case FIX: @@ -289,7 +342,7 @@ wts_create(void) ",huffman_value=english"); if (g.c_dictionary) p += snprintf(p, REMAIN(p, end), - ",dictionary=%d", mmrand(NULL, 123, 517)); + ",dictionary=%" PRIu32, mmrand(NULL, 123, 517)); break; } @@ -307,38 +360,9 @@ wts_create(void) } /* Configure compression. */ - switch (g.c_compression_flag) { - case COMPRESS_NONE: - break; - case COMPRESS_BZIP: - p += snprintf(p, REMAIN(p, end), - ",block_compressor=\"bzip2\""); - break; - case COMPRESS_BZIP_RAW: - p += snprintf(p, REMAIN(p, end), - ",block_compressor=\"bzip2-raw-test\""); - break; - case COMPRESS_LZ4: - p += snprintf(p, REMAIN(p, end), - ",block_compressor=\"lz4\""); - break; - case COMPRESS_LZO: - p += snprintf(p, REMAIN(p, end), - ",block_compressor=\"LZO1B-6\""); - break; - case COMPRESS_SNAPPY: - p += snprintf(p, REMAIN(p, end), - ",block_compressor=\"snappy\""); - break; - case COMPRESS_ZLIB: - p += snprintf(p, REMAIN(p, end), - ",block_compressor=\"zlib\""); - break; - case COMPRESS_ZLIB_NO_RAW: - p += snprintf(p, REMAIN(p, end), - ",block_compressor=\"zlib-noraw\""); - break; - } + if (g.c_compression_flag != COMPRESS_NONE) + p += snprintf(p, REMAIN(p, end), ",block_compressor=\"%s\"", + compressor(g.c_compression_flag)); /* Configure Btree internal key truncation. */ p += snprintf(p, REMAIN(p, end), ",internal_key_truncate=%s", @@ -385,51 +409,50 @@ wts_create(void) } if (REMAIN(p, end) == 0) - die(ENOMEM, "WT_SESSION.create configuration buffer too small"); + testutil_die(ENOMEM, + "WT_SESSION.create configuration buffer too small"); /* * Create the underlying store. */ - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) - die(ret, "connection.open_session"); - if ((ret = session->create(session, g.uri, config)) != 0) - die(ret, "session.create: %s", g.uri); - if ((ret = session->close(session, NULL)) != 0) - die(ret, "session.close"); + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_checkfmt(session->create(session, g.uri, config), "%s", g.uri); + testutil_check(session->close(session, NULL)); } void wts_close(void) { WT_CONNECTION *conn; - int ret; const char *config; conn = g.wts_conn; config = g.c_leak_memory ? "leak_memory" : NULL; - if ((ret = conn->close(conn, config)) != 0) - die(ret, "connection.close"); + testutil_check(conn->close(conn, config)); + g.wts_conn = NULL; + g.wt_api = NULL; } void wts_dump(const char *tag, int dump_bdb) { +#ifdef HAVE_BERKELEY_DB size_t len; - int ret; char *cmd; - /* Some data-sources don't support dump through the wt utility. */ + /* + * In-memory configurations and data-sources don't support dump through + * the wt utility. + */ if (DATASOURCE("helium") || DATASOURCE("kvsbdb")) return; -#ifndef _WIN32 track("dump files and compare", 0ULL, NULL); len = strlen(g.home) + strlen(BERKELEY_DB_PATH) + strlen(g.uri) + 100; - if ((cmd = malloc(len)) == NULL) - die(errno, "malloc"); + cmd = dmalloc(len); (void)snprintf(cmd, len, "sh s_dumpcmp -h %s %s %s %s %s %s", g.home, @@ -439,9 +462,11 @@ wts_dump(const char *tag, int dump_bdb) g.uri == NULL ? "" : "-n", g.uri == NULL ? "" : g.uri); - if ((ret = system(cmd)) != 0) - die(ret, "%s: dump comparison failed", tag); + testutil_checkfmt(system(cmd), "%s: dump comparison failed", tag); free(cmd); +#else + (void)tag; /* [-Wunused-variable] */ + (void)dump_bdb; /* [-Wunused-variable] */ #endif } @@ -449,14 +474,16 @@ void wts_verify(const char *tag) { WT_CONNECTION *conn; + WT_DECL_RET; WT_SESSION *session; - int ret; + + if (g.c_verify == 0) + return; conn = g.wts_conn; track("verify", 0ULL, NULL); - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) - die(ret, "connection.open_session"); + testutil_check(conn->open_session(conn, NULL, NULL, &session)); if (g.logging != 0) (void)g.wt_api->msg_printf(g.wt_api, session, "=============== verify start ==============="); @@ -464,13 +491,12 @@ wts_verify(const char *tag) /* Session operations for LSM can return EBUSY. */ ret = session->verify(session, g.uri, "strict"); if (ret != 0 && !(ret == EBUSY && DATASOURCE("lsm"))) - die(ret, "session.verify: %s: %s", g.uri, tag); + testutil_die(ret, "session.verify: %s: %s", g.uri, tag); if (g.logging != 0) (void)g.wt_api->msg_printf(g.wt_api, session, "=============== verify stop ==============="); - if ((ret = session->close(session, NULL)) != 0) - die(ret, "session.close"); + testutil_check(session->close(session, NULL)); } /* @@ -482,12 +508,13 @@ wts_stats(void) { WT_CONNECTION *conn; WT_CURSOR *cursor; + WT_DECL_RET; WT_SESSION *session; FILE *fp; + size_t len; char *stat_name; const char *pval, *desc; uint64_t v; - int ret; /* Ignore statistics if they're not configured. */ if (g.c_statistics == 0) @@ -500,52 +527,44 @@ wts_stats(void) conn = g.wts_conn; track("stat", 0ULL, NULL); - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) - die(ret, "connection.open_session"); + testutil_check(conn->open_session(conn, NULL, NULL, &session)); if ((fp = fopen(g.home_stats, "w")) == NULL) - die(errno, "fopen: %s", g.home_stats); + testutil_die(errno, "fopen: %s", g.home_stats); /* Connection statistics. */ fprintf(fp, "====== Connection statistics:\n"); - if ((ret = session->open_cursor(session, - "statistics:", NULL, NULL, &cursor)) != 0) - die(ret, "session.open_cursor"); + testutil_check(session->open_cursor( + session, "statistics:", NULL, NULL, &cursor)); while ((ret = cursor->next(cursor)) == 0 && (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0) if (fprintf(fp, "%s=%s\n", desc, pval) < 0) - die(errno, "fprintf"); + testutil_die(errno, "fprintf"); if (ret != WT_NOTFOUND) - die(ret, "cursor.next"); - if ((ret = cursor->close(cursor)) != 0) - die(ret, "cursor.close"); + testutil_die(ret, "cursor.next"); + testutil_check(cursor->close(cursor)); /* Data source statistics. */ fprintf(fp, "\n\n====== Data source statistics:\n"); - if ((stat_name = - malloc(strlen("statistics:") + strlen(g.uri) + 1)) == NULL) - die(errno, "malloc"); - sprintf(stat_name, "statistics:%s", g.uri); - if ((ret = session->open_cursor( - session, stat_name, NULL, NULL, &cursor)) != 0) - die(ret, "session.open_cursor"); + len = strlen("statistics:") + strlen(g.uri) + 1; + stat_name = dmalloc(len); + snprintf(stat_name, len, "statistics:%s", g.uri); + testutil_check(session->open_cursor( + session, stat_name, NULL, NULL, &cursor)); free(stat_name); while ((ret = cursor->next(cursor)) == 0 && (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0) if (fprintf(fp, "%s=%s\n", desc, pval) < 0) - die(errno, "fprintf"); + testutil_die(errno, "fprintf"); if (ret != WT_NOTFOUND) - die(ret, "cursor.next"); - if ((ret = cursor->close(cursor)) != 0) - die(ret, "cursor.close"); + testutil_die(ret, "cursor.next"); + testutil_check(cursor->close(cursor)); - if ((ret = fclose(fp)) != 0) - die(ret, "fclose"); + fclose_and_clear(&fp); - if ((ret = session->close(session, NULL)) != 0) - die(ret, "session.close"); + testutil_check(session->close(session, NULL)); } diff --git a/src/third_party/wiredtiger/test/recovery/Makefile.am b/src/third_party/wiredtiger/test/recovery/Makefile.am new file mode 100644 index 00000000000..688571ec5d8 --- /dev/null +++ b/src/third_party/wiredtiger/test/recovery/Makefile.am @@ -0,0 +1,18 @@ +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS +=-I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility + +noinst_PROGRAMS = random-abort truncated-log +random_abort_SOURCES = random-abort.c +random_abort_LDADD =$(top_builddir)/libwiredtiger.la +random_abort_LDFLAGS = -static + +truncated_log_SOURCES = truncated-log.c +truncated_log_LDADD =$(top_builddir)/libwiredtiger.la +truncated_log_LDFLAGS = -static + +# Run this during a "make check" smoke test. +TESTS = smoke.sh + +clean-local: + rm -rf WT_TEST.* *.core diff --git a/src/third_party/wiredtiger/test/recovery/random-abort.c b/src/third_party/wiredtiger/test/recovery/random-abort.c new file mode 100644 index 00000000000..31e4b0f30ff --- /dev/null +++ b/src/third_party/wiredtiger/test/recovery/random-abort.c @@ -0,0 +1,428 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <wiredtiger.h> +#include "wt_internal.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <sys/wait.h> +#include <signal.h> + +static char home[512]; /* Program working dir */ +static const char *progname; /* Program name */ +static const char * const uri = "table:main"; +static int inmem; + +#define MAX_TH 12 +#define MIN_TH 5 +#define MAX_TIME 40 +#define MIN_TIME 10 +#define RECORDS_FILE "records-%" PRIu32 + +#define ENV_CONFIG_DEF \ + "create,log=(file_max=10M,archive=false,enabled)," \ + "transaction_sync=(enabled=false,method=none)" +#define ENV_CONFIG_TXNSYNC \ + "create,log=(file_max=10M,archive=false,enabled)," \ + "transaction_sync=(enabled,method=none)" +#define ENV_CONFIG_REC "log=(recover=on)" +#define MAX_VAL 4096 + +static void +usage(void) +{ + fprintf(stderr, "usage: %s [-h dir] [-T threads]\n", progname); + exit(EXIT_FAILURE); +} + +static void +die(int e, const char *m) +{ + fprintf(stderr, "%s: %s: %s\n", progname, m, wiredtiger_strerror(e)); + exit(EXIT_FAILURE); +} + +typedef struct { + WT_CONNECTION *conn; + uint64_t start; + uint32_t id; +} WT_THREAD_DATA; + +static void * +thread_run(void *arg) +{ + FILE *fp; + WT_CURSOR *cursor; + WT_ITEM data; + WT_RAND_STATE rnd; + WT_SESSION *session; + WT_THREAD_DATA *td; + uint64_t i; + int ret; + size_t lsize; + char buf[MAX_VAL], kname[64], lgbuf[8]; + char large[128*1024]; + + __wt_random_init(&rnd); + memset(buf, 0, sizeof(buf)); + memset(kname, 0, sizeof(kname)); + lsize = sizeof(large); + memset(large, 0, lsize); + + td = (WT_THREAD_DATA *)arg; + /* + * The value is the name of the record file with our id appended. + */ + snprintf(buf, sizeof(buf), RECORDS_FILE, td->id); + /* + * Set up a large value putting our id in it. Write it in there a + * bunch of times, but the rest of the buffer can just be zero. + */ + snprintf(lgbuf, sizeof(lgbuf), "th-%" PRIu32, td->id); + for (i = 0; i < 128; i += strlen(lgbuf)) + snprintf(&large[i], lsize - i, "%s", lgbuf); + /* + * Keep a separate file with the records we wrote for checking. + */ + (void)unlink(buf); + if ((fp = fopen(buf, "w")) == NULL) + die(errno, "fopen"); + /* + * Set to line buffering. But that is advisory only. We've seen + * cases where the result files end up with partial lines. + */ + (void)setvbuf(fp, NULL, _IOLBF, 1024); + if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0) + die(ret, "WT_CONNECTION:open_session"); + if ((ret = + session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) + die(ret, "WT_SESSION.open_cursor"); + data.data = buf; + data.size = sizeof(buf); + /* + * Write our portion of the key space until we're killed. + */ + for (i = td->start; ; ++i) { + snprintf(kname, sizeof(kname), "%" PRIu64, i); + cursor->set_key(cursor, kname); + /* + * Every 30th record write a very large record that exceeds the + * log buffer size. This forces us to use the unbuffered path. + */ + if (i % 30 == 0) { + data.size = 128 * 1024; + data.data = large; + } else { + data.size = __wt_random(&rnd) % MAX_VAL; + data.data = buf; + } + cursor->set_value(cursor, &data); + if ((ret = cursor->insert(cursor)) != 0) + die(ret, "WT_CURSOR.insert"); + /* + * Save the key separately for checking later. + */ + if (fprintf(fp, "%" PRIu64 "\n", i) == -1) + die(errno, "fprintf"); + } + return (NULL); +} + +/* + * Child process creates the database and table, and then creates worker + * threads to add data until it is killed by the parent. + */ +static void +fill_db(uint32_t nth) +{ + pthread_t *thr; + WT_CONNECTION *conn; + WT_SESSION *session; + WT_THREAD_DATA *td; + uint32_t i; + int ret; + const char *envconf; + + thr = calloc(nth, sizeof(pthread_t)); + td = calloc(nth, sizeof(WT_THREAD_DATA)); + if (chdir(home) != 0) + die(errno, "Child chdir"); + if (inmem) + envconf = ENV_CONFIG_DEF; + else + envconf = ENV_CONFIG_TXNSYNC; + if ((ret = wiredtiger_open(NULL, NULL, envconf, &conn)) != 0) + die(ret, "wiredtiger_open"); + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "WT_CONNECTION:open_session"); + if ((ret = session->create(session, + uri, "key_format=S,value_format=u")) != 0) + die(ret, "WT_SESSION.create"); + if ((ret = session->close(session, NULL)) != 0) + die(ret, "WT_SESSION:close"); + + printf("Create %" PRIu32 " writer threads\n", nth); + for (i = 0; i < nth; ++i) { + td[i].conn = conn; + td[i].start = (UINT64_MAX / nth) * i; + td[i].id = i; + if ((ret = pthread_create( + &thr[i], NULL, thread_run, &td[i])) != 0) + die(ret, "pthread_create"); + } + printf("Spawned %" PRIu32 " writer threads\n", nth); + fflush(stdout); + /* + * The threads never exit, so the child will just wait here until + * it is killed. + */ + for (i = 0; i < nth; ++i) + (void)pthread_join(thr[i], NULL); + /* + * NOTREACHED + */ + free(thr); + free(td); + exit(EXIT_SUCCESS); +} + +extern int __wt_optind; +extern char *__wt_optarg; + +int +main(int argc, char *argv[]) +{ + FILE *fp; + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_SESSION *session; + WT_RAND_STATE rnd; + uint64_t absent, count, key, last_key, middle; + uint32_t i, nth, timeout; + int ch, status, ret; + pid_t pid; + int fatal, rand_th, rand_time, verify_only; + const char *working_dir; + char cmd[1024], fname[64], kname[64]; + + if ((progname = strrchr(argv[0], '/')) == NULL) + progname = argv[0]; + else + ++progname; + + inmem = 0; + nth = MIN_TH; + rand_th = rand_time = 1; + timeout = MIN_TIME; + verify_only = 0; + working_dir = "WT_TEST.random-abort"; + + while ((ch = __wt_getopt(progname, argc, argv, "h:mT:t:v")) != EOF) + switch (ch) { + case 'h': + working_dir = __wt_optarg; + break; + case 'm': + inmem = 1; + break; + case 'T': + rand_th = 0; + nth = (uint32_t)atoi(__wt_optarg); + break; + case 't': + rand_time = 0; + timeout = (uint32_t)atoi(__wt_optarg); + break; + case 'v': + verify_only = 1; + break; + default: + usage(); + } + argc -= __wt_optind; + argv += __wt_optind; + if (argc != 0) + usage(); + + if ((strlen(working_dir) + 1) > 512) { + fprintf(stderr, + "Not enough memory in buffer for directory %s\n", + working_dir); + return (EXIT_FAILURE); + } + snprintf(home, 512, "%s", working_dir); + /* + * If the user wants to verify they need to tell us how many threads + * there were so we can find the old record files. + */ + if (verify_only && rand_th) { + fprintf(stderr, + "Verify option requires specifying number of threads\n"); + exit (EXIT_FAILURE); + } + if (!verify_only) { + snprintf(cmd, 1024, "rm -rf %s", home); + if ((ret = system(cmd)) != 0) + die(ret, "system rm"); + snprintf(cmd, 1024, "mkdir %s", home); + if ((ret = system(cmd)) != 0) + die(ret, "system mkdir"); + + __wt_random_init(&rnd); + if (rand_time) { + timeout = __wt_random(&rnd) % MAX_TIME; + if (timeout < MIN_TIME) + timeout = MIN_TIME; + } + if (rand_th) { + nth = __wt_random(&rnd) % MAX_TH; + if (nth < MIN_TH) + nth = MIN_TH; + } + printf("Parent: Create %" PRIu32 + " threads; sleep %" PRIu32 " seconds\n", nth, timeout); + /* + * Fork a child to insert as many items. We will then randomly + * kill the child, run recovery and make sure all items we wrote + * exist after recovery runs. + */ + if ((pid = fork()) < 0) + die(errno, "fork"); + + if (pid == 0) { /* child */ + fill_db(nth); + return (EXIT_SUCCESS); + } + + /* parent */ + /* + * Sleep for the configured amount of time before killing + * the child. + */ + sleep(timeout); + + /* + * !!! It should be plenty long enough to make sure more than + * one log file exists. If wanted, that check would be added + * here. + */ + printf("Kill child\n"); + if (kill(pid, SIGKILL) != 0) + die(errno, "kill"); + if (waitpid(pid, &status, 0) == -1) + die(errno, "waitpid"); + } + /* + * !!! If we wanted to take a copy of the directory before recovery, + * this is the place to do it. + */ + if (chdir(home) != 0) + die(errno, "parent chdir"); + printf("Open database, run recovery and verify content\n"); + if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0) + die(ret, "wiredtiger_open"); + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "WT_CONNECTION:open_session"); + if ((ret = + session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) + die(ret, "WT_SESSION.open_cursor"); + + absent = count = 0; + fatal = 0; + for (i = 0; i < nth; ++i) { + middle = 0; + snprintf(fname, sizeof(fname), RECORDS_FILE, i); + if ((fp = fopen(fname, "r")) == NULL) { + fprintf(stderr, + "Failed to open %s. i %" PRIu32 "\n", fname, i); + die(errno, "fopen"); + } + + /* + * For every key in the saved file, verify that the key exists + * in the table after recovery. If we're doing in-memory + * log buffering we never expect a record missing in the middle, + * but records may be missing at the end. If we did + * write-no-sync, we expect every key to have been recovered. + */ + for (last_key = UINT64_MAX;; ++count, last_key = key) { + ret = fscanf(fp, "%" SCNu64 "\n", &key); + if (ret != EOF && ret != 1) + die(errno, "fscanf"); + if (ret == EOF) + break; + /* + * If we're unlucky, the last line may be a partially + * written key at the end that can result in a false + * negative error for a missing record. Detect it. + */ + if (last_key != UINT64_MAX && key != last_key + 1) { + printf("%s: Ignore partial record %" PRIu64 + " last valid key %" PRIu64 "\n", + fname, key, last_key); + break; + } + snprintf(kname, sizeof(kname), "%" PRIu64, key); + cursor->set_key(cursor, kname); + if ((ret = cursor->search(cursor)) != 0) { + if (ret != WT_NOTFOUND) + die(ret, "search"); + if (!inmem) + printf("%s: no record with key %" + PRIu64 "\n", fname, key); + absent++; + middle = key; + } else if (middle != 0) { + /* + * We should never find an existing key after + * we have detected one missing. + */ + printf("%s: after absent record at %" PRIu64 + " key %" PRIu64 " exists\n", + fname, middle, key); + fatal = 1; + } + } + if (fclose(fp) != 0) + die(errno, "fclose"); + } + if ((ret = conn->close(conn, NULL)) != 0) + die(ret, "WT_CONNECTION:close"); + if (fatal) + return (EXIT_FAILURE); + if (!inmem && absent) { + printf("%" PRIu64 " record(s) absent from %" PRIu64 "\n", + absent, count); + return (EXIT_FAILURE); + } + printf("%" PRIu64 " records verified\n", count); + return (EXIT_SUCCESS); +} diff --git a/src/third_party/wiredtiger/test/recovery/smoke.sh b/src/third_party/wiredtiger/test/recovery/smoke.sh new file mode 100755 index 00000000000..ce0662d3b2b --- /dev/null +++ b/src/third_party/wiredtiger/test/recovery/smoke.sh @@ -0,0 +1,9 @@ +#! /bin/sh + +set -e + +# Smoke-test recovery as part of running "make check". + +$TEST_WRAPPER ./random-abort -t 10 -T 5 +$TEST_WRAPPER ./random-abort -m -t 10 -T 5 +$TEST_WRAPPER ./truncated-log diff --git a/src/third_party/wiredtiger/test/recovery/truncated-log.c b/src/third_party/wiredtiger/test/recovery/truncated-log.c new file mode 100644 index 00000000000..9a16fafa49d --- /dev/null +++ b/src/third_party/wiredtiger/test/recovery/truncated-log.c @@ -0,0 +1,398 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <wiredtiger.h> +#include "wt_internal.h" + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <sys/wait.h> + +#ifdef _WIN32 +/* snprintf is not supported on <= VS2013 */ +#define snprintf _snprintf +#endif + +static char home[512]; /* Program working dir */ +static const char *progname; /* Program name */ +static const char * const uri = "table:main"; + +#define RECORDS_FILE "records" + +#define ENV_CONFIG \ + "create,log=(file_max=100K,archive=false,enabled)," \ + "transaction_sync=(enabled,method=none)" +#define ENV_CONFIG_REC "log=(recover=on)" + +#define LOG_FILE_1 "WiredTigerLog.0000000001" + +#define K_SIZE 16 +#define V_SIZE 256 + +static void +die(int e, const char *m) +{ + fprintf(stderr, "%s: %s: %s\n", progname, m, wiredtiger_strerror(e)); + exit(EXIT_FAILURE); +} + +/* + * Write a new log record into the log via log print, then open up a log + * cursor and walk the log to make sure we can read it. The reason for this + * test is that if there is a partial log record at the end of the previous + * log file and truncate does not exist, this tests that we can still read + * past that record. + */ +static void +write_and_read_new(WT_SESSION *session) +{ + WT_CURSOR *logc; + WT_ITEM logrec_key, logrec_value; + uint64_t txnid; + uint32_t fileid, log_file, log_offset, opcount, optype, rectype; + int ret; + int saw_msg; + + /* + * Write a log record and force it to disk so we can read it. + */ + printf("Write log_printf record and verify.\n"); + if ((ret = session->log_printf(session, "Test Log Record")) != 0) + die(ret, "log_printf"); + if ((ret = session->log_flush(session, "sync=on")) != 0) + die(ret, "log_flush"); + if ((ret = session->open_cursor( + session, "log:", NULL, NULL, &logc)) != 0) + die(ret, "open_cursor: log"); + if ((ret = session->open_cursor( + session, "log:", NULL, NULL, &logc)) != 0) + die(ret, "open_cursor: log"); + saw_msg = 0; + while ((ret = logc->next(logc)) == 0) { + /* + * We don't really need to get the key, but in case we want + * the LSN for some message, get it. + */ + if ((ret = logc->get_key(logc, + &log_file, &log_offset, &opcount)) != 0) + die(errno, "get_key"); + if ((ret = logc->get_value(logc, &txnid, &rectype, + &optype, &fileid, &logrec_key, &logrec_value)) != 0) + die(errno, "get_value"); + /* + * We should never see a record from log file 2. We wrote + * a record there, but then the record in log file 1 was + * truncated to be a partial record, ending the log there. + * So everything after that, including everything in log + * file 2, is invalid until we get to log file 3 which is where + * the post-recovery records will be written. + */ + if (log_file == 2) + die(EINVAL, "Found LSN in Log 2"); +#if 0 + printf("LSN [%" PRIu32 "][%" PRIu32 "].%" PRIu32 + ": record type %" PRIu32 " optype %" PRIu32 + " txnid %" PRIu64 " fileid %" PRIu32 "\n", + log_file, log_offset, opcount, + rectype, optype, txnid, fileid); +#endif + if (rectype == WT_LOGREC_MESSAGE) { + saw_msg = 1; + printf("Application Record: %s\n", + (char *)logrec_value.data); + break; + } + } + if ((ret = logc->close(logc)) != 0) + die(ret, "log cursor close"); + if (!saw_msg) + die(EINVAL, "Did not traverse log printf record"); +} + +static void +usage(void) +{ + fprintf(stderr, "usage: %s [-h dir]\n", progname); + exit(EXIT_FAILURE); +} + +/* + * Child process creates the database and table, and then writes data into + * the table until it is killed by the parent. + */ +static void +fill_db(void) +{ + FILE *fp; + WT_CONNECTION *conn; + WT_CURSOR *cursor, *logc; + WT_LSN lsn, save_lsn; + WT_SESSION *session; + uint32_t i, max_key, min_key, units, unused; + int ret; + int first; + char k[K_SIZE], v[V_SIZE]; + + /* + * Run in the home directory so that the records file is in there too. + */ + if (chdir(home) != 0) + die(errno, "chdir"); + if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0) + die(ret, "wiredtiger_open"); + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "WT_CONNECTION:open_session"); + if ((ret = session->create(session, + uri, "key_format=S,value_format=S")) != 0) + die(ret, "WT_SESSION.create"); + if ((ret = + session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) + die(ret, "WT_SESSION.open_cursor"); + + /* + * Keep a separate file with the records we wrote for checking. + */ + (void)unlink(RECORDS_FILE); + if ((fp = fopen(RECORDS_FILE, "w")) == NULL) + die(errno, "fopen"); + /* + * Set to no buffering. + */ + (void)setvbuf(fp, NULL, _IONBF, 0); + save_lsn.file = 0; + + /* + * Write data into the table until we move to log file 2. + * We do the calculation below so that we don't have to walk the + * log for every record. + * + * Calculate about how many records should fit in the log file. + * Subtract a bunch for metadata and file creation records. + * Then subtract out a few more records to be conservative. + */ + units = (K_SIZE + V_SIZE) / 128 + 1; + min_key = 90000 / (units * 128) - 15; + max_key = min_key * 2; + first = 1; + for (i = 0; i < max_key; ++i) { + snprintf(k, sizeof(k), "key%03d", (int)i); + snprintf(v, sizeof(v), "value%0*d", + (int)(V_SIZE - strlen("value")), (int)i); + cursor->set_key(cursor, k); + cursor->set_value(cursor, v); + if ((ret = cursor->insert(cursor)) != 0) + die(ret, "WT_CURSOR.insert"); + + /* + * Walking the ever growing log can be slow, so only start + * looking for the cross into log file 2 after a minimum. + */ + if (i > min_key) { + if ((ret = session->open_cursor( + session, "log:", NULL, NULL, &logc)) != 0) + die(ret, "open_cursor: log"); + if (save_lsn.file != 0) { + logc->set_key(logc, + save_lsn.file, save_lsn.offset, 0); + if ((ret = logc->search(logc)) != 0) + die(ret, "search"); + } + while ((ret = logc->next(logc)) == 0) { + if ((ret = logc->get_key(logc, + &lsn.file, &lsn.offset, &unused)) != 0) + die(ret, "get_key"); + /* + * Save the LSN so that we know the offset + * of the last LSN in log file 1 later. + */ + if (lsn.file < 2) + save_lsn = lsn; + else { + /* + * If this is the first time through + * that the key is larger than the + * minimum key and we're already in + * log file 2 then we did not calculate + * correctly and the test should fail. + */ + if (first) + die(EINVAL, + "min_key too high"); + if (fprintf(fp, + "%" PRIu64 " %" PRIu32 "\n", + save_lsn.offset, i - 1) == -1) + die(errno, "fprintf"); + break; + } + } + first = 0; + if ((ret = logc->close(logc)) != 0) + die(ret, "log cursor close"); + } + } + if (fclose(fp) != 0) + die(errno, "fclose"); + abort(); + /* NOTREACHED */ +} + +extern int __wt_optind; +extern char *__wt_optarg; + +int +main(int argc, char *argv[]) +{ + FILE *fp; + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_SESSION *session; + uint64_t new_offset, offset; + uint32_t count, max_key; + int ch, status, ret; + pid_t pid; + const char *working_dir; + char cmd[1024]; + + if ((progname = strrchr(argv[0], '/')) == NULL) + progname = argv[0]; + else + ++progname; + + working_dir = "WT_TEST.truncated-log"; + while ((ch = __wt_getopt(progname, argc, argv, "h:")) != EOF) + switch (ch) { + case 'h': + working_dir = __wt_optarg; + break; + default: + usage(); + } + argc -= __wt_optind; + argv += __wt_optind; + if (argc != 0) + usage(); + + if ((strlen(working_dir) + 1) > 512) { + fprintf(stderr, + "Not enough memory in buffer for directory %s\n", + working_dir); + return (EXIT_FAILURE); + } + snprintf(home, 512, "%s", working_dir); + snprintf(cmd, 1024, "rm -rf %s", home); + if ((ret = system(cmd)) != 0) + die(ret, "system rm"); + snprintf(cmd, 1024, "mkdir %s", home); + if ((ret = system(cmd)) != 0) + die(ret, "system mkdir"); + + /* + * Fork a child to insert as many items. We will then randomly + * kill the child, run recovery and make sure all items we wrote + * exist after recovery runs. + */ + if ((pid = fork()) < 0) + die(errno, "fork"); + + if (pid == 0) { /* child */ + fill_db(); + return (EXIT_SUCCESS); + } + + /* parent */ + /* Wait for child to kill itself. */ + if (waitpid(pid, &status, 0) == -1) + die(errno, "waitpid"); + + /* + * !!! If we wanted to take a copy of the directory before recovery, + * this is the place to do it. + */ + if (chdir(home) != 0) + die(errno, "chdir"); + + printf("Open database, run recovery and verify content\n"); + if ((fp = fopen(RECORDS_FILE, "r")) == NULL) + die(errno, "fopen"); + ret = fscanf(fp, "%" SCNu64 " %" SCNu32 "\n", &offset, &max_key); + if (ret != 2) + die(errno, "fscanf"); + if (fclose(fp) != 0) + die(errno, "fclose"); + /* + * The offset is the beginning of the last record. Truncate to + * the middle of that last record (i.e. ahead of that offset). + */ + if (offset > UINT64_MAX - V_SIZE) + die(ERANGE, "offset"); + new_offset = offset + V_SIZE; + printf("Parent: Log file 1: Key %" PRIu32 " at %" PRIu64 "\n", + max_key, offset); + printf("Parent: Truncate mid-record to %" PRIu64 "\n", new_offset); + if ((ret = truncate(LOG_FILE_1, (wt_off_t)new_offset)) != 0) + die(errno, "truncate"); + + if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0) + die(ret, "wiredtiger_open"); + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + die(ret, "WT_CONNECTION:open_session"); + if ((ret = + session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) + die(ret, "WT_SESSION.open_cursor"); + + /* + * For every key in the saved file, verify that the key exists + * in the table after recovery. Since we did write-no-sync, we + * expect every key to have been recovered. + */ + count = 0; + while ((ret = cursor->next(cursor)) == 0) + ++count; + /* + * The max key in the saved file is the key we truncated, but the + * key space starts at 0 and we're counting the records here, so we + * expect the max key number of records. + */ + if (count > max_key) { + printf("expected %" PRIu32 " records found %" PRIu32 "\n", + max_key, count); + return (EXIT_FAILURE); + } + printf("%" PRIu32 " records verified\n", count); + + /* + * Write a log record and then walk the log to make sure we can + * read that log record that is beyond the truncated record. + */ + write_and_read_new(session); + if ((ret = conn->close(conn, NULL)) != 0) + die(ret, "WT_CONNECTION:close"); + return (EXIT_SUCCESS); +} diff --git a/src/third_party/wiredtiger/test/utility/Makefile.am b/src/third_party/wiredtiger/test/utility/Makefile.am new file mode 100644 index 00000000000..a2923eb41a8 --- /dev/null +++ b/src/third_party/wiredtiger/test/utility/Makefile.am @@ -0,0 +1,4 @@ +AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include + +libtest_util_la_SOURCES = misc.c parse_opts.c thread.c +noinst_LTLIBRARIES = libtest_util.la diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c new file mode 100644 index 00000000000..1491c9a6938 --- /dev/null +++ b/src/third_party/wiredtiger/test/utility/misc.c @@ -0,0 +1,236 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +void (*custom_die)(void) = NULL; + +/* + * die -- + * Report an error and quit. + */ +void +testutil_die(int e, const char *fmt, ...) +{ + va_list ap; + + /* Allow test programs to cleanup on fatal error. */ + if (custom_die != NULL) + (*custom_die)(); + + if (fmt != NULL) { + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + } + if (e != 0) + fprintf(stderr, ": %s", wiredtiger_strerror(e)); + fprintf(stderr, "\n"); + + exit(EXIT_FAILURE); +} + +/* + * testutil_work_dir_from_path -- + * Takes a buffer, its size and the intended work directory. + * Creates the full intended work directory in buffer. + */ +void +testutil_work_dir_from_path(char *buffer, size_t len, const char *dir) +{ + /* If no directory is provided, use the default. */ + if (dir == NULL) + dir = DEFAULT_DIR; + + if (len < strlen(dir) + 1) + testutil_die(ENOMEM, + "Not enough memory in buffer for directory %s", dir); + + strcpy(buffer, dir); +} + +/* + * testutil_clean_work_dir -- + * Remove the work directory. + */ +void +testutil_clean_work_dir(char *dir) +{ + size_t len; + int ret; + char *buf; + +#ifdef _WIN32 + /* Additional bytes for the Windows rd command. */ + len = 2 * strlen(dir) + strlen(RM_COMMAND) + + strlen(DIR_EXISTS_COMMAND) + 4; + if ((buf = malloc(len)) == NULL) + testutil_die(ENOMEM, "Failed to allocate memory"); + + snprintf(buf, len, "%s %s %s %s", DIR_EXISTS_COMMAND, dir, + RM_COMMAND, dir); +#else + len = strlen(dir) + strlen(RM_COMMAND) + 1; + if ((buf = malloc(len)) == NULL) + testutil_die(ENOMEM, "Failed to allocate memory"); + + snprintf(buf, len, "%s%s", RM_COMMAND, dir); +#endif + + if ((ret = system(buf)) != 0 && ret != ENOENT) + testutil_die(ret, "%s", buf); + free(buf); +} + +/* + * testutil_make_work_dir -- + * Delete the existing work directory, then create a new one. + */ +void +testutil_make_work_dir(char *dir) +{ + size_t len; + int ret; + char *buf; + + testutil_clean_work_dir(dir); + + /* Additional bytes for the mkdir command */ + len = strlen(dir) + strlen(MKDIR_COMMAND) + 1; + if ((buf = malloc(len)) == NULL) + testutil_die(ENOMEM, "Failed to allocate memory"); + + /* mkdir shares syntax between Windows and Linux */ + snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir); + if ((ret = system(buf)) != 0) + testutil_die(ret, "%s", buf); + free(buf); +} + +/* + * testutil_cleanup -- + * Delete the existing work directory and free the options structure. + */ +void +testutil_cleanup(TEST_OPTS *opts) +{ + if (opts->conn != NULL) + testutil_check(opts->conn->close(opts->conn, NULL)); + + if (!opts->preserve) + testutil_clean_work_dir(opts->home); + + free(opts->uri); + free(opts->home); +} + +/* + * testutil_disable_long_tests -- + * Return if TESTUTIL_DISABLE_LONG_TESTS is set. + */ +bool +testutil_disable_long_tests(void) +{ + const char *res; + + if (__wt_getenv(NULL, + "TESTUTIL_DISABLE_LONG_TESTS", &res) == WT_NOTFOUND) + return (false); + + free((void *)res); + return (true); +} + +/* + * dcalloc -- + * Call calloc, dying on failure. + */ +void * +dcalloc(size_t number, size_t size) +{ + void *p; + + if ((p = calloc(number, size)) != NULL) + return (p); + testutil_die(errno, "calloc: %" WT_SIZET_FMT "B", number * size); +} + +/* + * dmalloc -- + * Call malloc, dying on failure. + */ +void * +dmalloc(size_t len) +{ + void *p; + + if ((p = malloc(len)) != NULL) + return (p); + testutil_die(errno, "malloc: %" WT_SIZET_FMT "B", len); +} + +/* + * drealloc -- + * Call realloc, dying on failure. + */ +void * +drealloc(void *p, size_t len) +{ + void *t; + if ((t = realloc(p, len)) != NULL) + return (t); + testutil_die(errno, "realloc: %" WT_SIZET_FMT "B", len); +} + +/* + * dstrdup -- + * Call strdup, dying on failure. + */ +void * +dstrdup(const void *str) +{ + char *p; + + if ((p = strdup(str)) != NULL) + return (p); + testutil_die(errno, "strdup"); +} + +/* + * dstrndup -- + * Call emulating strndup, dying on failure. Don't use actual strndup here + * as it is not supported within MSVC. + */ +void * +dstrndup(const char *str, size_t len) +{ + char *p; + + p = dcalloc(len + 1, sizeof(char)); + memcpy(p, str, len); + return (p); +} diff --git a/src/third_party/wiredtiger/test/utility/parse_opts.c b/src/third_party/wiredtiger/test/utility/parse_opts.c new file mode 100644 index 00000000000..74a1c021d5d --- /dev/null +++ b/src/third_party/wiredtiger/test/utility/parse_opts.c @@ -0,0 +1,130 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +extern char *__wt_optarg; /* argument associated with option */ + +/* + * testutil_parse_opts -- + * Parse command line options for a test case. + */ +int +testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts) +{ + int ch; + size_t len; + + opts->preserve = false; + opts->running = true; + opts->verbose = false; + + if ((opts->progname = strrchr(argv[0], DIR_DELIM)) == NULL) + opts->progname = argv[0]; + else + ++opts->progname; + + while ((ch = __wt_getopt(opts->progname, + argc, argv, "A:h:n:o:pR:T:t:vW:")) != EOF) + switch (ch) { + case 'A': /* Number of append threads */ + opts->n_append_threads = (uint64_t)atoll(__wt_optarg); + break; + case 'h': /* Home directory */ + opts->home = dstrdup(__wt_optarg); + break; + case 'n': /* Number of records */ + opts->nrecords = (uint64_t)atoll(__wt_optarg); + break; + case 'o': /* Number of operations */ + opts->nops = (uint64_t)atoll(__wt_optarg); + break; + case 'p': /* Preserve directory contents */ + opts->preserve = true; + break; + case 'R': /* Number of reader threads */ + opts->n_read_threads = (uint64_t)atoll(__wt_optarg); + break; + case 'T': /* Number of threads */ + opts->nthreads = (uint64_t)atoll(__wt_optarg); + break; + case 't': /* Table type */ + switch (__wt_optarg[0]) { + case 'C': + case 'c': + opts->table_type = TABLE_COL; + break; + case 'F': + case 'f': + opts->table_type = TABLE_FIX; + break; + case 'R': + case 'r': + opts->table_type = TABLE_ROW; + break; + } + break; + case 'v': + opts->verbose = true; + break; + case 'W': /* Number of writer threads */ + opts->n_write_threads = (uint64_t)atoll(__wt_optarg); + break; + case '?': + default: + (void)fprintf(stderr, "usage: %s " + "[-A append thread count] " + "[-h home] " + "[-n record count] " + "[-o op count] " + "[-p] " + "[-R read thread count] " + "[-T thread count] " + "[-t c|f|r table type] " + "[-v] " + "[-W write thread count] ", + opts->progname); + return (1); + } + + /* + * Setup the home directory if not explicitly specified. It needs to be + * unique for every test or the auto make parallel tester gets upset. + */ + if (opts->home == NULL) { + len = strlen("WT_TEST.") + strlen(opts->progname) + 10; + opts->home = dmalloc(len); + snprintf(opts->home, len, "WT_TEST.%s", opts->progname); + } + + /* Setup the default URI string */ + len = strlen("table:") + strlen(opts->progname) + 10; + opts->uri = dmalloc(len); + snprintf(opts->uri, len, "table:%s", opts->progname); + + return (0); +} diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h new file mode 100644 index 00000000000..f6a9cd68e02 --- /dev/null +++ b/src/third_party/wiredtiger/test/utility/test_util.h @@ -0,0 +1,194 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "wt_internal.h" /* For __wt_XXX */ + +#ifdef _WIN32 + #define DIR_DELIM '\\' + #define DIR_DELIM_STR "\\" + #define DIR_EXISTS_COMMAND "IF EXIST " + #define RM_COMMAND "rd /s /q " +#else + #define DIR_DELIM '/' + #define DIR_DELIM_STR "/" + #define RM_COMMAND "rm -rf " +#endif + +#define DEFAULT_DIR "WT_TEST" +#define MKDIR_COMMAND "mkdir " + +#ifdef _WIN32 +#include "windows_shim.h" +#endif + +/* Generic option parsing structure shared by all test cases. */ +typedef struct { + char *home; + char *progname; + enum { TABLE_COL=1, /* Fixed-length column store */ + TABLE_FIX=2, /* Variable-length column store */ + TABLE_ROW=3 /* Row-store */ + } table_type; + bool preserve; /* Don't remove files on exit */ + bool verbose; /* Run in verbose mode */ + uint64_t nrecords; /* Number of records */ + uint64_t nops; /* Number of operations */ + uint64_t nthreads; /* Number of threads */ + uint64_t n_append_threads; /* Number of append threads */ + uint64_t n_read_threads; /* Number of read threads */ + uint64_t n_write_threads; /* Number of write threads */ + + /* + * Fields commonly shared within a test program. The test cleanup + * function will attempt to automatically free and close non-null + * resources. + */ + WT_CONNECTION *conn; + WT_SESSION *session; + bool running; + char *uri; + volatile uint64_t next_threadid; + uint64_t max_inserted_id; +} TEST_OPTS; + +/* + * testutil_assert -- + * Complain and quit if something isn't true. + */ +#define testutil_assert(a) do { \ + if (!(a)) \ + testutil_die(0, "%s/%d: %s", __func__, __LINE__, #a); \ +} while (0) + +/* + * testutil_assertfmt -- + * Complain and quit if something isn't true. + */ +#define testutil_assertfmt(a, fmt, ...) do { \ + if (!(a)) \ + testutil_die(0, "%s/%d: %s: " fmt, \ + __func__, __LINE__, #a, __VA_ARGS__); \ +} while (0) + +/* + * testutil_check -- + * Complain and quit if a function call fails. + */ +#define testutil_check(call) do { \ + int __r; \ + if ((__r = (call)) != 0) \ + testutil_die( \ + __r, "%s/%d: %s", __func__, __LINE__, #call); \ +} while (0) + +/* + * testutil_checkfmt -- + * Complain and quit if a function call fails, with additional arguments. + */ +#define testutil_checkfmt(call, fmt, ...) do { \ + int __r; \ + if ((__r = (call)) != 0) \ + testutil_die(__r, "%s/%d: %s: " fmt, \ + __func__, __LINE__, #call, __VA_ARGS__); \ +} while (0) + +/* + * u64_to_string -- + * Convert a uint64_t to a text string. + * + * Algorithm from Andrei Alexandrescu's talk: "Three Optimization Tips for C++" + */ +static inline void +u64_to_string(uint64_t n, char **pp) +{ + static const char hundred_lookup[201] = + "0001020304050607080910111213141516171819" + "2021222324252627282930313233343536373839" + "4041424344454647484950515253545556575859" + "6061626364656667686970717273747576777879" + "8081828384858687888990919293949596979899"; + u_int i; + char *p; + + /* + * The argument pointer references the last element of a buffer (which + * must be large enough to hold any possible value). + * + * Nul-terminate the buffer. + */ + for (p = *pp, *p-- = '\0'; n >= 100; n /= 100) { + i = (n % 100) * 2; + *p-- = hundred_lookup[i + 1]; + *p-- = hundred_lookup[i]; + } + + /* Handle the last two digits. */ + i = (u_int)n * 2; + *p = hundred_lookup[i + 1]; + if (n >= 10) + *--p = hundred_lookup[i]; + + /* Return a pointer to the first byte of the text string. */ + *pp = p; +} + +/* + * u64_to_string_zf -- + * Convert a uint64_t to a text string, zero-filling the buffer. + */ +static inline void +u64_to_string_zf(uint64_t n, char *buf, size_t len) +{ + char *p; + + p = buf + (len - 1); + u64_to_string(n, &p); + + while (p > buf) + *--p = '0'; +} + +/* Allow tests to add their own death handling. */ +extern void (*custom_die)(void); + +void testutil_die(int, const char *, ...) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); + +void *dcalloc(size_t, size_t); +void *dmalloc(size_t); +void *drealloc(void *, size_t); +void *dstrdup(const void *); +void *dstrndup(const char *, size_t); +void testutil_clean_work_dir(char *); +void testutil_cleanup(TEST_OPTS *); +bool testutil_disable_long_tests(void); +void testutil_make_work_dir(char *); +int testutil_parse_opts(int, char * const *, TEST_OPTS *); +void testutil_work_dir_from_path(char *, size_t, const char *); +void *thread_append(void *); +void *thread_insert_append(void *); +void *thread_prev(void *); diff --git a/src/third_party/wiredtiger/test/utility/thread.c b/src/third_party/wiredtiger/test/utility/thread.c new file mode 100644 index 00000000000..38465b2f02b --- /dev/null +++ b/src/third_party/wiredtiger/test/utility/thread.c @@ -0,0 +1,141 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "test_util.h" + +/* + * A thread dedicated to appending records into a table. Works with fixed + * length column stores and variable length column stores. + * One thread (the first thread created by an application) checks for a + * terminating condition after each insert. + */ +void * +thread_append(void *arg) +{ + TEST_OPTS *opts; + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_SESSION *session; + uint64_t id, recno; + char buf[64]; + + opts = (TEST_OPTS *)arg; + conn = opts->conn; + + id = __wt_atomic_fetch_addv64(&opts->next_threadid, 1); + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_check( + session->open_cursor(session, opts->uri, NULL, "append", &cursor)); + + buf[0] = '\2'; + for (recno = 1; opts->running; ++recno) { + if (opts->table_type == TABLE_FIX) + cursor->set_value(cursor, buf[0]); + else { + snprintf(buf, sizeof(buf), + "%" PRIu64 " VALUE ------", recno); + cursor->set_value(cursor, buf); + } + testutil_check(cursor->insert(cursor)); + if (id == 0) { + testutil_check( + cursor->get_key(cursor, &opts->max_inserted_id)); + if (opts->max_inserted_id >= opts->nrecords) + opts->running = false; + } + } + + return (NULL); +} + +/* + * Append into a row store table. + */ +void * +thread_insert_append(void *arg) +{ + TEST_OPTS *opts; + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_SESSION *session; + uint64_t i; + char kbuf[64]; + + opts = (TEST_OPTS *)arg; + conn = opts->conn; + + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_check(session->open_cursor( + session, opts->uri, NULL, NULL, &cursor)); + + for (i = 0; i < opts->nrecords; ++i) { + snprintf(kbuf, sizeof(kbuf), "%010d KEY------", (int)i); + cursor->set_key(cursor, kbuf); + cursor->set_value(cursor, "========== VALUE ======="); + testutil_check(cursor->insert(cursor)); + if (i % 100000 == 0) { + printf("insert: %" PRIu64 "\r", i); + fflush(stdout); + } + } + printf("\n"); + + opts->running = false; + + return (NULL); +} + +/* + * Repeatedly walk backwards through the records in a table. + */ +void * +thread_prev(void *arg) +{ + TEST_OPTS *opts; + WT_CURSOR *cursor; + WT_SESSION *session; + int ret; + + opts = (TEST_OPTS *)arg; + ret = 0; + + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + testutil_check( + session->open_cursor(session, opts->uri, NULL, NULL, &cursor)); + while (opts->running) { + while (opts->running && (ret = cursor->prev(cursor)) == 0) + ; + if (ret == WT_NOTFOUND) + ret = 0; + testutil_check(ret); + } + + testutil_check(session->close(session, NULL)); + return (NULL); +} |