summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger')
-rw-r--r--src/third_party/wiredtiger/build_posix/aclocal/strict.m41
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py12
-rw-r--r--src/third_party/wiredtiger/dist/dist.py2
-rw-r--r--src/third_party/wiredtiger/dist/function.py1
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_function5
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py2
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_all.c15
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/lang/java/java_doc.i1
-rw-r--r--src/third_party/wiredtiger/src/block/block_ckpt.c26
-rw-r--r--src/third_party/wiredtiger/src/block/block_mgr.c14
-rw-r--r--src/third_party/wiredtiger/src/block/block_read.c75
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_cursor.c93
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c12
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c26
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_io.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c26
-rw-r--r--src/third_party/wiredtiger/src/cache/cache_las.c471
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c16
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache.c2
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c3
-rw-r--r--src/third_party/wiredtiger/src/include/block.h1
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i4
-rw-r--r--src/third_party/wiredtiger/src/include/cache.h4
-rw-r--r--src/third_party/wiredtiger/src/include/config.h47
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h1
-rw-r--r--src/third_party/wiredtiger/src/include/error.h7
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h7
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h2
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h34
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in32
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_cursor.c99
-rw-r--r--src/third_party/wiredtiger/src/optrack/optrack.c5
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_abort.c2
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c10
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c8
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c59
-rw-r--r--src/third_party/wiredtiger/src/support/err.c29
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c8
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c62
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c21
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c104
-rw-r--r--src/third_party/wiredtiger/test/csuite/rwlock/main.c2
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c4
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c2
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c10
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c2
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c2
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c2
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c13
-rw-r--r--src/third_party/wiredtiger/test/format/bdb.c2
-rw-r--r--src/third_party/wiredtiger/test/format/format.h8
-rw-r--r--src/third_party/wiredtiger/test/format/lrt.c20
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c465
-rw-r--r--src/third_party/wiredtiger/test/suite/test_assert04.py48
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare01.py179
-rw-r--r--src/third_party/wiredtiger/test/syscall/wt2336_base/main.c2
-rw-r--r--src/third_party/wiredtiger/test/utility/misc.c4
-rw-r--r--src/third_party/wiredtiger/test/utility/thread.c4
59 files changed, 1482 insertions, 642 deletions
diff --git a/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 b/src/third_party/wiredtiger/build_posix/aclocal/strict.m4
index 92e77815a18..9e2bec3f396 100644
--- a/src/third_party/wiredtiger/build_posix/aclocal/strict.m4
+++ b/src/third_party/wiredtiger/build_posix/aclocal/strict.m4
@@ -36,7 +36,6 @@ AC_DEFUN([AM_GCC_WARNINGS], [
w="$w -Wwrite-strings"
# Non-fatal informational warnings.
- w="$w -Wno-error=inline"
w="$w -Wno-error=unsafe-loop-optimizations"
# GCC 4.7
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 78dc25850cf..93fdb0e6a4f 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -1137,6 +1137,10 @@ methods = {
]),
'WT_SESSION.begin_transaction' : Method([
+ Config('ignore_prepare', 'false', r'''
+ whether to ignore the updates by other prepared transactions as part of
+ read operations of this transaction''',
+ type='boolean'),
Config('isolation', '', r'''
the isolation level for this transaction; defaults to the
session's isolation level''',
@@ -1182,6 +1186,14 @@ methods = {
choices=['background', 'off', 'on']),
]),
+'WT_SESSION.prepare_transaction' : Method([
+ Config('prepare_timestamp', '', r'''
+ set the prepare timestamp for the updates of the current transaction.
+ The supplied value should not be older than any active read timestamps.
+ This configuration option is mandatory. See
+ @ref transaction_timestamps'''),
+]),
+
'WT_SESSION.timestamp_transaction' : Method([
Config('commit_timestamp', '', r'''
set the commit timestamp for the current transaction. The supplied
diff --git a/src/third_party/wiredtiger/dist/dist.py b/src/third_party/wiredtiger/dist/dist.py
index e4b76bdbab4..7fe473c3abd 100644
--- a/src/third_party/wiredtiger/dist/dist.py
+++ b/src/third_party/wiredtiger/dist/dist.py
@@ -21,6 +21,8 @@ def all_c_files():
yield line
for line in glob.iglob('../test/*/*.[ci]'):
yield line
+ for line in glob.iglob('../test/*/*/*.[ci]'):
+ yield line
# all_h_files --
# Return list of all WiredTiger C include file names.
diff --git a/src/third_party/wiredtiger/dist/function.py b/src/third_party/wiredtiger/dist/function.py
index 0e36a539cc4..69ebd4748dc 100644
--- a/src/third_party/wiredtiger/dist/function.py
+++ b/src/third_party/wiredtiger/dist/function.py
@@ -44,6 +44,7 @@ types = [
'struct',
'union',
'enum',
+ 'TEST_',
'WT_',
'wt_',
'double',
diff --git a/src/third_party/wiredtiger/dist/s_function b/src/third_party/wiredtiger/dist/s_function
index 3259e215d0c..314a8c5bb06 100755
--- a/src/third_party/wiredtiger/dist/s_function
+++ b/src/third_party/wiredtiger/dist/s_function
@@ -73,10 +73,11 @@ done
for f in `find bench examples ext src test -name '*.[ci]'`; do
sed -n -e '/API_CALL.*;$/,/API_END.*;/{=;p;}' \
-e '/LSM_.*ENTER*;$/,/LSM_.*LEAVE*;/{=;p;}' \
+ -e '/WT_TRACK_OP_INIT/,/WT_TRACK_OP_END/{=;p;}' \
-e '/va_start/,/va_end/{=;p;}' $f | \
sed 'N;s/\n/:/' | \
- egrep -w 'return|WT_RET' | \
- sed -e "s,^,$f:," -e 's/$/ [return skips API_END call]/'
+ egrep -w 'return;|return \(|WT_RET' | \
+ sed -e "s,^,$f:," -e 's/$/ [return skips matching end call]/'
done
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 6cd3f219b4a..20f3c72b2b7 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -493,7 +493,7 @@ connection_stats = [
TxnStat('txn_checkpoint_time_total', 'transaction checkpoint total time (msecs)', 'no_clear,no_scale'),
TxnStat('txn_commit', 'transactions committed'),
TxnStat('txn_commit_queue_empty', 'commit timestamp queue insert to empty'),
- TxnStat('txn_commit_queue_head', 'commit timestamp queue inserts to head'),
+ TxnStat('txn_commit_queue_tail', 'commit timestamp queue inserts to tail'),
TxnStat('txn_commit_queue_inserts', 'commit timestamp queue inserts total'),
TxnStat('txn_commit_queue_len', 'commit timestamp queue length'),
TxnStat('txn_fail_cache', 'transaction failures due to cache overflow'),
diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c
index 7091a0a76d6..8e40b9f0b6c 100644
--- a/src/third_party/wiredtiger/examples/c/ex_all.c
+++ b/src/third_party/wiredtiger/examples/c/ex_all.c
@@ -900,6 +900,21 @@ transaction_ops(WT_SESSION *session_arg)
error_check(session->commit_transaction(session, NULL));
/*! [transaction isolation] */
+ /*! [transaction prepare] */
+ /*
+ * Prepare a transaction which guarantees a subsequent commit will
+ * succeed. Only commit and rollback are allowed on a transaction after
+ * it has been prepared.
+ */
+ error_check(session->open_cursor(
+ session, "table:mytable", NULL, NULL, &cursor));
+ error_check(session->begin_transaction(session, NULL));
+ cursor->set_key(cursor, "key");
+ cursor->set_value(cursor, "value");
+ session->prepare_transaction(session, "prepare_timestamp=2a");
+ error_check(session->commit_transaction(session, NULL));
+ /*! [transaction prepare] */
+
/*! [session isolation configuration] */
/* Open a session configured for read-uncommitted isolation. */
error_check(conn->open_session(
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 2113eef94fe..d3d7ff86d32 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "b85bcfde3b7e41a35017385fd219553e7028c427",
+ "commit": "bc82f0f0383af9ef41ece37a36ae9200ea4561a3",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-3.6"
diff --git a/src/third_party/wiredtiger/lang/java/java_doc.i b/src/third_party/wiredtiger/lang/java/java_doc.i
index 768f06e4ec1..64dbf7850b0 100644
--- a/src/third_party/wiredtiger/lang/java/java_doc.i
+++ b/src/third_party/wiredtiger/lang/java/java_doc.i
@@ -48,6 +48,7 @@ COPYDOC(__wt_session, WT_SESSION, upgrade)
COPYDOC(__wt_session, WT_SESSION, verify)
COPYDOC(__wt_session, WT_SESSION, begin_transaction)
COPYDOC(__wt_session, WT_SESSION, commit_transaction)
+COPYDOC(__wt_session, WT_SESSION, prepare_transaction)
COPYDOC(__wt_session, WT_SESSION, rollback_transaction)
COPYDOC(__wt_session, WT_SESSION, timestamp_transaction)
COPYDOC(__wt_session, WT_SESSION, checkpoint)
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c
index 55df1527e98..73a3d13e307 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt.c
@@ -221,10 +221,11 @@ __wt_block_checkpoint_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
case WT_CKPT_INPROGRESS:
case WT_CKPT_PANIC_ON_FAILURE:
case WT_CKPT_SALVAGE:
- ret = __wt_block_panic(session, EINVAL,
+ __wt_err(session, EINVAL,
"%s: an unexpected checkpoint start: the checkpoint "
"has already started or was configured for salvage",
block->name);
+ ret = __wt_block_panic(session);
break;
case WT_CKPT_NONE:
block->ckpt_state = WT_CKPT_INPROGRESS;
@@ -433,10 +434,11 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
break;
case WT_CKPT_NONE:
case WT_CKPT_PANIC_ON_FAILURE:
- ret = __wt_block_panic(session, EINVAL,
+ __wt_err(session, EINVAL,
"%s: an unexpected checkpoint attempt: the checkpoint "
"was never started or has already completed",
block->name);
+ ret = __wt_block_panic(session);
break;
case WT_CKPT_SALVAGE:
/* Salvage doesn't use the standard checkpoint APIs. */
@@ -718,9 +720,11 @@ live_update:
"list");
#endif
-err: if (ret != 0 && fatal)
- ret = __wt_block_panic(session, ret,
+err: if (ret != 0 && fatal) {
+ __wt_err(session, ret,
"%s: fatal checkpoint failure", block->name);
+ ret = __wt_block_panic(session);
+ }
if (locked)
__wt_spin_unlock(session, &block->live_lock);
@@ -842,26 +846,30 @@ __wt_block_checkpoint_resolve(
goto done;
case WT_CKPT_NONE:
case WT_CKPT_SALVAGE:
- ret = __wt_block_panic(session, EINVAL,
+ __wt_err(session, EINVAL,
"%s: an unexpected checkpoint resolution: the checkpoint "
"was never started or completed, or configured for salvage",
block->name);
+ ret = __wt_block_panic(session);
break;
case WT_CKPT_PANIC_ON_FAILURE:
if (!failed)
break;
- ret = __wt_block_panic(session, EINVAL,
+ __wt_err(session, EINVAL,
"%s: the checkpoint failed, the system must restart",
block->name);
+ ret = __wt_block_panic(session);
break;
}
WT_ERR(ret);
if ((ret = __wt_block_extlist_merge(
- session, block, &ci->ckpt_avail, &ci->avail)) != 0)
- WT_ERR(__wt_block_panic(session, ret,
+ session, block, &ci->ckpt_avail, &ci->avail)) != 0) {
+ __wt_err(session, ret,
"%s: fatal checkpoint failure during extent list merge",
- block->name));
+ block->name);
+ ret = __wt_block_panic(session);
+ }
__wt_spin_unlock(session, &block->live_lock);
/* Discard the lists remaining after the checkpoint call. */
diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c
index a2790863961..5fe4cb51fac 100644
--- a/src/third_party/wiredtiger/src/block/block_mgr.c
+++ b/src/third_party/wiredtiger/src/block/block_mgr.c
@@ -569,6 +569,7 @@ __bm_method_set(WT_BM *bm, bool readonly)
bm->compact_page_skip = __bm_compact_page_skip;
bm->compact_skip = __bm_compact_skip;
bm->compact_start = __bm_compact_start;
+ bm->corrupt = __wt_bm_corrupt;
bm->free = __bm_free;
bm->is_mapped = __bm_is_mapped;
bm->map_discard = __bm_map_discard;
@@ -638,20 +639,9 @@ err: WT_TRET(bm->close(bm, session));
* Report an error, then panic the handle and the system.
*/
int
-__wt_block_panic(WT_SESSION_IMPL *session, int error, const char *fmt, ...)
+__wt_block_panic(WT_SESSION_IMPL *session)
WT_GCC_FUNC_ATTRIBUTE((cold))
- WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4)))
{
- va_list ap;
-
- /*
- * Ignore error returns from underlying event handlers, we already have
- * an error value to return.
- */
- va_start(ap, fmt);
- WT_IGNORE_RET(__wt_eventv(session, false, error, NULL, 0, fmt, ap));
- va_end(ap);
-
/* Switch the handle into read-only mode. */
__bm_method_set(S2BT(session)->bm, true);
diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c
index cd419566e40..ec44885f56a 100644
--- a/src/third_party/wiredtiger/src/block/block_read.c
+++ b/src/third_party/wiredtiger/src/block/block_read.c
@@ -107,6 +107,77 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session,
return (0);
}
+/*
+ * __wt_bm_corrupt_dump --
+ * Dump a block into the log in 1KB chunks.
+ */
+static int
+__wt_bm_corrupt_dump(WT_SESSION_IMPL *session,
+ WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t checksum)
+ WT_GCC_FUNC_ATTRIBUTE((cold))
+{
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ size_t chunk, i, nchunks;
+
+#define WT_CORRUPT_FMT "{%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "}"
+ if (buf->size == 0) {
+ __wt_errx(session,
+ WT_CORRUPT_FMT ": empty buffer, no dump available",
+ (uintmax_t)offset, size, checksum);
+ return (0);
+ }
+
+ WT_RET(__wt_scr_alloc(session, 4 * 1024, &tmp));
+
+ nchunks = buf->size / 1024 + (buf->size % 1024 == 0 ? 0 : 1);
+ for (chunk = i = 0;;) {
+ WT_ERR(__wt_buf_catfmt(
+ session, tmp, "%02x ", ((uint8_t *)buf->data)[i]));
+ if (++i == buf->size || i % 1024 == 0) {
+ __wt_errx(session,
+ WT_CORRUPT_FMT
+ ": (chunk %" WT_SIZET_FMT " of %" WT_SIZET_FMT
+ "): %.*s",
+ (uintmax_t)offset, size, checksum,
+ ++chunk, nchunks,
+ (int)tmp->size, (char *)tmp->data);
+ if (i == buf->size)
+ break;
+ WT_ERR(__wt_buf_set(session, tmp, "", 0));
+ }
+ }
+
+err: __wt_scr_free(session, &tmp);
+ return (ret);
+}
+
+/*
+ * __wt_bm_corrupt --
+ * Report a block has been corrupted, external API.
+ */
+int
+__wt_bm_corrupt(WT_BM *bm,
+ WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+{
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ wt_off_t offset;
+ uint32_t checksum, size;
+
+ /* Read the block. */
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_bm_read(bm, session, tmp, addr, addr_size));
+
+ /* Crack the cookie, dump the block. */
+ WT_ERR(__wt_block_buffer_to_addr(
+ bm->block, addr, &offset, &size, &checksum));
+ WT_ERR(__wt_bm_corrupt_dump(session, tmp, offset, size, checksum));
+
+err: __wt_scr_free(session, &tmp);
+ return (ret);
+}
+
#ifdef HAVE_DIAGNOSTIC
/*
* __wt_block_read_off_blind --
@@ -221,6 +292,10 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
"of %" PRIu32,
size, (uintmax_t)offset, swap.checksum, checksum);
+ if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
+ WT_IGNORE_RET(
+ __wt_bm_corrupt_dump(session, buf, offset, size, checksum));
+
/* Panic if a checksum fails during an ordinary read. */
return (block->verify ||
F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE) ?
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 8c7170e6180..1372b964dbd 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -59,6 +59,7 @@ __cursor_page_pinned(WT_CURSOR_BTREE *cbt)
{
WT_CURSOR *cursor;
WT_SESSION_IMPL *session;
+ uint32_t current_state;
cursor = &cbt->iface;
session = (WT_SESSION_IMPL *)cursor->session;
@@ -68,7 +69,7 @@ __cursor_page_pinned(WT_CURSOR_BTREE *cbt)
* external key.
*/
if (!F_ISSET(cbt, WT_CBT_ACTIVE)) {
- WT_ASSERT((WT_SESSION_IMPL *)cursor->session,
+ WT_ASSERT(session,
cbt->ref == NULL && !F_ISSET(cursor, WT_CURSTD_KEY_INT));
return (false);
}
@@ -89,12 +90,16 @@ __cursor_page_pinned(WT_CURSOR_BTREE *cbt)
return (false);
/*
- * If we are doing an update, we need a page with history. Release the
- * page so we get it again with history if required.
+ * If we are doing an update, we need a page with history, release the
+ * page so we get it again with history if required. Eviction may be
+ * locking the page, wait until we see a "normal" state and then test
+ * against that state (eviction may have already locked the page again).
*/
- if (F_ISSET(&session->txn, WT_TXN_UPDATE) &&
- cbt->ref->state != WT_REF_MEM)
- return (false);
+ if (F_ISSET(&session->txn, WT_TXN_UPDATE)) {
+ while ((current_state = cbt->ref->state) == WT_REF_LOCKED)
+ __wt_yield();
+ return (current_state == WT_REF_MEM);
+ }
return (true);
}
@@ -654,6 +659,12 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
} else if ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND)
exact = 1;
else {
+ /*
+ * The cursor next call may have overwritten our caller's key,
+ * restore it to its original value.
+ */
+ __cursor_state_restore(cursor, &state);
+
WT_ERR(__cursor_func_init(cbt, true));
WT_ERR(btree->type == BTREE_ROW ?
__cursor_row_search(session, cbt, NULL, true) :
@@ -811,9 +822,9 @@ err: if (ret == WT_RESTART) {
goto retry;
}
-done: /* Insert doesn't maintain a position across calls, clear resources. */
+ /* Insert doesn't maintain a position across calls, clear resources. */
if (ret == 0) {
- F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+done: F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
if (append_key)
F_SET(cursor, WT_CURSTD_KEY_EXT);
}
@@ -921,10 +932,12 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ bool iterating;
btree = cbt->btree;
cursor = &cbt->iface;
session = (WT_SESSION_IMPL *)cursor->session;
+ iterating = F_ISSET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV);
WT_STAT_CONN_INCR(session, cursor_remove);
WT_STAT_DATA_INCR(session, cursor_remove);
@@ -991,35 +1004,23 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt)
__cursor_col_modify(session, cbt, WT_UPDATE_TOMBSTONE);
if (ret == 0)
goto done;
-
- /*
- * The pinned page goes away if we fail for any reason, get a
- * local copy of any pinned key and discard any value (remove
- * discards any previous value on success or failure). (Restart
- * could still use the pinned page, but that's an unlikely
- * path.) Re-save the cursor state: we may retry but eventually
- * fail.
- */
- WT_TRET(__cursor_localkey(cursor));
- F_CLR(cursor, WT_CURSTD_VALUE_SET);
- __cursor_state_save(cursor, &state);
goto err;
}
- if (positioned == POSITIONED)
- positioned = SEARCH_POSITION;
-
/*
- * The pinned page goes away if we do a search, get a local copy of any
- * pinned key and discard any value (remove discards any previous
- * value on success or failure). Re-save the cursor state: we may retry
- * but eventually fail.
+ * The pinned page goes away if we do a search, including as a result of
+ * a restart. Get a local copy of any pinned key and re-save the cursor
+ * state: we may retry but eventually fail.
+ *
+ * Note these steps must be repeatable, we'll continue to take this path
+ * as long as we encounter WT_RESTART.
*/
+retry: if (positioned == POSITIONED)
+ positioned = SEARCH_POSITION;
WT_ERR(__cursor_localkey(cursor));
- F_CLR(cursor, WT_CURSTD_VALUE_SET);
__cursor_state_save(cursor, &state);
-retry: WT_ERR(__cursor_func_init(cbt, true));
+ WT_ERR(__cursor_func_init(cbt, true));
if (btree->type == BTREE_ROW) {
WT_ERR(__cursor_row_search(session, cbt, NULL, false));
@@ -1067,9 +1068,8 @@ err: if (ret == WT_RESTART) {
goto retry;
}
-done: if (ret == 0) {
- F_CLR(cursor, WT_CURSTD_VALUE_SET);
- switch (positioned) {
+ if (ret == 0) {
+done: switch (positioned) {
case NO_POSITION:
/*
* Never positioned and we leave it that way, clear any
@@ -1099,19 +1099,35 @@ done: if (ret == 0) {
__cursor_state_restore(cursor, &state);
/*
- * If the cursor is configured to overwrite and the record isn't
- * found, that is exactly what we want, return success. Note we
- * set clear the return value after everything else, the clause
+ * If the record isn't found and the cursor is configured for
+ * overwrite, that is what we want, try to return success.
+ *
+ * We set the return to 0 after testing for success, the clause
* above dealing with the cursor position is only correct if we
* were successful. If search failed after positioned is set to
* SEARCH_POSITION, we cannot return a key. The only action to
* take is to set the cursor to its original key, which we just
* did.
+ *
+ * Finally, if an iterating or positioned cursor was forced to
+ * give up its pinned page and then a search failed, we've
+ * lost our cursor position. Since no subsequent iteration can
+ * succeed, we cannot return success.
*/
- if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) && ret == WT_NOTFOUND)
+ if (ret == WT_NOTFOUND &&
+ F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
+ !iterating && positioned == NO_POSITION)
ret = 0;
}
+ /*
+ * Upper level cursor removes don't expect the cursor value to be set
+ * after a successful remove (and check in diagnostic mode). Error
+ * handling may have converted failure to a success, do a final check.
+ */
+ if (ret == 0)
+ F_CLR(cursor, WT_CURSTD_VALUE_SET);
+
return (ret);
}
@@ -1230,8 +1246,8 @@ err: if (ret == WT_RESTART) {
* To make this work, we add a field to the btree cursor to pass back a
* pointer to the modify function's allocated update structure.
*/
-done: if (ret == 0)
- switch (modify_type) {
+ if (ret == 0) {
+done: switch (modify_type) {
case WT_UPDATE_STANDARD:
/*
* WT_CURSOR.update returns a key and a value.
@@ -1258,6 +1274,7 @@ done: if (ret == 0)
WT_TRET(__wt_illegal_value(session, NULL));
break;
}
+ }
if (ret != 0) {
WT_TRET(__cursor_reset(cbt));
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 325aec853e5..6575080c858 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -511,9 +511,8 @@ __debug_dsk_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
* Pretty-print information about a page.
*/
static char *
-__debug_tree_shape_info(WT_PAGE *page)
+__debug_tree_shape_info(WT_PAGE *page, char *buf, size_t len)
{
- static char buf[128];
uint64_t v;
const char *unit;
@@ -532,7 +531,7 @@ __debug_tree_shape_info(WT_PAGE *page)
unit = "B";
}
- (void)__wt_snprintf(buf, sizeof(buf), "(%p, %" PRIu64
+ (void)__wt_snprintf(buf, len, "(%p, %" PRIu64
"%s, evict gen %" PRIu64 ", create gen %" PRIu64 ")",
(void *)page, v, unit,
page->evict_pass_gen, page->cache_create_gen);
@@ -548,12 +547,14 @@ __debug_tree_shape_worker(WT_DBG *ds, WT_PAGE *page, int level)
{
WT_REF *ref;
WT_SESSION_IMPL *session;
+ char buf[128];
session = ds->session;
if (WT_PAGE_IS_INTERNAL(page)) {
WT_RET(ds->f(ds, "%*s" "I" "%d %s\n",
- level * 3, " ", level, __debug_tree_shape_info(page)));
+ level * 3, " ", level,
+ __debug_tree_shape_info(page, buf, sizeof(buf))));
WT_INTL_FOREACH_BEGIN(session, page, ref) {
if (ref->state == WT_REF_MEM)
WT_RET(__debug_tree_shape_worker(
@@ -561,7 +562,8 @@ __debug_tree_shape_worker(WT_DBG *ds, WT_PAGE *page, int level)
} WT_INTL_FOREACH_END;
} else
WT_RET(ds->f(ds, "%*s" "L" " %s\n",
- level * 3, " ", __debug_tree_shape_info(page)));
+ level * 3, " ",
+ __debug_tree_shape_info(page, buf, sizeof(buf))));
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 370e81673d8..e6f8bad8e31 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -398,32 +398,18 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
/* Debugging information */
WT_RET(__wt_config_gets(session,
cfg, "assert.commit_timestamp", &cval));
- if (WT_STRING_MATCH("always", cval.str, cval.len)) {
+ btree->assert_flags = 0;
+ if (WT_STRING_MATCH("always", cval.str, cval.len))
FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS);
- FLD_CLR(btree->assert_flags,
- WT_ASSERT_COMMIT_TS_KEYS | WT_ASSERT_COMMIT_TS_NEVER);
- } else if (WT_STRING_MATCH("key_consistent", cval.str, cval.len)) {
+ else if (WT_STRING_MATCH("key_consistent", cval.str, cval.len))
FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_KEYS);
- FLD_CLR(btree->assert_flags,
- WT_ASSERT_COMMIT_TS_ALWAYS | WT_ASSERT_COMMIT_TS_NEVER);
- } else if (WT_STRING_MATCH("never", cval.str, cval.len)) {
+ else if (WT_STRING_MATCH("never", cval.str, cval.len))
FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER);
- FLD_CLR(btree->assert_flags,
- WT_ASSERT_COMMIT_TS_ALWAYS | WT_ASSERT_COMMIT_TS_KEYS);
- } else
- FLD_CLR(btree->assert_flags,
- WT_ASSERT_COMMIT_TS_ALWAYS |
- WT_ASSERT_COMMIT_TS_KEYS | WT_ASSERT_COMMIT_TS_NEVER);
WT_RET(__wt_config_gets(session, cfg, "assert.read_timestamp", &cval));
- if (WT_STRING_MATCH("always", cval.str, cval.len)) {
+ if (WT_STRING_MATCH("always", cval.str, cval.len))
FLD_SET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS);
- FLD_CLR(btree->assert_flags, WT_ASSERT_READ_TS_NEVER);
- } else if (WT_STRING_MATCH("never", cval.str, cval.len)) {
+ else if (WT_STRING_MATCH("never", cval.str, cval.len))
FLD_SET(btree->assert_flags, WT_ASSERT_READ_TS_NEVER);
- FLD_CLR(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS);
- } else
- FLD_CLR(btree->assert_flags,
- WT_ASSERT_READ_TS_ALWAYS | WT_ASSERT_READ_TS_NEVER);
/* Huffman encoding */
WT_RET(__wt_btree_huffman_open(session));
diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c
index 007513fd581..4c108114438 100644
--- a/src/third_party/wiredtiger/src/btree/bt_io.c
+++ b/src/third_party/wiredtiger/src/btree/bt_io.c
@@ -154,7 +154,9 @@ corrupt: if (ret == 0)
if (!F_ISSET(btree, WT_BTREE_VERIFY) &&
!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) {
__wt_err(session, ret, "%s", fail_msg);
- ret = __wt_illegal_value(session, btree->dhandle->name);
+ WT_TRET(bm->corrupt(bm, session, addr, addr_size));
+ WT_TRET(
+ __wt_illegal_value(session, btree->dhandle->name));
}
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index afaf6c82aa5..20e6c8c7b4d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -209,6 +209,7 @@ __las_page_instantiate_verbose(WT_SESSION_IMPL *session, uint64_t las_pageid)
static int
__las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
{
+ WT_CACHE *cache;
WT_CURSOR *cursor;
WT_CURSOR_BTREE cbt;
WT_DECL_ITEM(current_key);
@@ -221,15 +222,18 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
uint32_t las_id, session_flags;
const uint8_t *p;
uint8_t upd_type;
+ bool locked;
cursor = NULL;
page = ref->page;
first_upd = last_upd = upd = NULL;
+ locked = false;
total_incr = 0;
current_recno = recno = WT_RECNO_OOB;
session_flags = 0; /* [-Werror=maybe-uninitialized] */
WT_CLEAR(las_key);
+ cache = S2C(session)->cache;
__las_page_instantiate_verbose(session, ref->page_las->las_pageid);
WT_STAT_CONN_INCR(session, cache_read_lookaside);
WT_STAT_DATA_INCR(session, cache_read_lookaside);
@@ -251,6 +255,8 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
*/
ret = __wt_las_cursor_position(
cursor, btree_id, ref->page_las->las_pageid);
+ __wt_readlock(session, &cache->las_sweepwalk_lock);
+ locked = true;
for (; ret == 0; ret = cursor->next(cursor)) {
WT_ERR(cursor->get_key(cursor,
&las_pageid, &las_id, &las_counter, &las_key));
@@ -317,6 +323,8 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
}
upd = NULL;
}
+ __wt_readunlock(session, &cache->las_sweepwalk_lock);
+ locked = false;
WT_ERR_NOTFOUND_OK(ret);
/* Insert the last set of updates, if any. */
@@ -369,7 +377,9 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
}
}
-err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+err: if (locked)
+ __wt_readunlock(session, &cache->las_sweepwalk_lock);
+ WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
WT_TRET(__wt_btcur_close(&cbt, true));
/*
@@ -498,17 +508,9 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
final_state = WT_REF_MEM;
- /*
- * If we already have the page image, just instantiate the history.
- *
- * We need exclusive access because other threads could be reading the
- * page without history and we can't change the state underneath them.
- */
- if (previous_state == WT_REF_LIMBO) {
- if (__wt_hazard_check(session, ref) != NULL)
- goto err;
+ /* If we already have the page image, just instantiate the history. */
+ if (previous_state == WT_REF_LIMBO)
goto skip_read;
- }
/*
* Get the address: if there is no address, the page was deleted or had
@@ -608,7 +610,7 @@ skip_read:
* Don't free WT_REF.page_las, there may be concurrent readers.
*/
WT_TRET(__wt_las_remove_block(
- session, NULL, btree->id, ref->page_las->las_pageid));
+ session, btree->id, ref->page_las->las_pageid));
ref->page_las->eviction_to_lookaside = false;
break;
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index 5fa46cb7fb2..1f0b9c4b285 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -19,6 +19,29 @@
WT_SESSION_NO_RECONCILE)
/*
+ * __las_set_isolation --
+ * Switch to read-uncommitted.
+ */
+static void
+__las_set_isolation(
+ WT_SESSION_IMPL *session, WT_TXN_ISOLATION *saved_isolationp)
+{
+ *saved_isolationp = session->txn.isolation;
+ session->txn.isolation = WT_ISO_READ_UNCOMMITTED;
+}
+
+/*
+ * __las_restore_isolation --
+ * Restore isolation.
+ */
+static void
+__las_restore_isolation(
+ WT_SESSION_IMPL *session, WT_TXN_ISOLATION saved_isolation)
+{
+ session->txn.isolation = saved_isolation;
+}
+
+/*
* __wt_las_nonempty --
* Return when there are entries in the lookaside table.
*/
@@ -56,11 +79,15 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
if (!F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN))
return;
+ /* Set the connection-wide statistics. */
+ cstats = conn->stats;
+ WT_STAT_SET(
+ session, cstats, cache_lookaside_entries, cache->las_entry_count);
+
/*
* We have a cursor, and we need the underlying data handle; we can get
* to it by way of the underlying btree handle, but it's a little ugly.
*/
- cstats = conn->stats;
dstats = ((WT_CURSOR_BTREE *)
cache->las_session[0]->las_cursor)->btree->dhandle->stats;
@@ -68,6 +95,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
WT_STAT_SET(session, cstats, cache_lookaside_insert, v);
v = WT_STAT_READ(dstats, cursor_remove);
WT_STAT_SET(session, cstats, cache_lookaside_remove, v);
+
/*
* If we're clearing stats we need to clear the cursor values we just
* read. This does not clear the rest of the statistics in the
@@ -326,6 +354,51 @@ __wt_las_cursor_close(
}
/*
+ * __las_remove_block --
+ * Remove all records for a given page from the lookaside store.
+ */
+static int
+__las_remove_block(WT_SESSION_IMPL *session,
+ WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid, uint64_t *decrp)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_ITEM las_key;
+ uint64_t las_counter, las_pageid;
+ uint32_t las_id;
+
+ *decrp = 0;
+
+ conn = S2C(session);
+
+ __wt_writelock(session, &conn->cache->las_sweepwalk_lock);
+
+ /*
+ * Search for the block's unique btree ID and page ID prefix and step
+ * through all matching records, removing them.
+ */
+ for (ret = __wt_las_cursor_position(cursor, btree_id, pageid);
+ ret == 0; ret = cursor->next(cursor)) {
+ WT_ERR(cursor->get_key(cursor,
+ &las_pageid, &las_id, &las_counter, &las_key));
+
+ /*
+ * Confirm the record matches; if not a match, we're done
+ * searching for records for this page.
+ */
+ if (las_pageid != pageid || las_id != btree_id)
+ break;
+
+ WT_ERR(cursor->remove(cursor));
+ ++*decrp;
+ }
+ WT_ERR_NOTFOUND_OK(ret);
+
+err: __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock);
+ return (ret);
+}
+
+/*
* __las_insert_block_verbose --
* Display a verbose message once per checkpoint with details about the
* cache state when performing a lookaside table write.
@@ -335,14 +408,13 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi)
{
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
+ double pct_dirty, pct_full;
+ uint64_t ckpt_gen_current, ckpt_gen_last;
+ uint32_t btree_id;
#ifdef HAVE_TIMESTAMPS
char hex_timestamp[2 * WT_TIMESTAMP_SIZE + 1];
-#else
- char hex_timestamp[9]; /* Enough for disabled string */
#endif
- uint64_t ckpt_gen_current, ckpt_gen_last;
- uint32_t btree_id;
- double pct_dirty, pct_full;
+ const char *ts;
btree_id = S2BT(session)->id;
@@ -370,9 +442,9 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi)
#ifdef HAVE_TIMESTAMPS
WT_RET(__wt_timestamp_to_hex_string(
session, hex_timestamp, &multi->page_las.min_timestamp));
+ ts = hex_timestamp;
#else
- WT_RET(__wt_snprintf(
- hex_timestamp, sizeof(hex_timestamp), "disabled"));
+ ts = "disabled";
#endif
__wt_verbose(session,
WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
@@ -384,7 +456,7 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi)
"cache use: %2.3f%%",
btree_id, multi->page_las.las_pageid,
multi->page_las.las_max_txn,
- hex_timestamp,
+ ts,
multi->page_las.las_skew_newest ? "newest" : "oldest",
WT_STAT_READ(conn->stats, cache_lookaside_entries),
pct_dirty, pct_full);
@@ -398,44 +470,52 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi)
/*
* __wt_las_insert_block --
- * Copy one set of saved updates into the database's lookaside buffer.
+ * Copy one set of saved updates into the database's lookaside table.
*/
int
__wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key)
{
WT_BTREE *btree;
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_ITEM las_timestamp, las_value;
WT_SAVE_UPD *list;
WT_SESSION_IMPL *las_session;
+ WT_TXN_ISOLATION saved_isolation;
WT_UPDATE *upd;
- uint64_t insert_cnt, las_counter, las_pageid;
+ uint64_t decrement_cnt, insert_cnt, insert_estimate;
+ uint64_t las_counter, las_pageid;
uint32_t btree_id, i, slot;
uint8_t *p;
+ bool local_txn;
+ btree = S2BT(session);
+ conn = S2C(session);
WT_CLEAR(las_timestamp);
WT_CLEAR(las_value);
- insert_cnt = 0;
-
- btree = S2BT(session);
+ decrement_cnt = insert_cnt = insert_estimate = 0;
btree_id = btree->id;
+ local_txn = false;
+
las_pageid = multi->page_las.las_pageid =
- __wt_atomic_add64(&S2C(session)->cache->las_pageid, 1);
+ __wt_atomic_add64(&conn->cache->las_pageid, 1);
if (!btree->lookaside_entries)
btree->lookaside_entries = true;
/* Wrap all the updates in a transaction. */
las_session = (WT_SESSION_IMPL *)cursor->session;
- WT_RET(__wt_txn_begin(las_session, NULL));
- las_session->txn.isolation = WT_ISO_READ_UNCOMMITTED;
+ __las_set_isolation(las_session, &saved_isolation);
+ WT_ERR(__wt_txn_begin(las_session, NULL));
+ local_txn = true;
/*
* Make sure there are no leftover entries (e.g., from a handle
* reopen).
*/
- WT_ERR(__wt_las_remove_block(session, cursor, btree_id, las_pageid));
+ WT_ERR(__las_remove_block(
+ session, cursor, btree_id, las_pageid, &decrement_cnt));
/* Enter each update in the boundary's list into the lookaside store. */
for (las_counter = 0, i = 0,
@@ -531,6 +611,18 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
upd->type, &las_value);
/*
+ * If remove is running concurrently, it's possible for
+ * records to be removed before the insert transaction
+ * commit (remove is configured read-uncommitted). Make
+ * sure increments stay ahead of decrements.
+ */
+ if (insert_estimate <= insert_cnt) {
+ insert_estimate += 100;
+ (void)__wt_atomic_add64(
+ &conn->cache->las_entry_count, 100);
+ }
+
+ /*
* Using update looks a little strange because the keys
* are guaranteed to not exist, but since we're
* appending, we want the cursor to stay positioned in
@@ -541,21 +633,38 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
} while ((upd = upd->next) != NULL);
}
+err: /* Resolve the transaction. */
+ if (local_txn) {
+ if (ret == 0)
+ ret = __wt_txn_commit(las_session, NULL);
+ else
+ WT_TRET(__wt_txn_rollback(las_session, NULL));
+ }
+
+ __las_restore_isolation(las_session, saved_isolation);
+
+ /*
+ * If the transaction successfully committed and we inserted records,
+ * adjust the final entry count. We may have also deleted records,
+ * but we must have intended to insert records to be in this function
+ * at all, checking the insert count is sufficient.
+ */
if (insert_cnt > 0) {
- WT_STAT_CONN_INCRV(
- session, cache_lookaside_entries, insert_cnt);
- (void)__wt_atomic_add64(
- &S2C(session)->cache->las_entry_count, insert_cnt);
- WT_ERR(__las_insert_block_verbose(session, multi));
+ if (ret == 0) {
+ (void)__wt_atomic_add64(
+ &conn->cache->las_entry_count,
+ insert_estimate - insert_cnt);
+ __wt_cache_decr_check_uint64(session,
+ &conn->cache->las_entry_count,
+ decrement_cnt, "lookaside entry count");
+
+ ret = __las_insert_block_verbose(session, multi);
+ } else
+ __wt_cache_decr_check_uint64(session,
+ &conn->cache->las_entry_count,
+ insert_estimate, "lookaside entry count");
}
-err: /* Resolve the transaction. */
- if (ret == 0)
- ret = __wt_txn_commit(las_session, NULL);
- else
- WT_TRET(__wt_txn_rollback(las_session, NULL));
- __wt_free(session, multi->supd);
- multi->supd_entries = 0;
return (ret);
}
@@ -621,74 +730,49 @@ __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid)
/*
* __wt_las_remove_block --
- * Remove all records for a given page from the lookaside store.
+ * Remove all records for a given page from the lookaside table.
*/
int
-__wt_las_remove_block(WT_SESSION_IMPL *session,
- WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid)
+__wt_las_remove_block(
+ WT_SESSION_IMPL *session, uint32_t btree_id, uint64_t pageid)
{
+ WT_CONNECTION_IMPL *conn;
+ WT_CURSOR *cursor;
WT_DECL_RET;
- WT_ITEM las_key;
WT_SESSION_IMPL *las_session;
- uint64_t las_counter, las_pageid, remove_cnt;
- uint32_t las_id, session_flags;
- bool local_cursor, local_txn;
+ WT_TXN_ISOLATION saved_isolation;
+ uint64_t decrement_cnt;
+ uint32_t session_flags;
- remove_cnt = 0;
+ conn = S2C(session);
session_flags = 0; /* [-Wconditional-uninitialized] */
- local_cursor = local_txn = false;
- if (cursor == NULL) {
- __wt_las_cursor(session, &cursor, &session_flags);
- local_cursor = true;
- }
- las_session = (WT_SESSION_IMPL *)cursor->session;
-
/*
- * Wrap all of the removes in a transaction, unless this remove is part
- * of a larger operation.
+ * This is an external API for removing records from the lookaside
+ * table, first acquiring a lookaside table cursor and enclosing
+ * transaction, then calling an underlying function to do the work.
*/
- if (local_cursor) {
- WT_ERR(__wt_txn_begin(las_session, NULL));
- las_session->txn.isolation = WT_ISO_READ_UNCOMMITTED;
- local_txn = true;
- }
+ __wt_las_cursor(session, &cursor, &session_flags);
- /*
- * Search for the block's unique prefix and step through all matching
- * records, removing them.
- */
- ret = __wt_las_cursor_position(cursor, btree_id, pageid);
- for (; ret == 0; ret = cursor->next(cursor)) {
- WT_ERR(cursor->get_key(cursor,
- &las_pageid, &las_id, &las_counter, &las_key));
+ las_session = (WT_SESSION_IMPL *)cursor->session;
+ __las_set_isolation(las_session, &saved_isolation);
- /*
- * Confirm the search using the unique prefix; if not a match,
- * we're done searching for records for this page.
- */
- if (las_pageid != pageid || las_id != btree_id)
- break;
+ WT_ERR(__wt_txn_begin(las_session, NULL));
- WT_ERR(cursor->remove(cursor));
- ++remove_cnt;
- }
- WT_ERR_NOTFOUND_OK(ret);
+ ret = __las_remove_block(
+ las_session, cursor, btree_id, pageid, &decrement_cnt);
+ if (ret == 0)
+ ret = __wt_txn_commit(las_session, NULL);
+ else
+ WT_TRET(__wt_txn_rollback(las_session, NULL));
+ if (ret == 0)
+ __wt_cache_decr_check_uint64(session,
+ &conn->cache->las_entry_count,
+ decrement_cnt, "lookaside entry count");
+
+err: __las_restore_isolation(las_session, saved_isolation);
+ WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
-err: if (local_txn) {
- if (ret == 0)
- ret = __wt_txn_commit(las_session, NULL);
- else
- WT_TRET(__wt_txn_rollback(las_session, NULL));
- }
- if (local_cursor)
- WT_TRET(__wt_las_cursor_close(
- session, &cursor, session_flags));
-
- WT_STAT_CONN_DECRV(session, cache_lookaside_entries, remove_cnt);
- __wt_cache_decr_check_uint64(session,
- &S2C(session)->cache->las_entry_count, remove_cnt,
- "lookaside entry count");
return (ret);
}
@@ -715,6 +799,34 @@ err: __wt_spin_unlock(session, &cache->las_sweep_lock);
}
/*
+ * __las_sweep_count --
+ * Calculate how many records to examine per sweep step.
+ */
+static inline uint64_t
+__las_sweep_count(WT_CACHE *cache)
+{
+ /*
+ * The sweep server wakes up every 10 seconds (by default), it's a slow
+ * moving thread. Try to review the entire lookaside table once every 5
+ * minutes, or every 30 calls.
+ *
+ * The reason is because the lookaside table exists because we're seeing
+ * cache/eviction pressure (it allows us to trade performance and disk
+ * space for cache space), and it's likely lookaside blocks are being
+ * evicted, and reading them back in doesn't help things. A trickier,
+ * but possibly better, alternative might be to review all lookaside
+ * blocks in the cache in order to get rid of them, and slowly review
+ * lookaside blocks that have already been evicted.
+ *
+ * Put upper and lower bounds on the calculation: since reads of pages
+ * with lookaside entries are blocked during sweep, make sure we do
+ * some work but don't block reads for too long.
+ */
+ return ((uint64_t)WT_MAX(100, WT_MIN(10 * WT_THOUSAND,
+ cache->las_entry_count / 30)));
+}
+
+/*
* __las_sweep_init --
* Prepare to start a lookaside sweep.
*/
@@ -726,11 +838,19 @@ __las_sweep_init(WT_SESSION_IMPL *session)
u_int i;
cache = S2C(session)->cache;
+ cache->las_sweep_cnt = __las_sweep_count(cache);
__wt_spin_lock(session, &cache->las_sweep_lock);
- /* If no files have been dropped, there's nothing to do. */
- if (cache->las_dropped_next == 0)
- WT_ERR(WT_NOTFOUND);
+
+ /*
+ * If no files have been dropped and the lookaside file is empty,
+ * there's nothing to do.
+ */
+ if (cache->las_dropped_next == 0) {
+ if (cache->las_entry_count == 0)
+ ret = WT_NOTFOUND;
+ goto err;
+ }
/* Scan the btree IDs to find min/max. */
cache->las_sweep_dropmin = UINT32_MAX;
@@ -767,22 +887,43 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
WT_CURSOR *cursor;
+ WT_DECL_ITEM(saved_key);
WT_DECL_RET;
- WT_ITEM *key, las_key;
- uint64_t cnt, las_counter, las_pageid, remove_cnt;
+ WT_ITEM las_key, las_timestamp, las_value;
+ WT_ITEM *sweep_key;
+ WT_TXN_ISOLATION saved_isolation;
+#ifdef HAVE_TIMESTAMPS
+ wt_timestamp_t timestamp, *val_ts;
+#else
+ wt_timestamp_t *val_ts;
+#endif
+ uint64_t cnt, decrement_cnt, las_counter, las_pageid, txnid;
uint32_t las_id, session_flags;
+ uint8_t upd_type;
int notused;
+ bool local_txn, locked;
cache = S2C(session)->cache;
cursor = NULL;
- key = &cache->las_sweep_key;
- remove_cnt = 0;
+ sweep_key = &cache->las_sweep_key;
+ decrement_cnt = 0;
session_flags = 0; /* [-Werror=maybe-uninitialized] */
+ local_txn = locked = false;
- __wt_las_cursor(session, &cursor, &session_flags);
+ WT_RET(__wt_scr_alloc(session, 0, &saved_key));
- /* We should have our own session. */
+ /*
+ * Allocate a cursor and wrap all the updates in a transaction.
+ * We should have our own lookaside cursor.
+ */
+ __wt_las_cursor(session, &cursor, &session_flags);
WT_ASSERT(session, cursor->session == &session->iface);
+ __las_set_isolation(session, &saved_isolation);
+ WT_ERR(__wt_txn_begin(session, NULL));
+ local_txn = true;
+
+ __wt_writelock(session, &cache->las_sweepwalk_lock);
+ locked = true;
/*
* When continuing a sweep, position the cursor using the key from the
@@ -792,8 +933,8 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
* Otherwise, we're starting a new sweep, gather the list of trees to
* sweep.
*/
- if (key->size != 0) {
- __wt_cursor_set_raw_key(cursor, key);
+ if (sweep_key->size != 0) {
+ __wt_cursor_set_raw_key(cursor, sweep_key);
ret = cursor->search_near(cursor, &notused);
/*
@@ -803,51 +944,41 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
* table. Searching for the same key could leave us stuck at
* the end of the table, repeatedly checking the same rows.
*/
- key->size = 0;
+ sweep_key->size = 0;
} else
ret = __las_sweep_init(session);
-
if (ret != 0)
goto srch_notfound;
/*
- * The sweep server wakes up every 10 seconds (by default), it's a slow
- * moving thread. Try to review the entire lookaside table once every 5
- * minutes, or every 30 calls.
- *
- * The reason is because the lookaside table exists because we're seeing
- * cache/eviction pressure (it allows us to trade performance and disk
- * space for cache space), and it's likely lookaside blocks are being
- * evicted, and reading them back in doesn't help things. A trickier,
- * but possibly better, alternative might be to review all lookaside
- * blocks in the cache in order to get rid of them, and slowly review
- * lookaside blocks that have already been evicted.
+ * Walk at least the number we calculated at the beginning of the
+ * sweep, or more if there have been additional records inserted in the
+ * meantime. Don't just repeat the calculation here since sweep
+ * removes entries and that would cause sweep to do less and less work
+ * rather than driving the lookaside table to empty.
*/
- cnt = (uint64_t)WT_MAX(100, cache->las_entry_count / 30);
+ cnt = __las_sweep_count(cache);
+ if (cnt < cache->las_sweep_cnt)
+ cnt = cache->las_sweep_cnt;
/* Walk the file. */
- for (; cnt > 0 && (ret = cursor->next(cursor)) == 0; --cnt) {
+ while ((ret = cursor->next(cursor)) == 0) {
/*
- * Give up if the cache is stuck: we are ignoring the cache
- * size while scanning the lookaside table, so we're making
- * things worse.
+ * Stop if the cache is stuck: we are ignoring the cache size
+ * while scanning the lookaside table, so we're making things
+ * worse.
*/
if (__wt_cache_stuck(session))
- cnt = 1;
+ cnt = 0;
/*
- * If the loop terminates after completing a work unit, we will
- * continue the table sweep next time. Get a local copy of the
- * sweep key, we're going to reset the cursor; do so before
- * calling cursor.remove, cursor.remove can discard our hazard
- * pointer and the page could be evicted from underneath us.
+ * If we have processed enough entries and we are between
+ * blocks, give up.
*/
- if (cnt == 1) {
- WT_ERR(__wt_cursor_get_raw_key(cursor, key));
- if (!WT_DATA_IN_ITEM(key))
- WT_ERR(__wt_buf_set(
- session, key, key->data, key->size));
- }
+ if (cnt > 0)
+ --cnt;
+ else if (saved_key->size == 0)
+ break;
WT_ERR(cursor->get_key(cursor,
&las_pageid, &las_id, &las_counter, &las_key));
@@ -859,34 +990,102 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
* should another thread remove the record before we do (not
* expected for dropped trees), and the cursor remains
* positioned in that case.
- *
- * TODO it would also be good to remove entries in lookaside
- * from live files that have aged out. If we track for each
- * entry whether it was the on-page value chosen by
- * reconciliation, we can safely remove entries from that point
- * on (for the given key) that are visible to all readers.
*/
if (las_id >= cache->las_sweep_dropmin &&
las_id <= cache->las_sweep_dropmax &&
__bit_test(cache->las_sweep_dropmap,
las_id - cache->las_sweep_dropmin)) {
WT_ERR(cursor->remove(cursor));
- ++remove_cnt;
+ ++decrement_cnt;
+ saved_key->size = 0;
+ continue;
}
+
+ /*
+ * Remove entries from the lookaside that have aged out and are
+ * now no longer needed.
+ */
+ WT_ERR(cursor->get_value(cursor,
+ &txnid, &las_timestamp, &upd_type, &las_value));
+#ifdef HAVE_TIMESTAMPS
+ WT_ASSERT(session, las_timestamp.size == WT_TIMESTAMP_SIZE);
+ memcpy(&timestamp, las_timestamp.data, las_timestamp.size);
+ val_ts = &timestamp;
+#else
+ val_ts = NULL;
+#endif
+
+ /*
+ * If this entry isn't globally visible we cannot remove it.
+ * If it is visible then perform additional checks to see
+ * whether it has aged out of a live file.
+ */
+ if (!__wt_txn_visible_all(session, txnid, val_ts)) {
+ saved_key->size = 0;
+ continue;
+ }
+
+ /*
+ * Save our key for comparing with older entries if we
+ * don't have one or it is different.
+ */
+ if (saved_key->size != las_key.size ||
+ memcmp(saved_key->data, las_key.data, las_key.size) != 0) {
+ /* If we have processed enough entries, give up. */
+ if (cnt == 0)
+ break;
+
+ WT_ERR(__wt_buf_set(session, saved_key,
+ las_key.data, las_key.size));
+
+ if (upd_type != WT_UPDATE_BIRTHMARK)
+ continue;
+ }
+
+ WT_ERR(cursor->remove(cursor));
+ ++decrement_cnt;
+ }
+
+ __wt_writeunlock(session, &cache->las_sweepwalk_lock);
+ locked = false;
+
+ /*
+ * If the loop terminates after completing a work unit, we will
+ * continue the table sweep next time. Get a local copy of the
+ * sweep key, we're going to reset the cursor; do so before
+ * calling cursor.remove, cursor.remove can discard our hazard
+ * pointer and the page could be evicted from underneath us.
+ */
+ if (ret == 0) {
+ WT_ERR(__wt_cursor_get_raw_key(cursor, sweep_key));
+ if (!WT_DATA_IN_ITEM(sweep_key))
+ WT_ERR(__wt_buf_set(session, sweep_key,
+ sweep_key->data, sweep_key->size));
}
srch_notfound:
WT_ERR_NOTFOUND_OK(ret);
if (0) {
-err: __wt_buf_free(session, key);
+err: __wt_buf_free(session, sweep_key);
+ }
+ if (local_txn) {
+ if (ret == 0)
+ ret = __wt_txn_commit(session, NULL);
+ else
+ WT_TRET(__wt_txn_rollback(session, NULL));
+ if (ret == 0)
+ __wt_cache_decr_check_uint64(session,
+ &S2C(session)->cache->las_entry_count,
+ decrement_cnt, "lookaside entry count");
}
+ if (locked)
+ __wt_writeunlock(session, &cache->las_sweepwalk_lock);
WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+ __las_restore_isolation(session, saved_isolation);
- __wt_cache_decr_check_uint64(session,
- &S2C(session)->cache->las_entry_count, remove_cnt,
- "lookaside entry count");
+ __wt_scr_free(session, &saved_key);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index cc3ea7c9d52..98f3ca6a633 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -247,6 +247,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_alter[] = {
};
static const WT_CONFIG_CHECK confchk_WT_SESSION_begin_transaction[] = {
+ { "ignore_prepare", "boolean", NULL, NULL, NULL, 0 },
{ "isolation", "string",
NULL, "choices=[\"read-uncommitted\",\"read-committed\","
"\"snapshot\"]",
@@ -443,6 +444,11 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = {
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
+static const WT_CONFIG_CHECK confchk_WT_SESSION_prepare_transaction[] = {
+ { "prepare_timestamp", "string", NULL, NULL, NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
static const WT_CONFIG_CHECK confchk_WT_SESSION_reconfigure[] = {
{ "ignore_cache_size", "boolean", NULL, NULL, NULL, 0 },
{ "isolation", "string",
@@ -1258,9 +1264,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {
confchk_WT_SESSION_alter, 4
},
{ "WT_SESSION.begin_transaction",
- "isolation=,name=,priority=0,read_timestamp=,"
- "round_to_oldest=false,snapshot=,sync=",
- confchk_WT_SESSION_begin_transaction, 7
+ "ignore_prepare=false,isolation=,name=,priority=0,read_timestamp="
+ ",round_to_oldest=false,snapshot=,sync=",
+ confchk_WT_SESSION_begin_transaction, 8
},
{ "WT_SESSION.checkpoint",
"drop=,force=false,name=,target=,use_timestamp=true",
@@ -1325,6 +1331,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"target=",
confchk_WT_SESSION_open_cursor, 13
},
+ { "WT_SESSION.prepare_transaction",
+ "prepare_timestamp=",
+ confchk_WT_SESSION_prepare_transaction, 1
+ },
{ "WT_SESSION.rebalance",
"",
NULL, 0
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index 871190380f7..00de16e6c21 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -252,6 +252,7 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET_MSG(NULL, ret,
"Failed to create session for eviction walks");
+ WT_RET(__wt_rwlock_init(session, &cache->las_sweepwalk_lock));
WT_RET(__wt_spin_init(session, &cache->las_lock, "lookaside table"));
WT_RET(__wt_spin_init(
session, &cache->las_sweep_lock, "lookaside sweep"));
@@ -400,6 +401,7 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
__wt_spin_destroy(session, &cache->evict_walk_lock);
__wt_spin_destroy(session, &cache->las_lock);
__wt_spin_destroy(session, &cache->las_sweep_lock);
+ __wt_rwlock_destroy(session, &cache->las_sweepwalk_lock);
wt_session = &cache->walk_session->iface;
if (wt_session != NULL)
WT_TRET(wt_session->close(wt_session, NULL));
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 39c84764070..71410a5a731 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -2274,7 +2274,7 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
WT_TRACK_OP_INIT(session);
- WT_RET(__evict_get_ref(session, is_server, &btree, &ref));
+ WT_RET_TRACK(__evict_get_ref(session, is_server, &btree, &ref));
WT_ASSERT(session, ref->state == WT_REF_LOCKED);
app_timer = false;
@@ -2443,7 +2443,6 @@ err: if (timer) {
done: WT_TRACK_OP_END(session);
return (ret);
- /* NOTREACHED */
}
/*
diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h
index 01a7617789a..1e8fc5f34c9 100644
--- a/src/third_party/wiredtiger/src/include/block.h
+++ b/src/third_party/wiredtiger/src/include/block.h
@@ -174,6 +174,7 @@ struct __wt_bm {
(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t, bool *);
int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, bool *);
int (*compact_start)(WT_BM *, WT_SESSION_IMPL *);
+ int (*corrupt)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *);
int (*map_discard)(WT_BM *, WT_SESSION_IMPL *, void *, size_t);
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 3a6413162f3..9c29b72dc67 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -178,7 +178,7 @@ static inline void
__wt_cache_decr_check_size(
WT_SESSION_IMPL *session, size_t *vp, size_t v, const char *fld)
{
- if (__wt_atomic_subsize(vp, v) < WT_EXABYTE)
+ if (v == 0 || __wt_atomic_subsize(vp, v) < WT_EXABYTE)
return;
/*
@@ -202,7 +202,7 @@ static inline void
__wt_cache_decr_check_uint64(
WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld)
{
- if (__wt_atomic_sub64(vp, v) < WT_EXABYTE)
+ if (v == 0 || __wt_atomic_sub64(vp, v) < WT_EXABYTE)
return;
/*
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index 7a49f388826..9203e692291 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -200,7 +200,9 @@ struct __wt_cache {
uint64_t las_entry_count; /* Count of entries in lookaside */
uint64_t las_pageid; /* Lookaside table page ID counter */
- WT_SPINLOCK las_sweep_lock;
+ uint64_t las_sweep_cnt; /* Entries to walk per sweep. */
+ WT_RWLOCK las_sweepwalk_lock;
+ WT_SPINLOCK las_sweep_lock;
WT_ITEM las_sweep_key; /* Track sweep position. */
uint32_t las_sweep_dropmin; /* Minimum btree ID in current set. */
uint8_t *las_sweep_dropmap; /* Bitmap of dropped btree IDs. */
diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h
index 4764ce0fd9f..541e811aa33 100644
--- a/src/third_party/wiredtiger/src/include/config.h
+++ b/src/third_party/wiredtiger/src/include/config.h
@@ -78,29 +78,30 @@ struct __wt_config_parser_impl {
#define WT_CONFIG_ENTRY_WT_SESSION_log_flush 26
#define WT_CONFIG_ENTRY_WT_SESSION_log_printf 27
#define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 28
-#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 29
-#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 30
-#define WT_CONFIG_ENTRY_WT_SESSION_rename 31
-#define WT_CONFIG_ENTRY_WT_SESSION_reset 32
-#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 33
-#define WT_CONFIG_ENTRY_WT_SESSION_salvage 34
-#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 35
-#define WT_CONFIG_ENTRY_WT_SESSION_strerror 36
-#define WT_CONFIG_ENTRY_WT_SESSION_timestamp_transaction 37
-#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 38
-#define WT_CONFIG_ENTRY_WT_SESSION_truncate 39
-#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 40
-#define WT_CONFIG_ENTRY_WT_SESSION_verify 41
-#define WT_CONFIG_ENTRY_colgroup_meta 42
-#define WT_CONFIG_ENTRY_file_config 43
-#define WT_CONFIG_ENTRY_file_meta 44
-#define WT_CONFIG_ENTRY_index_meta 45
-#define WT_CONFIG_ENTRY_lsm_meta 46
-#define WT_CONFIG_ENTRY_table_meta 47
-#define WT_CONFIG_ENTRY_wiredtiger_open 48
-#define WT_CONFIG_ENTRY_wiredtiger_open_all 49
-#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 50
-#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 51
+#define WT_CONFIG_ENTRY_WT_SESSION_prepare_transaction 29
+#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 30
+#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 31
+#define WT_CONFIG_ENTRY_WT_SESSION_rename 32
+#define WT_CONFIG_ENTRY_WT_SESSION_reset 33
+#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 34
+#define WT_CONFIG_ENTRY_WT_SESSION_salvage 35
+#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 36
+#define WT_CONFIG_ENTRY_WT_SESSION_strerror 37
+#define WT_CONFIG_ENTRY_WT_SESSION_timestamp_transaction 38
+#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 39
+#define WT_CONFIG_ENTRY_WT_SESSION_truncate 40
+#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 41
+#define WT_CONFIG_ENTRY_WT_SESSION_verify 42
+#define WT_CONFIG_ENTRY_colgroup_meta 43
+#define WT_CONFIG_ENTRY_file_config 44
+#define WT_CONFIG_ENTRY_file_meta 45
+#define WT_CONFIG_ENTRY_index_meta 46
+#define WT_CONFIG_ENTRY_lsm_meta 47
+#define WT_CONFIG_ENTRY_table_meta 48
+#define WT_CONFIG_ENTRY_wiredtiger_open 49
+#define WT_CONFIG_ENTRY_wiredtiger_open_all 50
+#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 51
+#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 52
/*
* configuration section: END
* DO NOT EDIT: automatically built by dist/flags.py.
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index aef6b2d6777..be21fcb6456 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -193,7 +193,6 @@ struct __wt_connection_impl {
WT_FH *optrack_map_fh; /* Name to id translation file. */
WT_SPINLOCK optrack_map_spinlock; /* Translation file spinlock. */
uintmax_t optrack_pid; /* Cache the process ID. */
- uint16_t optrack_uid; /* Unique function ID */
void **foc; /* Free-on-close array */
size_t foc_cnt; /* Array entries */
diff --git a/src/third_party/wiredtiger/src/include/error.h b/src/third_party/wiredtiger/src/include/error.h
index a4ef4757ec9..95edf7ed659 100644
--- a/src/third_party/wiredtiger/src/include/error.h
+++ b/src/third_party/wiredtiger/src/include/error.h
@@ -46,6 +46,13 @@
if ((__ret = (a)) != 0) \
return (__ret); \
} while (0)
+#define WT_RET_TRACK(a) do { \
+ int __ret; \
+ if ((__ret = (a)) != 0) { \
+ WT_TRACK_OP_END(session); \
+ return (__ret); \
+ } \
+} while (0)
#define WT_RET_MSG(session, v, ...) do { \
int __ret = (v); \
__wt_err(session, __ret, __VA_ARGS__); \
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 3674d9218da..a293b1ac516 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -45,7 +45,7 @@ extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el);
extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_panic(WT_SESSION_IMPL *session, int error, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_manager_drop( WT_SESSION_IMPL *session, const char *filename, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on);
@@ -57,6 +57,7 @@ extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t
extern int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_bm_preload( WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bm_corrupt(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_read_off_blind( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -208,7 +209,7 @@ extern void __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint
extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_remove_block( WT_SESSION_IMPL *session, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_save_dropped(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_sweep(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
@@ -661,7 +662,6 @@ extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_
extern int __wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_encrypt_size(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep);
extern void __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler);
-extern int __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, const char *file_name, int line_number, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_err(WT_SESSION_IMPL *session, int error, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_errx(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3)));
extern int __wt_ext_err_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -793,6 +793,7 @@ extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_F
extern int __wt_txn_reconfigure(WT_SESSION_IMPL *session, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_txn_release(WT_SESSION_IMPL *session);
extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_rollback_required(WT_SESSION_IMPL *session, const char *reason) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 8b8c3a55a6c..e5cfb534db5 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -611,7 +611,7 @@ struct __wt_connection_stats {
int64_t child_modify_blocked_page;
int64_t tree_descend_blocked;
int64_t txn_commit_queue_empty;
- int64_t txn_commit_queue_head;
+ int64_t txn_commit_queue_tail;
int64_t txn_commit_queue_inserts;
int64_t txn_commit_queue_len;
int64_t txn_snapshots_created;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 3f48368b303..19eaf87cbd3 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -241,6 +241,7 @@ struct __wt_txn {
TAILQ_ENTRY(__wt_txn) commit_timestampq;
TAILQ_ENTRY(__wt_txn) read_timestampq;
+ bool clear_ts_queue; /* Set if we need to clear from the queue */
/* Array of modifications by this transaction. */
WT_TXN_OP *mod;
@@ -262,22 +263,23 @@ struct __wt_txn {
const char *rollback_reason; /* If rollback, the reason */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_TXN_AUTOCOMMIT 0x0001u
-#define WT_TXN_ERROR 0x0002u
-#define WT_TXN_HAS_ID 0x0004u
-#define WT_TXN_HAS_SNAPSHOT 0x0008u
-#define WT_TXN_HAS_TS_COMMIT 0x0010u
-#define WT_TXN_HAS_TS_READ 0x0020u
-#define WT_TXN_NAMED_SNAPSHOT 0x0040u
-#define WT_TXN_PUBLIC_TS_COMMIT 0x0080u
-#define WT_TXN_PUBLIC_TS_READ 0x0100u
-#define WT_TXN_READONLY 0x0200u
-#define WT_TXN_RUNNING 0x0400u
-#define WT_TXN_SYNC_SET 0x0800u
-#define WT_TXN_TS_COMMIT_ALWAYS 0x1000u
-#define WT_TXN_TS_COMMIT_KEYS 0x2000u
-#define WT_TXN_TS_COMMIT_NEVER 0x4000u
-#define WT_TXN_UPDATE 0x8000u
+#define WT_TXN_AUTOCOMMIT 0x00001u
+#define WT_TXN_ERROR 0x00002u
+#define WT_TXN_HAS_ID 0x00004u
+#define WT_TXN_HAS_SNAPSHOT 0x00008u
+#define WT_TXN_HAS_TS_COMMIT 0x00010u
+#define WT_TXN_HAS_TS_READ 0x00020u
+#define WT_TXN_IGNORE_PREPARE 0x00040u
+#define WT_TXN_NAMED_SNAPSHOT 0x00080u
+#define WT_TXN_PUBLIC_TS_COMMIT 0x00100u
+#define WT_TXN_PUBLIC_TS_READ 0x00200u
+#define WT_TXN_READONLY 0x00400u
+#define WT_TXN_RUNNING 0x00800u
+#define WT_TXN_SYNC_SET 0x01000u
+#define WT_TXN_TS_COMMIT_ALWAYS 0x02000u
+#define WT_TXN_TS_COMMIT_KEYS 0x04000u
+#define WT_TXN_TS_COMMIT_NEVER 0x08000u
+#define WT_TXN_UPDATE 0x10000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 53067bf44ab..fef1a935983 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -1717,6 +1717,9 @@ struct __wt_session {
*
* @param session the session handle
* @configstart{WT_SESSION.begin_transaction, see dist/api_data.py}
+ * @config{ignore_prepare, whether to ignore the updates by other
+ * prepared transactions as part of read operations of this
+ * transaction., a boolean flag; default \c false.}
* @config{isolation, the isolation level for this transaction; defaults
* to the session's isolation level., a string\, chosen from the
* following options: \c "read-uncommitted"\, \c "read-committed"\, \c
@@ -1776,6 +1779,31 @@ struct __wt_session {
int __F(commit_transaction)(WT_SESSION *session, const char *config);
/*!
+ * Prepare the current transaction.
+ *
+ * A transaction must be in progress when this method is called.
+ * Preparing a transaction will guarantee subsequent commit will
+ * succeed. Only commit and rollback are allowed on a transaction after
+ * it has been prepared. At the moment, prepare transaction is designed
+ * to support MongoDB exclusively.
+ *
+ * @requires_transaction
+ *
+ * @snippet ex_all.c transaction prepare
+ *
+ * @param session the session handle
+ * @configstart{WT_SESSION.prepare_transaction, see dist/api_data.py}
+ * @config{prepare_timestamp, set the prepare timestamp for the updates
+ * of the current transaction. The supplied value should not be older
+ * than any active read timestamps. This configuration option is
+ * mandatory. See @ref transaction_timestamps., a string; default
+ * empty.}
+ * @configend
+ * @errors
+ */
+ int __F(prepare_transaction)(WT_SESSION *session, const char *config);
+
+ /*!
* Roll back the current transaction.
*
* A transaction must be in progress when this method is called.
@@ -5455,8 +5483,8 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_TREE_DESCEND_BLOCKED 1296
/*! transaction: commit timestamp queue insert to empty */
#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1297
-/*! transaction: commit timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1298
+/*! transaction: commit timestamp queue inserts to tail */
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_TAIL 1298
/*! transaction: commit timestamp queue inserts total */
#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1299
/*! transaction: commit timestamp queue length */
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
index c5a3935813a..d737c09c391 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
@@ -865,6 +865,45 @@ err: API_END_RET(session, ret);
}
/*
+ * __clsm_position_chunk --
+ * Position a chunk cursor.
+ */
+static int
+__clsm_position_chunk(
+ WT_CURSOR_LSM *clsm, WT_CURSOR *c, bool forward, int *cmpp)
+{
+ WT_CURSOR *cursor;
+ WT_SESSION_IMPL *session;
+
+ cursor = &clsm->iface;
+ session = (WT_SESSION_IMPL *)cursor->session;
+
+ c->set_key(c, &cursor->key);
+ WT_RET(c->search_near(c, cmpp));
+
+ while (forward ? *cmpp < 0 : *cmpp > 0) {
+ WT_RET(forward ? c->next(c) : c->prev(c));
+
+ /*
+ * With higher isolation levels, where we have stable reads,
+ * we're done: the cursor is now positioned as expected.
+ *
+ * With read-uncommitted isolation, a new record could have
+ * appeared in between the search and stepping forward / back.
+ * In that case, keep going until we see a key in the expected
+ * range.
+ */
+ if (session->txn.isolation != WT_ISO_READ_UNCOMMITTED)
+ return (0);
+
+ WT_RET(WT_LSM_CURCMP(session,
+ clsm->lsm_tree, c, cursor, *cmpp));
+ }
+
+ return (0);
+}
+
+/*
* __clsm_next --
* WT_CURSOR->next method for the LSM cursor type.
*/
@@ -877,7 +916,7 @@ __clsm_next(WT_CURSOR *cursor)
WT_SESSION_IMPL *session;
u_int i;
int cmp;
- bool check, deleted;
+ bool deleted;
clsm = (WT_CURSOR_LSM *)cursor;
@@ -887,29 +926,17 @@ __clsm_next(WT_CURSOR *cursor)
/* If we aren't positioned for a forward scan, get started. */
if (clsm->current == NULL || !F_ISSET(clsm, WT_CLSM_ITERATE_NEXT)) {
- F_CLR(clsm, WT_CLSM_MULTIPLE);
WT_FORALL_CURSORS(clsm, c, i) {
if (!F_ISSET(cursor, WT_CURSTD_KEY_SET)) {
WT_ERR(c->reset(c));
ret = c->next(c);
- } else if (c != clsm->current) {
- c->set_key(c, &cursor->key);
- if ((ret = c->search_near(c, &cmp)) == 0) {
- if (cmp < 0)
- ret = c->next(c);
- else if (cmp == 0) {
- if (clsm->current == NULL)
- clsm->current = c;
- else
- F_SET(clsm,
- WT_CLSM_MULTIPLE);
- }
- } else
- F_CLR(c, WT_CURSTD_KEY_SET);
- }
+ } else if (c != clsm->current && (ret =
+ __clsm_position_chunk(clsm, c, true, &cmp)) == 0 &&
+ cmp == 0 && clsm->current == NULL)
+ clsm->current = c;
WT_ERR_NOTFOUND_OK(ret);
}
- F_SET(clsm, WT_CLSM_ITERATE_NEXT);
+ F_SET(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_MULTIPLE);
F_CLR(clsm, WT_CLSM_ITERATE_PREV);
/* We just positioned *at* the key, now move. */
@@ -921,19 +948,16 @@ retry: /*
* forward.
*/
if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) {
- check = false;
WT_FORALL_CURSORS(clsm, c, i) {
if (!F_ISSET(c, WT_CURSTD_KEY_INT))
continue;
- if (check) {
+ if (c != clsm->current) {
WT_ERR(WT_LSM_CURCMP(session,
clsm->lsm_tree, c, clsm->current,
cmp));
if (cmp == 0)
WT_ERR_NOTFOUND_OK(c->next(c));
}
- if (c == clsm->current)
- check = true;
}
}
@@ -1055,7 +1079,7 @@ __clsm_prev(WT_CURSOR *cursor)
WT_SESSION_IMPL *session;
u_int i;
int cmp;
- bool check, deleted;
+ bool deleted;
clsm = (WT_CURSOR_LSM *)cursor;
@@ -1065,28 +1089,17 @@ __clsm_prev(WT_CURSOR *cursor)
/* If we aren't positioned for a reverse scan, get started. */
if (clsm->current == NULL || !F_ISSET(clsm, WT_CLSM_ITERATE_PREV)) {
- F_CLR(clsm, WT_CLSM_MULTIPLE);
WT_FORALL_CURSORS(clsm, c, i) {
if (!F_ISSET(cursor, WT_CURSTD_KEY_SET)) {
WT_ERR(c->reset(c));
ret = c->prev(c);
- } else if (c != clsm->current) {
- c->set_key(c, &cursor->key);
- if ((ret = c->search_near(c, &cmp)) == 0) {
- if (cmp > 0)
- ret = c->prev(c);
- else if (cmp == 0) {
- if (clsm->current == NULL)
- clsm->current = c;
- else
- F_SET(clsm,
- WT_CLSM_MULTIPLE);
- }
- }
- }
+ } else if (c != clsm->current && (ret =
+ __clsm_position_chunk(clsm, c, false, &cmp)) == 0 &&
+ cmp == 0 && clsm->current == NULL)
+ clsm->current = c;
WT_ERR_NOTFOUND_OK(ret);
}
- F_SET(clsm, WT_CLSM_ITERATE_PREV);
+ F_SET(clsm, WT_CLSM_ITERATE_PREV | WT_CLSM_MULTIPLE);
F_CLR(clsm, WT_CLSM_ITERATE_NEXT);
/* We just positioned *at* the key, now move. */
@@ -1098,23 +1111,20 @@ retry: /*
* backwards.
*/
if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) {
- check = false;
WT_FORALL_CURSORS(clsm, c, i) {
if (!F_ISSET(c, WT_CURSTD_KEY_INT))
continue;
- if (check) {
+ if (c != clsm->current) {
WT_ERR(WT_LSM_CURCMP(session,
clsm->lsm_tree, c, clsm->current,
cmp));
if (cmp == 0)
WT_ERR_NOTFOUND_OK(c->prev(c));
}
- if (c == clsm->current)
- check = true;
}
}
- /* Move the smallest cursor backwards. */
+ /* Move the largest cursor backwards. */
c = clsm->current;
WT_ERR_NOTFOUND_OK(c->prev(c));
}
@@ -1279,6 +1289,7 @@ __clsm_search(WT_CURSOR *cursor)
WT_ERR(__cursor_needkey(cursor));
__cursor_novalue(cursor);
WT_ERR(__clsm_enter(clsm, true, false));
+ F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
ret = __clsm_lookup(clsm, &cursor->value);
diff --git a/src/third_party/wiredtiger/src/optrack/optrack.c b/src/third_party/wiredtiger/src/optrack/optrack.c
index 8258a715927..dd630785cd5 100644
--- a/src/third_party/wiredtiger/src/optrack/optrack.c
+++ b/src/third_party/wiredtiger/src/optrack/optrack.c
@@ -10,12 +10,13 @@
/*
* __wt_optrack_record_funcid --
- * Record optrack function id
+ * Allocate and record optrack function ID.
*/
void
__wt_optrack_record_funcid(
WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp)
{
+ static uint16_t optrack_uid = 0; /* Unique for the process lifetime. */
WT_CONNECTION_IMPL *conn;
WT_DECL_ITEM(tmp);
WT_DECL_RET;
@@ -27,7 +28,7 @@ __wt_optrack_record_funcid(
__wt_spin_lock(session, &conn->optrack_map_spinlock);
if (*func_idp == 0) {
- *func_idp = ++conn->optrack_uid;
+ *func_idp = ++optrack_uid;
WT_ERR(__wt_buf_fmt(
session, tmp, "%" PRIu16 " %s\n", *func_idp, func));
diff --git a/src/third_party/wiredtiger/src/os_common/os_abort.c b/src/third_party/wiredtiger/src/os_common/os_abort.c
index a725ad9151d..85dcc741855 100644
--- a/src/third_party/wiredtiger/src/os_common/os_abort.c
+++ b/src/third_party/wiredtiger/src/os_common/os_abort.c
@@ -24,7 +24,7 @@ __wt_abort(WT_SESSION_IMPL *session)
/* Sleep forever, the debugger will interrupt us when it attaches. */
for (i = 0; i < WT_MILLION; ++i)
- __wt_sleep(10, 0);
+ __wt_sleep(100, 0);
#else
__wt_errx(session, "aborting WiredTiger library");
#endif
diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
index daaf55d65d2..b944bbda520 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
@@ -66,8 +66,10 @@ __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond,
/* Fast path if already signalled. */
*signalled = true;
- if (__wt_atomic_addi32(&cond->waiters, 1) == 0)
+ if (__wt_atomic_addi32(&cond->waiters, 1) == 0) {
+ WT_TRACK_OP_END(session);
return;
+ }
__wt_verbose(session, WT_VERB_MUTEX, "wait %s", cond->name);
WT_STAT_CONN_INCR(session, cond_wait);
@@ -138,10 +140,10 @@ err: (void)__wt_atomic_subi32(&cond->waiters, 1);
if (locked)
WT_TRET(pthread_mutex_unlock(&cond->mtx));
- if (ret == 0) {
- WT_TRACK_OP_END(session);
+
+ WT_TRACK_OP_END(session);
+ if (ret == 0)
return;
- }
WT_PANIC_MSG(session, ret, "pthread_cond_wait: %s", cond->name);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index d9b415a76cd..044d31b8fbd 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -6155,10 +6155,14 @@ __rec_las_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
__wt_las_cursor(session, &cursor, &session_flags);
for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
- if (multi->supd != NULL)
+ if (multi->supd != NULL) {
WT_ERR(__wt_las_insert_block(
session, cursor, r->page, multi, key));
+ __wt_free(session, multi->supd);
+ multi->supd_entries = 0;
+ }
+
err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
__wt_scr_free(session, &key);
@@ -6184,7 +6188,7 @@ __rec_las_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r)
*/
for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
if (multi->supd != NULL && multi->page_las.las_pageid != 0)
- WT_TRET(__wt_las_remove_block(session, NULL,
+ WT_TRET(__wt_las_remove_block(session,
btree_id, multi->page_las.las_pageid));
return (ret);
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index fd091cb5b13..be3a5d93473 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -109,6 +109,37 @@ __wt_session_release_resources(WT_SESSION_IMPL *session)
}
/*
+ * __session_clear_commit_queue --
+ * We're about to clear the session and overwrite the txn structure.
+ * Remove ourselves from the commit timestamp queue if we're on it.
+ */
+static void
+__session_clear_commit_queue(WT_SESSION_IMPL *session)
+{
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+
+ txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
+
+ if (!txn->clear_ts_queue)
+ return;
+
+ __wt_writelock(session, &txn_global->commit_timestamp_rwlock);
+ /*
+ * Recheck after acquiring the lock.
+ */
+ if (txn->clear_ts_queue) {
+ TAILQ_REMOVE(
+ &txn_global->commit_timestamph, txn, commit_timestampq);
+ --txn_global->commit_timestampq_len;
+ txn->clear_ts_queue = false;
+ }
+ __wt_writeunlock(session, &txn_global->commit_timestamp_rwlock);
+
+}
+
+/*
* __session_clear --
* Clear a session structure.
*/
@@ -127,6 +158,7 @@ __session_clear(WT_SESSION_IMPL *session)
*
* For these reasons, be careful when clearing the session structure.
*/
+ __session_clear_commit_queue(session);
memset(session, 0, WT_SESSION_CLEAR_SIZE);
WT_INIT_LSN(&session->bg_sync_lsn);
@@ -1482,6 +1514,31 @@ err: API_END_RET(session, ret);
}
/*
+ * __session_prepare_transaction --
+ * WT_SESSION->prepare_transaction method.
+ */
+static int
+__session_prepare_transaction(WT_SESSION *wt_session, const char *config)
+{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL(session, prepare_transaction, config, cfg);
+
+ WT_ERR(__wt_txn_context_check(session, true));
+
+ WT_TRET(__wt_txn_prepare(session, cfg));
+
+ /*
+ * Below code to be corrected as part of prepare functionality
+ * implementation, coded as below to avoid setting error to transaction.
+ */
+
+err: API_END_RET_NO_TXN_ERROR(session, ret);
+}
+
+/*
* __session_rollback_transaction --
* WT_SESSION->rollback_transaction method.
*/
@@ -1825,6 +1882,7 @@ __open_session(WT_CONNECTION_IMPL *conn,
__session_verify,
__session_begin_transaction,
__session_commit_transaction,
+ __session_prepare_transaction,
__session_rollback_transaction,
__session_timestamp_transaction,
__session_checkpoint,
@@ -1855,6 +1913,7 @@ __open_session(WT_CONNECTION_IMPL *conn,
__session_verify,
__session_begin_transaction,
__session_commit_transaction,
+ __session_prepare_transaction,
__session_rollback_transaction,
__session_timestamp_transaction,
__session_checkpoint_readonly,
diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c
index c7cfa0c3932..0569d0545e6 100644
--- a/src/third_party/wiredtiger/src/support/err.c
+++ b/src/third_party/wiredtiger/src/support/err.c
@@ -169,11 +169,11 @@ __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler)
} while (0)
/*
- * __wt_eventv --
+ * __eventv --
* Report a message to an event handler.
*/
-int
-__wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error,
+static int
+__eventv(WT_SESSION_IMPL *session, bool msg_event, int error,
const char *file_name, int line_number, const char *fmt, va_list ap)
WT_GCC_FUNC_ATTRIBUTE((cold))
{
@@ -193,7 +193,7 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error,
* SECURITY:
* Buffer placed at the end of the stack in case snprintf overflows.
*/
- char s[2048];
+ char s[4 * 1024];
p = s;
remain = sizeof(s);
@@ -279,6 +279,17 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error,
__handler_failure(session, ret, "error", true);
}
+ /*
+ * The buffer is fixed sized, complain if we overflow. (The test is for
+ * no more bytes remaining in the buffer, so technically we might have
+ * filled it exactly.) Be cautious changing this code, it's a recursive
+ * call.
+ */
+ if (ret == 0 && remain == 0)
+ __wt_err(session, ENOMEM,
+ "error or message truncated: internal WiredTiger buffer "
+ "too small");
+
if (ret != 0) {
err: if (fprintf(stderr,
"WiredTiger Error%s%s: ",
@@ -314,7 +325,7 @@ __wt_err(WT_SESSION_IMPL *session, int error, const char *fmt, ...)
* an error value to return.
*/
va_start(ap, fmt);
- WT_IGNORE_RET(__wt_eventv(session, false, error, NULL, 0, fmt, ap));
+ WT_IGNORE_RET(__eventv(session, false, error, NULL, 0, fmt, ap));
va_end(ap);
}
@@ -334,7 +345,7 @@ __wt_errx(WT_SESSION_IMPL *session, const char *fmt, ...)
* an error value to return.
*/
va_start(ap, fmt);
- WT_IGNORE_RET(__wt_eventv(session, false, 0, NULL, 0, fmt, ap));
+ WT_IGNORE_RET(__eventv(session, false, 0, NULL, 0, fmt, ap));
va_end(ap);
}
@@ -355,7 +366,7 @@ __wt_ext_err_printf(
session = ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session;
va_start(ap, fmt);
- ret = __wt_eventv(session, false, 0, NULL, 0, fmt, ap);
+ ret = __eventv(session, false, 0, NULL, 0, fmt, ap);
va_end(ap);
return (ret);
}
@@ -372,7 +383,7 @@ __wt_verbose_worker(WT_SESSION_IMPL *session, const char *fmt, ...)
va_list ap;
va_start(ap, fmt);
- WT_IGNORE_RET(__wt_eventv(session, true, 0, NULL, 0, fmt, ap));
+ WT_IGNORE_RET(__eventv(session, true, 0, NULL, 0, fmt, ap));
va_end(ap);
}
@@ -493,7 +504,7 @@ __wt_assert(WT_SESSION_IMPL *session,
va_list ap;
va_start(ap, fmt);
- WT_IGNORE_RET(__wt_eventv(
+ WT_IGNORE_RET(__eventv(
session, false, error, file_name, line_number, fmt, ap));
va_end(ap);
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 926176d6024..97c22cd5031 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -1023,7 +1023,7 @@ static const char * const __stats_connection_desc[] = {
"thread-yield: page reconciliation yielded due to child modification",
"thread-yield: tree descend one level yielded for split page index update",
"transaction: commit timestamp queue insert to empty",
- "transaction: commit timestamp queue inserts to head",
+ "transaction: commit timestamp queue inserts to tail",
"transaction: commit timestamp queue inserts total",
"transaction: commit timestamp queue length",
"transaction: number of named snapshots created",
@@ -1403,7 +1403,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->child_modify_blocked_page = 0;
stats->tree_descend_blocked = 0;
stats->txn_commit_queue_empty = 0;
- stats->txn_commit_queue_head = 0;
+ stats->txn_commit_queue_tail = 0;
stats->txn_commit_queue_inserts = 0;
stats->txn_commit_queue_len = 0;
stats->txn_snapshots_created = 0;
@@ -1909,8 +1909,8 @@ __wt_stat_connection_aggregate(
to->tree_descend_blocked += WT_STAT_READ(from, tree_descend_blocked);
to->txn_commit_queue_empty +=
WT_STAT_READ(from, txn_commit_queue_empty);
- to->txn_commit_queue_head +=
- WT_STAT_READ(from, txn_commit_queue_head);
+ to->txn_commit_queue_tail +=
+ WT_STAT_READ(from, txn_commit_queue_tail);
to->txn_commit_queue_inserts +=
WT_STAT_READ(from, txn_commit_queue_inserts);
to->txn_commit_queue_len += WT_STAT_READ(from, txn_commit_queue_len);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 5d70acb90f0..627bafa7483 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -449,6 +449,14 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_txn_parse_timestamp(session, "read", &ts, &cval));
/*
+ * Prepare transactions are supported only in timestamp build.
+ */
+ WT_RET(__wt_config_gets_def(session,
+ cfg, "ignore_prepare", 0, &cval));
+ if (cval.val)
+ F_SET(txn, WT_TXN_IGNORE_PREPARE);
+
+ /*
* Read the configuration here to reduce the span of the
* critical section.
*/
@@ -608,11 +616,12 @@ __wt_txn_release(WT_SESSION_IMPL *session)
static inline int
__txn_commit_timestamp_validate(WT_SESSION_IMPL *session)
{
+ WT_DECL_TIMESTAMP(op_timestamp)
WT_TXN *txn;
WT_TXN_OP *op;
WT_UPDATE *upd;
u_int i;
- bool op_used_ts, upd_used_ts;
+ bool op_zero_ts, upd_zero_ts;
txn = &session->txn;
@@ -644,10 +653,12 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session)
if (op->type == WT_TXN_OP_BASIC_TS ||
op->type == WT_TXN_OP_BASIC) {
/*
- * Skip over any aborted update structures.
+ * Skip over any aborted update structures or ones
+ * from our own transaction.
*/
upd = op->u.upd->next;
- while (upd != NULL && upd->txnid == WT_TXN_ABORTED)
+ while (upd != NULL && (upd->txnid == WT_TXN_ABORTED ||
+ upd->txnid == txn->id))
upd = upd->next;
/*
@@ -660,16 +671,31 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session)
/*
* Check for consistent per-key timestamp usage.
* If timestamps are or are not used originally then
- * they should be used the same way always. Check
- * timestamps are used in order.
+ * they should be used the same way always. For this
+ * transaction, timestamps are in use anytime the
+ * commit timestamp is set.
+ * Check timestamps are used in order.
*/
- op_used_ts =
- __wt_timestamp_iszero(&op->u.upd->timestamp);
- upd_used_ts = __wt_timestamp_iszero(&upd->timestamp);
- if (op_used_ts != upd_used_ts)
+ op_zero_ts = !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
+ upd_zero_ts = __wt_timestamp_iszero(&upd->timestamp);
+ if (op_zero_ts != upd_zero_ts)
WT_RET_MSG(session, EINVAL,
"per-key timestamps used inconsistently");
- if (__wt_timestamp_cmp(&op->u.upd->timestamp,
+ /*
+ * If we aren't using timestamps for this transaction
+ * then we are done checking. Don't check the timestamp
+ * because the one in the transaction is not cleared.
+ */
+ if (op_zero_ts)
+ continue;
+ op_timestamp = op->u.upd->timestamp;
+ /*
+ * Only if the update structure doesn't have a timestamp
+ * then use the one in the transaction structure.
+ */
+ if (__wt_timestamp_iszero(&op->u.upd->timestamp))
+ op_timestamp = txn->commit_timestamp;
+ if (__wt_timestamp_cmp(&op_timestamp,
&upd->timestamp) < 0)
WT_RET_MSG(session, EINVAL,
"out of order timestamps");
@@ -941,6 +967,22 @@ err: /*
}
/*
+ * __wt_txn_prepare --
+ * Prepare the current transaction.
+ */
+int
+__wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_UNUSED(cfg);
+
+#ifdef HAVE_TIMESTAMPS
+ WT_RET_MSG(session, ENOTSUP, "prepare_transaction is not supported");
+#else
+ WT_RET_MSG(session, ENOTSUP, "prepare_transaction requires a version "
+ "of WiredTiger built with timestamp support");
+#endif
+}
+/*
* __wt_txn_rollback --
* Roll back the current transaction.
*/
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 0af70c4090d..d2d07b9e6d7 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -22,13 +22,13 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
WT_DECL_TIMESTAMP(rollback_timestamp)
WT_ITEM las_key, las_timestamp, las_value;
WT_TXN_GLOBAL *txn_global;
- uint64_t las_counter, las_pageid, las_total, las_txnid;
+ uint64_t las_counter, las_pageid, las_total, las_txnid, remove_cnt;
uint32_t las_id, session_flags;
uint8_t upd_type;
conn = S2C(session);
cursor = NULL;
- las_total = 0;
+ las_total = remove_cnt = 0;
session_flags = 0; /* [-Werror=maybe-uninitialized] */
WT_CLEAR(las_timestamp);
@@ -49,6 +49,7 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
F_SET(session, WT_SESSION_READ_WONT_NEED);
/* Walk the file. */
+ __wt_writelock(session, &conn->cache->las_sweepwalk_lock);
while ((ret = cursor->next(cursor)) == 0) {
WT_ERR(cursor->get_key(cursor,
&las_pageid, &las_id, &las_counter, &las_key));
@@ -70,13 +71,17 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
* be removed.
*/
if (__wt_timestamp_cmp(
- &rollback_timestamp, las_timestamp.data) < 0)
+ &rollback_timestamp, las_timestamp.data) < 0) {
WT_ERR(cursor->remove(cursor));
- else
+ ++remove_cnt;
+ } else
++las_total;
}
WT_ERR_NOTFOUND_OK(ret);
-err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+err: __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock);
+ WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+ __wt_cache_decr_check_uint64(session,
+ &conn->cache->las_entry_count, remove_cnt, "lookaside entry count");
WT_STAT_CONN_SET(session, cache_lookaside_entries, las_total);
F_CLR(session, WT_SESSION_READ_WONT_NEED);
@@ -424,8 +429,14 @@ __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[])
* Mark that a rollback operation is in progress and wait for eviction
* to drain. This is necessary because lookaside eviction uses
* transactions and causes the check for a quiescent system to fail.
+ *
+ * Configuring lookaside eviction off isn't atomic, safe because the
+ * flag is only otherwise set when closing down the database. Assert
+ * to avoid confusion in the future.
*/
+ WT_ASSERT(session, !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE));
F_SET(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
+
WT_ERR(__wt_conn_btree_apply(session,
NULL, __txn_rollback_eviction_drain, NULL, cfg));
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 41ac970f14e..d07bfecd47c 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -212,11 +212,20 @@ __txn_global_query_timestamp(
/* Compare with the oldest running transaction. */
__wt_readlock(session, &txn_global->commit_timestamp_rwlock);
- txn = TAILQ_FIRST(&txn_global->commit_timestamph);
- if (txn != NULL &&
- __wt_timestamp_cmp(&txn->first_commit_timestamp, &ts) < 0) {
- __wt_timestamp_set(&ts, &txn->first_commit_timestamp);
- WT_ASSERT(session, !__wt_timestamp_iszero(&ts));
+ TAILQ_FOREACH(txn, &txn_global->commit_timestamph,
+ commit_timestampq) {
+ if (txn->clear_ts_queue)
+ continue;
+ /*
+ * Compare on the first real running transaction.
+ */
+ if (__wt_timestamp_cmp(
+ &txn->first_commit_timestamp, &ts) < 0) {
+ __wt_timestamp_set(
+ &ts, &txn->first_commit_timestamp);
+ WT_ASSERT(session, !__wt_timestamp_iszero(&ts));
+ }
+ break;
}
__wt_readunlock(session, &txn_global->commit_timestamp_rwlock);
} else if (WT_STRING_MATCH("oldest", cval.str, cval.len)) {
@@ -652,7 +661,7 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
void
__wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session)
{
- WT_TXN *prev, *txn;
+ WT_TXN *qtxn, *txn, *txn_tmp;
WT_TXN_GLOBAL *txn_global;
wt_timestamp_t ts;
@@ -668,27 +677,65 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session)
* fixed.
*/
__wt_timestamp_set(&ts, &txn->commit_timestamp);
- __wt_timestamp_set(&txn->first_commit_timestamp, &ts);
__wt_writelock(session, &txn_global->commit_timestamp_rwlock);
- prev = TAILQ_LAST(&txn_global->commit_timestamph, __wt_txn_cts_qh);
- if (prev == NULL)
- WT_STAT_CONN_INCR(session, txn_commit_queue_empty);
- for (; prev != NULL &&
- __wt_timestamp_cmp(&prev->first_commit_timestamp, &ts) > 0;
- prev = TAILQ_PREV(prev, __wt_txn_cts_qh, commit_timestampq))
- ;
- if (prev == NULL) {
+ /*
+ * If our transaction is on the queue remove it first. The timestamp
+ * may move earlier so we otherwise might not remove ourselves before
+ * finding where to insert ourselves (which would result in a list
+ * loop) and we don't want to walk more of the list than needed.
+ */
+ if (txn->clear_ts_queue) {
+ TAILQ_REMOVE(&txn_global->commit_timestamph,
+ txn, commit_timestampq);
+ WT_PUBLISH(txn->clear_ts_queue, false);
+ --txn_global->commit_timestampq_len;
+ }
+ /*
+ * Walk the list to look for where to insert our own transaction
+ * and remove any transactions that are not active. We stop when
+ * we get to the location where we want to insert.
+ */
+ if (TAILQ_EMPTY(&txn_global->commit_timestamph)) {
TAILQ_INSERT_HEAD(
&txn_global->commit_timestamph, txn, commit_timestampq);
- WT_STAT_CONN_INCR(session, txn_commit_queue_head);
- } else
- TAILQ_INSERT_AFTER(&txn_global->commit_timestamph,
- prev, txn, commit_timestampq);
+ WT_STAT_CONN_INCR(session, txn_commit_queue_empty);
+ } else {
+ TAILQ_FOREACH_SAFE(qtxn, &txn_global->commit_timestamph,
+ commit_timestampq, txn_tmp) {
+ if (qtxn->clear_ts_queue) {
+ TAILQ_REMOVE(&txn_global->commit_timestamph,
+ qtxn, commit_timestampq);
+ WT_PUBLISH(qtxn->clear_ts_queue, false);
+ --txn_global->commit_timestampq_len;
+ continue;
+ }
+ /*
+ * Only walk the list up until we get to the place where
+ * we want to insert our timestamp. Some other thread
+ * will remove any later transactions.
+ */
+ if (__wt_timestamp_cmp(
+ &qtxn->first_commit_timestamp, &ts) > 0)
+ break;
+ }
+ /*
+ * If we got to the end, then our timestamp is larger than
+ * the last element's timestamp. Insert at the end.
+ */
+ if (qtxn == NULL) {
+ TAILQ_INSERT_TAIL(&txn_global->commit_timestamph,
+ txn, commit_timestampq);
+ WT_STAT_CONN_INCR(session, txn_commit_queue_tail);
+ } else
+ TAILQ_INSERT_BEFORE(qtxn, txn, commit_timestampq);
+ }
+ __wt_timestamp_set(&txn->first_commit_timestamp, &ts);
++txn_global->commit_timestampq_len;
WT_STAT_CONN_INCR(session, txn_commit_queue_inserts);
- __wt_writeunlock(session, &txn_global->commit_timestamp_rwlock);
+ txn->clear_ts_queue = false;
F_SET(txn, WT_TXN_HAS_TS_COMMIT | WT_TXN_PUBLIC_TS_COMMIT);
+ __wt_writeunlock(session, &txn_global->commit_timestamp_rwlock);
}
/*
@@ -699,19 +746,22 @@ void
__wt_txn_clear_commit_timestamp(WT_SESSION_IMPL *session)
{
WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
+ uint32_t flags;
txn = &session->txn;
- txn_global = &S2C(session)->txn_global;
if (!F_ISSET(txn, WT_TXN_PUBLIC_TS_COMMIT))
return;
+ flags = txn->flags;
+ LF_CLR(WT_TXN_PUBLIC_TS_COMMIT);
- __wt_writelock(session, &txn_global->commit_timestamp_rwlock);
- TAILQ_REMOVE(&txn_global->commit_timestamph, txn, commit_timestampq);
- --txn_global->commit_timestampq_len;
- __wt_writeunlock(session, &txn_global->commit_timestamp_rwlock);
- F_CLR(txn, WT_TXN_PUBLIC_TS_COMMIT);
+ /*
+ * Notify other threads that our transaction is inactive and can be
+ * cleaned up safely from the commit timestamp queue whenever the next
+ * thread walks the queue. We do not need to remove it now.
+ */
+ WT_PUBLISH(txn->clear_ts_queue, true);
+ WT_PUBLISH(txn->flags, flags);
}
/*
diff --git a/src/third_party/wiredtiger/test/csuite/rwlock/main.c b/src/third_party/wiredtiger/test/csuite/rwlock/main.c
index 2b4e9144fe4..e1d00344ee2 100644
--- a/src/third_party/wiredtiger/test/csuite/rwlock/main.c
+++ b/src/third_party/wiredtiger/test/csuite/rwlock/main.c
@@ -50,8 +50,8 @@ void *thread_dump(void *);
int
main(int argc, char *argv[])
{
- TEST_OPTS *opts, _opts;
struct timespec te, ts;
+ TEST_OPTS *opts, _opts;
pthread_t dump_id, id[MAX_THREADS];
int i;
diff --git a/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c b/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c
index 85a581d73c9..3e64b86599d 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c
@@ -58,8 +58,8 @@ get_stat_total(WT_SESSION *session, WT_CURSOR *jcursor, const char *descmatch,
WT_CURSOR *statcursor;
uint64_t val;
int ret;
- bool match;
char *desc, *valstr;
+ bool match;
match = false;
*pval = 0;
@@ -91,8 +91,8 @@ main(int argc, char *argv[])
WT_SESSION *session;
uint64_t maincount;
int half, i, j;
- const char *tablename;
char bloom_cfg[128], index1uri[256], index2uri[256], joinuri[256];
+ const char *tablename;
opts = &_opts;
memset(opts, 0, sizeof(*opts));
diff --git a/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c b/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c
index 8a130f8d958..39c0cbdbce8 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c
@@ -48,8 +48,8 @@ main(int argc, char *argv[])
WT_RAND_STATE rnd;
size_t len;
uint32_t hw, sw;
- u_int i, j;
uint8_t *data;
+ u_int i, j;
opts = &_opts;
memset(opts, 0, sizeof(*opts));
diff --git a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c
index de05c459e10..58df56b50b1 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c
@@ -48,16 +48,16 @@ int
main(int argc, char *argv[])
{
TEST_OPTS *opts, _opts;
- WT_CURSOR *maincur;
WT_CURSOR *balancecur, *flagcur, *joincur, *postcur;
+ WT_CURSOR *maincur;
WT_SESSION *session;
int balance, count, flag, key, key2, post, ret;
- char cfg[128];
- const char *tablename;
- char posturi[256];
char balanceuri[256];
+ char cfg[128];
char flaguri[256];
char joinuri[256];
+ char posturi[256];
+ const char *tablename;
/* Ignore unless requested */
if (!testutil_is_flag_set("TESTUTIL_ENABLE_LONG_TESTS"))
@@ -177,10 +177,10 @@ void
populate(TEST_OPTS *opts)
{
WT_CURSOR *maincur;
+ WT_RAND_STATE rnd;
WT_SESSION *session;
uint32_t key;
int balance, i, flag, post;
- WT_RAND_STATE rnd;
__wt_random_init_seed(NULL, &rnd);
diff --git a/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c b/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c
index fd713e50ba0..6c463297e93 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c
@@ -477,10 +477,10 @@ static WT_EVENT_HANDLER event_handler = {
static void
subtest_main(int argc, char *argv[], bool close_test)
{
+ struct rlimit rlim;
TEST_OPTS *opts, _opts;
WT_SESSION *session;
char config[1024], filename[1024];
- struct rlimit rlim;
opts = &_opts;
memset(opts, 0, sizeof(*opts));
diff --git a/src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c b/src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c
index 502c0d05a31..cf6b931c027 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c
@@ -90,8 +90,8 @@ main(int argc, char *argv[])
WT_CURSOR *cursor1, *cursor2, *jcursor;
WT_ITEM k, v;
WT_SESSION *session;
- int i, ret;
int32_t key, val[2];
+ int i, ret;
opts = &_opts;
memset(opts, 0, sizeof(*opts));
diff --git a/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c b/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c
index 82d8cae5d04..4a541c31e3a 100644
--- a/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c
@@ -44,8 +44,8 @@ main(int argc, char *argv[])
TEST_OPTS *opts, _opts;
WT_CURSOR *cursor;
WT_SESSION *session;
- char *kstr, *vstr;
char buf[1024];
+ char *kstr, *vstr;
opts = &_opts;
memset(opts, 0, sizeof(*opts));
diff --git a/src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c b/src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c
index c985b1f8f32..cb8e1de5d6e 100644
--- a/src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c
@@ -38,18 +38,11 @@
* account the collator.
*/
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <wiredtiger.h>
-
#define KEY_SIZE 20
static int
my_compare(WT_COLLATOR *collator, WT_SESSION *session,
- const WT_ITEM *v1, const WT_ITEM *v2, int *cmp)
+ const WT_ITEM *v1, const WT_ITEM *v2, int *cmp)
{
(void)collator;
(void)session;
@@ -67,10 +60,10 @@ main(int argc, char *argv[])
{
TEST_OPTS *opts, _opts;
WT_CONNECTION *conn;
- WT_SESSION *session;
WT_CURSOR *cursor;
- char buf[KEY_SIZE];
WT_ITEM key;
+ WT_SESSION *session;
+ char buf[KEY_SIZE];
opts = &_opts;
memset(opts, 0, sizeof(*opts));
diff --git a/src/third_party/wiredtiger/test/format/bdb.c b/src/third_party/wiredtiger/test/format/bdb.c
index a3a9ad3a62f..adf32713cd2 100644
--- a/src/third_party/wiredtiger/test/format/bdb.c
+++ b/src/third_party/wiredtiger/test/format/bdb.c
@@ -116,7 +116,7 @@ bdb_insert(
}
void
-bdb_np(int next,
+bdb_np(bool next,
void *keyp, size_t *keysizep,
void *valuep, size_t *valuesizep, int *notfoundp)
{
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 9191a73a134..a80c7de5c92 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -278,6 +278,10 @@ typedef struct {
uint64_t remove;
uint64_t ops;
+ uint64_t keyno; /* current key, value */
+ WT_ITEM *key, _key;
+ WT_ITEM *value, _value;
+
#define TINFO_RUNNING 1 /* Running */
#define TINFO_COMPLETE 2 /* Finished */
#define TINFO_JOINED 3 /* Resolved */
@@ -287,7 +291,7 @@ typedef struct {
#ifdef HAVE_BERKELEY_DB
void bdb_close(void);
void bdb_insert(const void *, size_t, const void *, size_t);
-void bdb_np(int, void *, size_t *, void *, size_t *, int *);
+void bdb_np(bool, void *, size_t *, void *, size_t *, int *);
void bdb_open(void);
void bdb_read(uint64_t, void *, size_t *, int *);
void bdb_remove(uint64_t, int *);
@@ -314,7 +318,7 @@ WT_THREAD_RET lrt(void *);
void path_setup(const char *);
void print_item(const char *, WT_ITEM *);
void print_item_data(const char *, const uint8_t *, size_t);
-int read_row(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
+int read_row_worker(WT_CURSOR *, uint64_t, WT_ITEM *, WT_ITEM *, bool);
uint32_t rng(WT_RAND_STATE *);
WT_THREAD_RET timestamp(void *);
void track(const char *, uint64_t, TINFO *);
diff --git a/src/third_party/wiredtiger/test/format/lrt.c b/src/third_party/wiredtiger/test/format/lrt.c
index fdf91508dd6..9d99933ef64 100644
--- a/src/third_party/wiredtiger/test/format/lrt.c
+++ b/src/third_party/wiredtiger/test/format/lrt.c
@@ -71,12 +71,12 @@ lrt(void *arg)
for (pinned = 0;;) {
if (pinned) {
/* Re-read the record at the end of the table. */
- while ((ret = read_row(
- cursor, &key, &value, saved_keyno)) == WT_ROLLBACK)
+ while ((ret = read_row_worker(cursor,
+ saved_keyno, &key, &value, false)) == WT_ROLLBACK)
;
if (ret != 0)
testutil_die(ret,
- "read_row %" PRIu64, saved_keyno);
+ "read_row_worker %" PRIu64, saved_keyno);
/* Compare the previous value with the current one. */
if (g.type == FIX) {
@@ -131,13 +131,14 @@ lrt(void *arg)
saved_keyno = mmrand(NULL,
(u_int)(g.key_cnt - g.key_cnt / 10),
(u_int)g.key_cnt);
- while ((ret = read_row(cursor,
- &key, &value, saved_keyno)) == WT_ROLLBACK)
+ while ((ret = read_row_worker(cursor,
+ saved_keyno,
+ &key, &value, false)) == WT_ROLLBACK)
;
} while (ret == WT_NOTFOUND);
if (ret != 0)
testutil_die(ret,
- "read_row %" PRIu64, saved_keyno);
+ "read_row_worker %" PRIu64, saved_keyno);
/* Copy the cursor's value. */
if (g.type == FIX) {
@@ -160,12 +161,13 @@ lrt(void *arg)
*/
do {
keyno = mmrand(NULL, 1, (u_int)g.key_cnt / 5);
- while ((ret = read_row(cursor,
- &key, &value, keyno)) == WT_ROLLBACK)
+ while ((ret = read_row_worker(cursor,
+ keyno, &key, &value, false)) == WT_ROLLBACK)
;
} while (ret == WT_NOTFOUND);
if (ret != 0)
- testutil_die(ret, "read_row %" PRIu64, keyno);
+ testutil_die(ret,
+ "read_row_worker %" PRIu64, keyno);
pinned = 1;
}
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 6ac2f10af95..bc11c2ba8f8 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -28,23 +28,19 @@
#include "format.h"
-static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *);
-static int col_modify(
- TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool);
-static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool);
-static int col_reserve(WT_CURSOR *, uint64_t, bool);
-static int col_update(
- TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool);
-static int nextprev(WT_CURSOR *, int);
+static int col_insert(TINFO *, WT_CURSOR *);
+static int col_modify(TINFO *, WT_CURSOR *, bool);
+static int col_remove(TINFO *, WT_CURSOR *, bool);
+static int col_reserve(TINFO *, WT_CURSOR *, bool);
+static int col_update(TINFO *, WT_CURSOR *, bool);
+static int nextprev(TINFO *, WT_CURSOR *, bool);
static WT_THREAD_RET ops(void *);
-static int row_insert(
- TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool);
-static int row_modify(
- TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool);
-static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool);
-static int row_reserve(WT_CURSOR *, WT_ITEM *, uint64_t, bool);
-static int row_update(
- TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool);
+static int read_row(TINFO *, WT_CURSOR *);
+static int row_insert(TINFO *, WT_CURSOR *, bool);
+static int row_modify(TINFO *, WT_CURSOR *, bool);
+static int row_remove(TINFO *, WT_CURSOR *, bool);
+static int row_reserve(TINFO *, WT_CURSOR *, bool);
+static int row_update(TINFO *, WT_CURSOR *, bool);
static void table_append_init(void);
#ifdef HAVE_BERKELEY_DB
@@ -273,8 +269,10 @@ wts_ops(int lastrun)
free(tinfo_list);
}
+typedef enum { INSERT, MODIFY, READ, REMOVE, UPDATE } thread_op;
typedef struct {
- uint64_t keyno; /* Row number */
+ thread_op op; /* Operation */
+ uint64_t keyno; /* Row number */
void *kdata; /* If an insert, the generated key */
size_t ksize;
@@ -283,37 +281,36 @@ typedef struct {
void *vdata; /* If not a delete, the value */
size_t vsize;
size_t vmemsize;
-
- bool deleted; /* Delete operation */
- bool insert; /* Insert operation */
} SNAP_OPS;
-#define SNAP_TRACK \
- (snap != NULL && (size_t)(snap - snap_list) < WT_ELEMENTS(snap_list))
+#define SNAP_TRACK(op, keyno, key, value) do { \
+ if (snap != NULL && \
+ (size_t)(snap - snap_list) < WT_ELEMENTS(snap_list)) \
+ snap_track(snap++, op, keyno, key, value); \
+} while (0)
/*
* snap_track --
* Add a single snapshot isolation returned value to the list.
*/
static void
-snap_track(SNAP_OPS *snap, uint64_t keyno, WT_ITEM *key, WT_ITEM *value)
+snap_track(
+ SNAP_OPS *snap, thread_op op, uint64_t keyno, WT_ITEM *key, WT_ITEM *value)
{
+ snap->op = op;
snap->keyno = keyno;
- if (key == NULL)
- snap->insert = false;
- else {
- snap->insert = true;
+ testutil_assert(key == NULL || (op == INSERT && g.type == ROW));
+ if (key != NULL) {
if (snap->kmemsize < key->size) {
snap->kdata = drealloc(snap->kdata, key->size);
snap->kmemsize = key->size;
}
memcpy(snap->kdata, key->data, snap->ksize = key->size);
}
- if (value == NULL)
- snap->deleted = true;
- else {
- snap->deleted = false;
+
+ testutil_assert(value != NULL || op == REMOVE);
+ if (value != NULL) {
if (snap->vmemsize < value->size) {
snap->vdata = drealloc(snap->vdata, value->size);
snap->vmemsize = value->size;
@@ -346,7 +343,11 @@ snap_check(WT_CURSOR *cursor,
* unique generated key we saved, else generate the key from the
* key number.
*/
- if (start->insert == 0) {
+ if (start->op == INSERT && g.type == ROW) {
+ key->data = start->kdata;
+ key->size = start->ksize;
+ cursor->set_key(cursor, key);
+ } else {
switch (g.type) {
case FIX:
case VAR:
@@ -357,10 +358,6 @@ snap_check(WT_CURSOR *cursor,
cursor->set_key(cursor, key);
break;
}
- } else {
- key->data = start->kdata;
- key->size = start->ksize;
- cursor->set_key(cursor, key);
}
if ((ret = cursor->search(cursor)) == 0) {
if (g.type == FIX) {
@@ -376,11 +373,11 @@ snap_check(WT_CURSOR *cursor,
return (ret);
/* Check for simple matches. */
- if (ret == 0 && !start->deleted &&
+ if (ret == 0 && start->op != REMOVE &&
value->size == start->vsize &&
memcmp(value->data, start->vdata, value->size) == 0)
continue;
- if (ret == WT_NOTFOUND && start->deleted)
+ if (ret == WT_NOTFOUND && start->op == REMOVE)
continue;
/*
@@ -392,7 +389,7 @@ snap_check(WT_CURSOR *cursor,
if (ret == WT_NOTFOUND &&
start->vsize == 1 && *(uint8_t *)start->vdata == 0)
continue;
- if (start->deleted &&
+ if (start->op == REMOVE &&
value->size == 1 && *(uint8_t *)value->data == 0)
continue;
}
@@ -404,7 +401,7 @@ snap_check(WT_CURSOR *cursor,
"snapshot-isolation: %" PRIu64 " search: "
"expected {0x%02x}, found {0x%02x}",
start->keyno,
- start->deleted ? 0 : *(uint8_t *)start->vdata,
+ start->op == REMOVE ? 0 : *(uint8_t *)start->vdata,
ret == WT_NOTFOUND ? 0 : *(uint8_t *)value->data);
/* NOTREACHED */
case ROW:
@@ -412,7 +409,7 @@ snap_check(WT_CURSOR *cursor,
"snapshot-isolation %.*s search mismatch\n",
(int)key->size, (const char *)key->data);
- if (start->deleted)
+ if (start->op == REMOVE)
fprintf(stderr, "expected {deleted}\n");
else
print_item_data(
@@ -432,7 +429,7 @@ snap_check(WT_CURSOR *cursor,
"snapshot-isolation %" PRIu64 " search mismatch\n",
start->keyno);
- if (start->deleted)
+ if (start->op == REMOVE)
fprintf(stderr, "expected {deleted}\n");
else
print_item_data(
@@ -547,19 +544,17 @@ commit_transaction(TINFO *tinfo, WT_SESSION *session)
static WT_THREAD_RET
ops(void *arg)
{
- enum { INSERT, MODIFY, READ, REMOVE, UPDATE } op;
- SNAP_OPS *snap, snap_list[64];
+ SNAP_OPS *snap, snap_list[128];
TINFO *tinfo;
WT_CONNECTION *conn;
WT_CURSOR *cursor;
WT_DECL_RET;
- WT_ITEM *key, _key, *value, _value;
WT_SESSION *session;
- uint64_t keyno, reset_op, session_op;
+ thread_op op;
+ uint64_t reset_op, session_op;
uint32_t rnd;
- u_int i, iso_config;
- int dir;
- bool intxn, positioned, readonly;
+ u_int i, j, iso_config;
+ bool intxn, next, positioned, readonly;
tinfo = arg;
@@ -572,10 +567,10 @@ ops(void *arg)
memset(snap_list, 0, sizeof(snap_list));
/* Set up the default key and value buffers. */
- key = &_key;
- key_gen_init(key);
- value = &_value;
- val_gen_init(value);
+ tinfo->key = &tinfo->_key;
+ key_gen_init(tinfo->key);
+ tinfo->value = &tinfo->_value;
+ val_gen_init(tinfo->value);
/* Set the first operation where we'll create sessions and cursors. */
cursor = NULL;
@@ -682,8 +677,7 @@ ops(void *arg)
}
/* Select a row. */
- keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows);
- positioned = false;
+ tinfo->keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows);
/* Select an operation. */
op = READ;
@@ -710,13 +704,12 @@ ops(void *arg)
positioned = false;
if (op != READ && mmrand(&tinfo->rnd, 1, 5) == 1) {
++tinfo->search;
- ret = read_row(cursor, key, value, keyno);
+ ret = read_row(tinfo, cursor);
if (ret == 0) {
positioned = true;
- if (SNAP_TRACK)
- snap_track(snap++, keyno, NULL, value);
+ SNAP_TRACK(
+ READ, tinfo->keyno, NULL, tinfo->value);
} else {
- positioned = false;
if (ret == WT_ROLLBACK && intxn)
goto deadlock;
testutil_assert(ret == WT_NOTFOUND);
@@ -727,12 +720,11 @@ ops(void *arg)
if (!readonly && intxn && mmrand(&tinfo->rnd, 0, 20) == 1) {
switch (g.type) {
case ROW:
- ret =
- row_reserve(cursor, key, keyno, positioned);
+ ret = row_reserve(tinfo, cursor, positioned);
break;
case FIX:
case VAR:
- ret = col_reserve(cursor, keyno, positioned);
+ ret = col_reserve(tinfo, cursor, positioned);
break;
}
if (ret == 0) {
@@ -751,8 +743,7 @@ ops(void *arg)
case INSERT:
switch (g.type) {
case ROW:
- ret = row_insert(tinfo,
- cursor, key, value, keyno, positioned);
+ ret = row_insert(tinfo, cursor, positioned);
break;
case FIX:
case VAR:
@@ -764,8 +755,7 @@ ops(void *arg)
if (g.append_cnt >= g.append_max)
goto update_instead_of_chosen_op;
- ret = col_insert(
- tinfo, cursor, key, value, &keyno);
+ ret = col_insert(tinfo, cursor);
break;
}
@@ -773,9 +763,9 @@ ops(void *arg)
positioned = false;
if (ret == 0) {
++tinfo->insert;
- if (SNAP_TRACK)
- snap_track(snap++, keyno,
- g.type == ROW ? key : NULL, value);
+ SNAP_TRACK(INSERT, tinfo->keyno,
+ g.type == ROW ? tinfo->key : NULL,
+ tinfo->value);
} else {
if (ret == WT_ROLLBACK && intxn)
goto deadlock;
@@ -793,18 +783,16 @@ ops(void *arg)
++tinfo->update;
switch (g.type) {
case ROW:
- ret = row_modify(tinfo, cursor,
- key, value, keyno, positioned);
+ ret = row_modify(tinfo, cursor, positioned);
break;
case VAR:
- ret = col_modify(tinfo, cursor,
- key, value, keyno, positioned);
+ ret = col_modify(tinfo, cursor, positioned);
break;
}
if (ret == 0) {
positioned = true;
- if (SNAP_TRACK)
- snap_track(snap++, keyno, NULL, value);
+ SNAP_TRACK(
+ MODIFY, tinfo->keyno, NULL, tinfo->value);
} else {
positioned = false;
if (ret == WT_ROLLBACK && intxn)
@@ -815,11 +803,11 @@ ops(void *arg)
break;
case READ:
++tinfo->search;
- ret = read_row(cursor, key, value, keyno);
+ ret = read_row(tinfo, cursor);
if (ret == 0) {
positioned = true;
- if (SNAP_TRACK)
- snap_track(snap++, keyno, NULL, value);
+ SNAP_TRACK(
+ READ, tinfo->keyno, NULL, tinfo->value);
} else {
positioned = false;
if (ret == WT_ROLLBACK && intxn)
@@ -831,12 +819,12 @@ ops(void *arg)
switch (g.type) {
case ROW:
ret =
- row_remove(cursor, key, keyno, positioned);
+ row_remove(tinfo, cursor, positioned);
break;
case FIX:
case VAR:
ret =
- col_remove(cursor, key, keyno, positioned);
+ col_remove(tinfo, cursor, positioned);
break;
}
if (ret == 0) {
@@ -845,8 +833,7 @@ ops(void *arg)
* Don't set positioned: it's unchanged from the
* previous state, but not necessarily set.
*/
- if (SNAP_TRACK)
- snap_track(snap++, keyno, NULL, NULL);
+ SNAP_TRACK(REMOVE, tinfo->keyno, NULL, NULL);
} else {
positioned = false;
if (ret == WT_ROLLBACK && intxn)
@@ -859,19 +846,17 @@ update_instead_of_chosen_op:
++tinfo->update;
switch (g.type) {
case ROW:
- ret = row_update(tinfo, cursor,
- key, value, keyno, positioned);
+ ret = row_update(tinfo, cursor, positioned);
break;
case FIX:
case VAR:
- ret = col_update(tinfo, cursor,
- key, value, keyno, positioned);
+ ret = col_update(tinfo, cursor, positioned);
break;
}
if (ret == 0) {
positioned = true;
- if (SNAP_TRACK)
- snap_track(snap++, keyno, NULL, value);
+ SNAP_TRACK(
+ UPDATE, tinfo->keyno, NULL, tinfo->value);
} else {
positioned = false;
if (ret == WT_ROLLBACK && intxn)
@@ -887,9 +872,10 @@ update_instead_of_chosen_op:
* a random direction.
*/
if (positioned) {
- dir = (int)mmrand(&tinfo->rnd, 0, 1);
- for (i = 0; i < mmrand(&tinfo->rnd, 1, 100); ++i) {
- if ((ret = nextprev(cursor, dir)) == 0)
+ next = mmrand(&tinfo->rnd, 0, 1) == 1;
+ j = mmrand(&tinfo->rnd, 1, 100);
+ for (i = 0; i < j; ++i) {
+ if ((ret = nextprev(tinfo, cursor, next)) == 0)
continue;
if (ret == WT_ROLLBACK && intxn)
goto deadlock;
@@ -912,9 +898,13 @@ update_instead_of_chosen_op:
* Ending the transaction. If in snapshot isolation, repeat the
* operations and confirm they're unchanged.
*/
- if (snap != NULL && (ret = snap_check(
- cursor, snap_list, snap, key, value)) == WT_ROLLBACK)
- goto deadlock;
+ if (snap != NULL) {
+ ret = snap_check(
+ cursor, snap_list, snap, tinfo->key, tinfo->value);
+ testutil_assert(ret == 0 || ret == WT_ROLLBACK);
+ if (ret == WT_ROLLBACK)
+ goto deadlock;
+ }
/*
* If we're in a transaction, commit 40% of the time and
@@ -945,8 +935,8 @@ deadlock: ++tinfo->deadlock;
free(snap_list[i].kdata);
free(snap_list[i].vdata);
}
- key_gen_teardown(key);
- val_gen_teardown(value);
+ key_gen_teardown(tinfo->key);
+ val_gen_teardown(tinfo->value);
tinfo->state = TINFO_COMPLETE;
return (WT_THREAD_RET_VALUE);
@@ -993,7 +983,8 @@ wts_read_scan(void)
last_keyno = keyno;
}
- switch (ret = read_row(cursor, &key, &value, keyno)) {
+ switch (ret = read_row_worker(
+ cursor, keyno, &key, &value, false)) {
case 0:
case WT_NOTFOUND:
case WT_ROLLBACK:
@@ -1011,13 +1002,13 @@ wts_read_scan(void)
}
/*
- * read_row --
+ * read_row_worker --
* Read and verify a single element in a row- or column-store file.
*/
int
-read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
+read_row_worker(
+ WT_CURSOR *cursor, uint64_t keyno, WT_ITEM *key, WT_ITEM *value, bool sn)
{
- static int sn = 0;
WT_SESSION *session;
uint8_t bitfield;
int exact, ret;
@@ -1045,11 +1036,8 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
ret = cursor->search_near(cursor, &exact);
if (ret == 0 && exact != 0)
ret = WT_NOTFOUND;
- sn = 0;
- } else {
+ } else
ret = cursor->search(cursor);
- sn = 1;
- }
switch (ret) {
case 0:
if (g.type == FIX) {
@@ -1062,13 +1050,14 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
case WT_NOTFOUND:
/*
* In fixed length stores, zero values at the end of the key
- * space are returned as not found. Treat this the same as
+ * space are returned as not-found. Treat this the same as
* a zero value in the key space, to match BDB's behavior.
+ * The WiredTiger cursor has lost its position though, so
+ * we return not-found, the cursor movement can't continue.
*/
if (g.type == FIX) {
*(uint8_t *)(value->data) = 0;
value->size = 1;
- ret = 0;
}
break;
case WT_ROLLBACK:
@@ -1107,20 +1096,34 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
}
/*
+ * read_row --
+ * Read and verify a single element in a row- or column-store file.
+ */
+static int
+read_row(TINFO *tinfo, WT_CURSOR *cursor)
+{
+ /* 25% of the time we call search-near. */
+ return (read_row_worker(cursor, tinfo->keyno,
+ tinfo->key, tinfo->value, mmrand(&tinfo->rnd, 0, 3) == 1));
+}
+
+/*
* nextprev --
* Read and verify the next/prev element in a row- or column-store file.
*/
static int
-nextprev(WT_CURSOR *cursor, int next)
+nextprev(TINFO *tinfo, WT_CURSOR *cursor, bool next)
{
WT_DECL_RET;
WT_ITEM key, value;
uint64_t keyno;
uint8_t bitfield;
+ int cmp;
const char *which;
+ bool incrementing;
keyno = 0;
- which = next ? "next" : "prev";
+ which = next ? "WT_CURSOR.next" : "WT_CURSOR.prev";
switch (ret = (next ? cursor->next(cursor) : cursor->prev(cursor))) {
case 0:
@@ -1143,6 +1146,57 @@ nextprev(WT_CURSOR *cursor, int next)
}
if (ret != 0)
testutil_die(ret, "nextprev: get_key/get_value");
+
+ /* Check that keys are never returned out-of-order. */
+ /*
+ * XXX
+ * WT-3889
+ * LSM has a bug that prevents cursor order checks from
+ * working, skip the test for now.
+ */
+ if (DATASOURCE("lsm"))
+ break;
+
+ switch (g.type) {
+ case FIX:
+ case VAR:
+ testutil_assertfmt(
+ !next || tinfo->keyno < keyno,
+ "%s returned %" PRIu64 " then %" PRIu64,
+ which, tinfo->keyno, keyno);
+ testutil_assertfmt(
+ next || tinfo->keyno > keyno,
+ "%s returned %" PRIu64 " then %" PRIu64,
+ which, tinfo->keyno, keyno);
+
+ tinfo->keyno = keyno;
+ break;
+ case ROW:
+ cmp = memcmp(tinfo->key->data, key.data,
+ WT_MIN(tinfo->key->size, key.size));
+ incrementing =
+ (next && !g.c_reverse) || (!next && g.c_reverse);
+ testutil_assertfmt(
+ !incrementing ||
+ cmp < 0 ||
+ (cmp == 0 && tinfo->key->size < key.size),
+ "%s returned {%.*s} then {%.*s}",
+ which,
+ (int)tinfo->key->size, tinfo->key->data,
+ (int)key.size, key.data);
+ testutil_assertfmt(
+ incrementing ||
+ cmp > 0 ||
+ (cmp == 0 && tinfo->key->size > key.size),
+ "%s returned {%.*s} then {%.*s}",
+ which,
+ (int)tinfo->key->size, tinfo->key->data,
+ (int)key.size, key.data);
+
+ testutil_check(__wt_buf_set((WT_SESSION_IMPL *)
+ cursor->session, tinfo->key, key.data, key.size));
+ break;
+ }
break;
case WT_NOTFOUND:
break;
@@ -1225,18 +1279,19 @@ mismatch: if (g.type == ROW) {
* Reserve a row in a row-store file.
*/
static int
-row_reserve(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned)
+row_reserve(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
WT_DECL_RET;
if (!positioned) {
- key_gen(key, keyno);
- cursor->set_key(cursor, key);
+ key_gen(tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
}
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
- "%-10s{%.*s}", "reserve", (int)key->size, key->data);
+ "%-10s{%.*s}", "reserve",
+ (int)tinfo->key->size, tinfo->key->data);
switch (ret = cursor->reserve(cursor)) {
case 0:
@@ -1248,7 +1303,8 @@ row_reserve(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned)
return (WT_NOTFOUND);
default:
testutil_die(ret,
- "row_reserve: reserve row %" PRIu64 " by key", keyno);
+ "row_reserve: reserve row %" PRIu64 " by key",
+ tinfo->keyno);
}
return (0);
}
@@ -1258,16 +1314,16 @@ row_reserve(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned)
* Reserve a row in a column-store file.
*/
static int
-col_reserve(WT_CURSOR *cursor, uint64_t keyno, bool positioned)
+col_reserve(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
WT_DECL_RET;
if (!positioned)
- cursor->set_key(cursor, keyno);
+ cursor->set_key(cursor, tinfo->keyno);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
- "%-10s%" PRIu64, "reserve", keyno);
+ "%-10s%" PRIu64, "reserve", tinfo->keyno);
switch (ret = cursor->reserve(cursor)) {
case 0:
@@ -1278,7 +1334,7 @@ col_reserve(WT_CURSOR *cursor, uint64_t keyno, bool positioned)
case WT_NOTFOUND:
return (WT_NOTFOUND);
default:
- testutil_die(ret, "col_reserve: %" PRIu64, keyno);
+ testutil_die(ret, "col_reserve: %" PRIu64, tinfo->keyno);
}
return (0);
}
@@ -1314,22 +1370,21 @@ modify_build(TINFO *tinfo, WT_MODIFY *entries, int *nentriesp)
* Modify a row in a row-store file.
*/
static int
-row_modify(TINFO *tinfo, WT_CURSOR *cursor,
- WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned)
+row_modify(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
WT_DECL_RET;
WT_MODIFY entries[MAX_MODIFY_ENTRIES];
int nentries;
if (!positioned) {
- key_gen(key, keyno);
- cursor->set_key(cursor, key);
+ key_gen(tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
}
modify_build(tinfo, entries, &nentries);
switch (ret = cursor->modify(cursor, entries, nentries)) {
case 0:
- testutil_check(cursor->get_value(cursor, value));
+ testutil_check(cursor->get_value(cursor, tinfo->value));
break;
case WT_CACHE_FULL:
case WT_ROLLBACK:
@@ -1338,20 +1393,23 @@ row_modify(TINFO *tinfo, WT_CURSOR *cursor,
return (WT_NOTFOUND);
default:
testutil_die(ret,
- "row_modify: modify row %" PRIu64 " by key", keyno);
+ "row_modify: modify row %" PRIu64 " by key", tinfo->keyno);
}
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
"%-10s{%.*s}, {%.*s}",
"modify",
- (int)key->size, key->data, (int)value->size, value->data);
+ (int)tinfo->key->size, tinfo->key->data,
+ (int)tinfo->value->size, tinfo->value->data);
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- bdb_update(key->data, key->size, value->data, value->size);
+ bdb_update(
+ tinfo->key->data, tinfo->key->size,
+ tinfo->value->data, tinfo->value->size);
#endif
return (0);
}
@@ -1361,20 +1419,19 @@ row_modify(TINFO *tinfo, WT_CURSOR *cursor,
* Modify a row in a column-store file.
*/
static int
-col_modify(TINFO *tinfo, WT_CURSOR *cursor,
- WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned)
+col_modify(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
WT_DECL_RET;
WT_MODIFY entries[MAX_MODIFY_ENTRIES];
int nentries;
if (!positioned)
- cursor->set_key(cursor, keyno);
+ cursor->set_key(cursor, tinfo->keyno);
modify_build(tinfo, entries, &nentries);
switch (ret = cursor->modify(cursor, entries, nentries)) {
case 0:
- testutil_check(cursor->get_value(cursor, value));
+ testutil_check(cursor->get_value(cursor, tinfo->value));
break;
case WT_CACHE_FULL:
case WT_ROLLBACK:
@@ -1382,23 +1439,25 @@ col_modify(TINFO *tinfo, WT_CURSOR *cursor,
case WT_NOTFOUND:
return (WT_NOTFOUND);
default:
- testutil_die(ret, "col_modify: modify row %" PRIu64, keyno);
+ testutil_die(ret,
+ "col_modify: modify row %" PRIu64, tinfo->keyno);
}
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
"%-10s{%.*s}, {%.*s}",
"modify",
- (int)key->size, key->data, (int)value->size, value->data);
+ (int)tinfo->key->size, tinfo->key->data,
+ (int)tinfo->value->size, tinfo->value->data);
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- key_gen(key, keyno);
- bdb_update(key->data, key->size, value->data, value->size);
-#else
- (void)key; /* [-Wunused-variable] */
+ key_gen(tinfo->key, tinfo->keyno);
+ bdb_update(
+ tinfo->key->data, tinfo->key->size,
+ tinfo->value->data, tinfo->value->size);
#endif
return (0);
}
@@ -1408,23 +1467,23 @@ col_modify(TINFO *tinfo, WT_CURSOR *cursor,
* Update a row in a row-store file.
*/
static int
-row_update(TINFO *tinfo, WT_CURSOR *cursor,
- WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned)
+row_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
WT_DECL_RET;
if (!positioned) {
- key_gen(key, keyno);
- cursor->set_key(cursor, key);
+ key_gen(tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
}
- val_gen(&tinfo->rnd, value, keyno);
- cursor->set_value(cursor, value);
+ val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
+ cursor->set_value(cursor, tinfo->value);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
"%-10s{%.*s}, {%.*s}",
"put",
- (int)key->size, key->data, (int)value->size, value->data);
+ (int)tinfo->key->size, tinfo->key->data,
+ (int)tinfo->value->size, tinfo->value->data);
switch (ret = cursor->update(cursor)) {
case 0:
@@ -1434,14 +1493,16 @@ row_update(TINFO *tinfo, WT_CURSOR *cursor,
return (WT_ROLLBACK);
default:
testutil_die(ret,
- "row_update: update row %" PRIu64 " by key", keyno);
+ "row_update: update row %" PRIu64 " by key", tinfo->keyno);
}
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- bdb_update(key->data, key->size, value->data, value->size);
+ bdb_update(
+ tinfo->key->data, tinfo->key->size,
+ tinfo->value->data, tinfo->value->size);
#endif
return (0);
}
@@ -1451,30 +1512,30 @@ row_update(TINFO *tinfo, WT_CURSOR *cursor,
* Update a row in a column-store file.
*/
static int
-col_update(TINFO *tinfo, WT_CURSOR *cursor,
- WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned)
+col_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
WT_DECL_RET;
if (!positioned)
- cursor->set_key(cursor, keyno);
- val_gen(&tinfo->rnd, value, keyno);
+ cursor->set_key(cursor, tinfo->keyno);
+ val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
if (g.type == FIX)
- cursor->set_value(cursor, *(uint8_t *)value->data);
+ cursor->set_value(cursor, *(uint8_t *)tinfo->value->data);
else
- cursor->set_value(cursor, value);
+ cursor->set_value(cursor, tinfo->value);
if (g.logging == LOG_OPS) {
if (g.type == FIX)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
"%-10s%" PRIu64 " {0x%02" PRIx8 "}",
- "update", keyno,
- ((uint8_t *)value->data)[0]);
+ "update", tinfo->keyno,
+ ((uint8_t *)tinfo->value->data)[0]);
else
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
"%-10s%" PRIu64 " {%.*s}",
- "update", keyno,
- (int)value->size, (char *)value->data);
+ "update", tinfo->keyno,
+ (int)tinfo->value->size,
+ (char *)tinfo->value->data);
}
switch (ret = cursor->update(cursor)) {
@@ -1484,17 +1545,17 @@ col_update(TINFO *tinfo, WT_CURSOR *cursor,
case WT_ROLLBACK:
return (WT_ROLLBACK);
default:
- testutil_die(ret, "col_update: %" PRIu64, keyno);
+ testutil_die(ret, "col_update: %" PRIu64, tinfo->keyno);
}
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- key_gen(key, keyno);
- bdb_update(key->data, key->size, value->data, value->size);
-#else
- (void)key; /* [-Wunused-variable] */
+ key_gen(tinfo->key, tinfo->keyno);
+ bdb_update(
+ tinfo->key->data, tinfo->key->size,
+ tinfo->value->data, tinfo->value->size);
#endif
return (0);
}
@@ -1603,8 +1664,7 @@ table_append(uint64_t keyno)
* Insert a row in a row-store file.
*/
static int
-row_insert(TINFO *tinfo, WT_CURSOR *cursor,
- WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned)
+row_insert(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
WT_DECL_RET;
@@ -1613,18 +1673,19 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor,
* the insert method. Otherwise, generate a unique key and insert.
*/
if (!positioned) {
- key_gen_insert(&tinfo->rnd, key, keyno);
- cursor->set_key(cursor, key);
+ key_gen_insert(&tinfo->rnd, tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
}
- val_gen(&tinfo->rnd, value, keyno);
- cursor->set_value(cursor, value);
+ val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
+ cursor->set_value(cursor, tinfo->value);
/* Log the operation */
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
"%-10s{%.*s}, {%.*s}",
"insert",
- (int)key->size, key->data, (int)value->size, value->data);
+ (int)tinfo->key->size, tinfo->key->data,
+ (int)tinfo->value->size, tinfo->value->data);
switch (ret = cursor->insert(cursor)) {
case 0:
@@ -1634,14 +1695,16 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor,
return (WT_ROLLBACK);
default:
testutil_die(ret,
- "row_insert: insert row %" PRIu64 " by key", keyno);
+ "row_insert: insert row %" PRIu64 " by key", tinfo->keyno);
}
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- bdb_update(key->data, key->size, value->data, value->size);
+ bdb_update(
+ tinfo->key->data, tinfo->key->size,
+ tinfo->value->data, tinfo->value->size);
#endif
return (0);
}
@@ -1651,17 +1714,15 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor,
* Insert an element in a column-store file.
*/
static int
-col_insert(TINFO *tinfo,
- WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop)
+col_insert(TINFO *tinfo, WT_CURSOR *cursor)
{
WT_DECL_RET;
- uint64_t keyno;
- val_gen(&tinfo->rnd, value, g.rows + 1);
+ val_gen(&tinfo->rnd, tinfo->value, g.rows + 1);
if (g.type == FIX)
- cursor->set_value(cursor, *(uint8_t *)value->data);
+ cursor->set_value(cursor, *(uint8_t *)tinfo->value->data);
else
- cursor->set_value(cursor, value);
+ cursor->set_value(cursor, tinfo->value);
switch (ret = cursor->insert(cursor)) {
case 0:
break;
@@ -1671,32 +1732,32 @@ col_insert(TINFO *tinfo,
default:
testutil_die(ret, "cursor.insert");
}
- testutil_check(cursor->get_key(cursor, &keyno));
- *keynop = (uint32_t)keyno;
+ testutil_check(cursor->get_key(cursor, &tinfo->keyno));
- table_append(keyno); /* Extend the object. */
+ table_append(tinfo->keyno); /* Extend the object. */
if (g.logging == LOG_OPS) {
if (g.type == FIX)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
"%-10s%" PRIu64 " {0x%02" PRIx8 "}",
- "insert", keyno,
- ((uint8_t *)value->data)[0]);
+ "insert", tinfo->keyno,
+ ((uint8_t *)tinfo->value->data)[0]);
else
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
"%-10s%" PRIu64 " {%.*s}",
- "insert", keyno,
- (int)value->size, (char *)value->data);
+ "insert", tinfo->keyno,
+ (int)tinfo->value->size,
+ (char *)tinfo->value->data);
}
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (0);
- key_gen(key, keyno);
- bdb_update(key->data, key->size, value->data, value->size);
-#else
- (void)key; /* [-Wunused-variable] */
+ key_gen(tinfo->key, tinfo->keyno);
+ bdb_update(
+ tinfo->key->data, tinfo->key->size,
+ tinfo->value->data, tinfo->value->size);
#endif
return (0);
}
@@ -1706,18 +1767,18 @@ col_insert(TINFO *tinfo,
* Remove an row from a row-store file.
*/
static int
-row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned)
+row_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
WT_DECL_RET;
if (!positioned) {
- key_gen(key, keyno);
- cursor->set_key(cursor, key);
+ key_gen(tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
}
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api,
- cursor->session, "%-10s%" PRIu64, "remove", keyno);
+ cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
/* We use the cursor in overwrite mode, check for existence. */
if ((ret = cursor->search(cursor)) == 0)
@@ -1730,7 +1791,7 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned)
return (WT_ROLLBACK);
default:
testutil_die(ret,
- "row_remove: remove %" PRIu64 " by key", keyno);
+ "row_remove: remove %" PRIu64 " by key", tinfo->keyno);
}
#ifdef HAVE_BERKELEY_DB
@@ -1740,11 +1801,9 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned)
{
int notfound;
- bdb_remove(keyno, &notfound);
- (void)notfound_chk("row_remove", ret, notfound, keyno);
+ bdb_remove(tinfo->keyno, &notfound);
+ (void)notfound_chk("row_remove", ret, notfound, tinfo->keyno);
}
-#else
- (void)key; /* [-Wunused-variable] */
#endif
return (ret);
}
@@ -1754,16 +1813,16 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned)
* Remove a row from a column-store file.
*/
static int
-col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned)
+col_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
{
WT_DECL_RET;
if (!positioned)
- cursor->set_key(cursor, keyno);
+ cursor->set_key(cursor, tinfo->keyno);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api,
- cursor->session, "%-10s%" PRIu64, "remove", keyno);
+ cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
/* We use the cursor in overwrite mode, check for existence. */
if ((ret = cursor->search(cursor)) == 0)
@@ -1776,7 +1835,7 @@ col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned)
return (WT_ROLLBACK);
default:
testutil_die(ret,
- "col_remove: remove %" PRIu64 " by key", keyno);
+ "col_remove: remove %" PRIu64 " by key", tinfo->keyno);
}
#ifdef HAVE_BERKELEY_DB
@@ -1788,16 +1847,14 @@ col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned)
* do the same thing for the BDB store.
*/
if (g.type == FIX) {
- key_gen(key, keyno);
- bdb_update(key->data, key->size, "", 1);
+ key_gen(tinfo->key, tinfo->keyno);
+ bdb_update(tinfo->key->data, tinfo->key->size, "", 1);
} else {
int notfound;
- bdb_remove(keyno, &notfound);
- (void)notfound_chk("col_remove", ret, notfound, keyno);
+ bdb_remove(tinfo->keyno, &notfound);
+ (void)notfound_chk("col_remove", ret, notfound, tinfo->keyno);
}
-#else
- (void)key; /* [-Wunused-variable] */
#endif
return (ret);
}
diff --git a/src/third_party/wiredtiger/test/suite/test_assert04.py b/src/third_party/wiredtiger/test/suite/test_assert04.py
index d582535fd0f..520a7a6a16d 100644
--- a/src/third_party/wiredtiger/test/suite/test_assert04.py
+++ b/src/third_party/wiredtiger/test/suite/test_assert04.py
@@ -94,7 +94,7 @@ class test_assert04(wttest.WiredTigerTestCase, suite_subprocess):
c.close()
# We must move the oldest timestamp forward in order to alter.
- # Otherwise alter's closing of the file will fail with EBUSY.
+ # Otherwise alter closing the file will fail with EBUSY.
self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(2))
# Now alter the setting and make sure we detect incorrect usage.
@@ -345,5 +345,51 @@ class test_assert04(wttest.WiredTigerTestCase, suite_subprocess):
self.assertEquals(c['key_nots'], 'value_nots1')
c.close()
+ # Confirm it is okay to set the timestamp in the middle or end of the
+ # transaction. That should set the timestamp for the whole thing.
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ c['key_ts5'] = 'value_notsyet'
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(20))
+ c['key_ts5'] = 'value20'
+ self.session.commit_transaction()
+ c.close()
+
+ c = self.session.open_cursor(uri)
+ self.assertEquals(c['key_ts5'], 'value20')
+ c.close()
+
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ c['key_ts6'] = 'value_notsyet'
+ c['key_ts6'] = 'value21_after'
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(21))
+ self.session.commit_transaction()
+ c.close()
+
+ c = self.session.open_cursor(uri)
+ self.assertEquals(c['key_ts6'], 'value21_after')
+ c.close()
+
+ # Confirm it is okay to set the timestamp on the commit call.
+ # That should set the timestamp for the whole thing.
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ c['key_ts6'] = 'value_committs1'
+ c['key_ts6'] = 'value22'
+ self.session.commit_transaction('commit_timestamp=' +
+ timestamp_str(22))
+ c.close()
+
+ c = self.session.open_cursor(uri)
+ self.session.begin_transaction()
+ c['key_nots'] = 'value23'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.commit_transaction(
+ 'commit_timestamp=' + timestamp_str(23)), msg_usage)
+ c.close()
+
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare01.py b/src/third_party/wiredtiger/test/suite/test_prepare01.py
new file mode 100644
index 00000000000..f4ef7248228
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_prepare01.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+from wtscenario import make_scenarios
+
+# test_prepare01.py
+# Transactions: basic functionality with prepare
+class test_prepare01(wttest.WiredTigerTestCase):
+ nentries = 1000
+ scenarios = make_scenarios([
+ ('col-f', dict(uri='file:text_txn01',key_format='r',value_format='S')),
+ ('col-t', dict(uri='table:text_txn01',key_format='r',value_format='S')),
+ ('fix-f', dict(uri='file:text_txn01',key_format='r',value_format='8t')),
+ ('fix-t', dict(uri='table:text_txn01',key_format='r',value_format='8t')),
+ ('row-f', dict(uri='file:text_txn01',key_format='S',value_format='S')),
+ ('row-t', dict(uri='table:text_txn01',key_format='S',value_format='S')),
+ ])
+
+ # Return the number of records visible to the cursor.
+ def cursor_count(self, cursor):
+ count = 0
+ # Column-store appends result in phantoms, ignore records unless they
+ # have our flag value.
+ for r in cursor:
+ if self.value_format == 'S' or cursor.get_value() == 0xab:
+ count += 1
+ return count
+
+ # Checkpoint the database and assert the number of records visible to the
+ # checkpoint matches the expected value.
+ def check_checkpoint(self, expected):
+ s = self.conn.open_session()
+ s.checkpoint("name=test")
+ cursor = s.open_cursor(self.uri, None, "checkpoint=test")
+ self.assertEqual(self.cursor_count(cursor), expected)
+ s.close()
+
+ # Open a cursor with specified isolation level, and assert the number of
+ # records visible to the cursor matches the expected value.
+ def check_txn_cursor(self, level, expected):
+ s = self.conn.open_session()
+ cursor = s.open_cursor(self.uri, None)
+ s.begin_transaction(level)
+ self.assertEqual(self.cursor_count(cursor), expected)
+ s.close()
+
+ # Open a session with specified isolation level, and assert the number of
+ # records visible to the cursor matches the expected value.
+ def check_txn_session(self, level, expected):
+ s = self.conn.open_session(level)
+ cursor = s.open_cursor(self.uri, None)
+ # Currently ignore_prepare is not realized yet, hence no effect.
+ s.begin_transaction("ignore_prepare=true")
+ self.assertEqual(self.cursor_count(cursor), expected)
+ s.close()
+
+ def check(self, cursor, committed, total):
+ # The cursor itself should see all of the records.
+ if cursor != None:
+ cursor.reset()
+ self.assertEqual(self.cursor_count(cursor), total)
+
+ # Read-uncommitted should see all of the records.
+ # Snapshot and read-committed should see only committed records.
+ self.check_txn_cursor('isolation=read-uncommitted', total)
+ self.check_txn_session('isolation=read-uncommitted', total)
+
+ self.check_txn_cursor('isolation=snapshot', committed)
+ self.check_txn_session('isolation=snapshot', committed)
+
+ self.check_txn_cursor('isolation=read-committed', committed)
+ self.check_txn_session('isolation=read-committed', committed)
+
+ # Checkpoints should only write committed items.
+ self.check_checkpoint(committed)
+
+ # Loop through a set of inserts, periodically committing; before each
+ # commit, verify the number of visible records matches the expected value.
+ def test_visibility(self):
+ self.session.create(self.uri,
+ 'key_format=' + self.key_format +
+ ',value_format=' + self.value_format)
+
+ committed = 0
+ cursor = self.session.open_cursor(self.uri, None)
+ self.check(cursor, 0, 0)
+ msg = "/prepare_transaction is not supported/"
+ # Currently ignore_prepare is not realized yet, hence no effect.
+ self.session.begin_transaction("ignore_prepare=false")
+ for i in xrange(self.nentries):
+ if i > 0 and i % (self.nentries / 37) == 0:
+ self.check(cursor, committed, i)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.prepare_transaction(), msg)
+ self.session.commit_transaction()
+ committed = i
+ self.session.begin_transaction()
+
+ if self.key_format == 'S':
+ cursor.set_key("key: %06d" % i)
+ else:
+ cursor.set_key(i + 1)
+ if self.value_format == 'S':
+ cursor.set_value("value: %06d" % i)
+ else:
+ cursor.set_value(0xab)
+ cursor.insert()
+
+ self.check(cursor, committed, self.nentries)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.prepare_transaction(), msg)
+ self.session.commit_transaction()
+ self.check(cursor, self.nentries, self.nentries)
+
+# Test that read-committed is the default isolation level.
+class test_read_committed_default(wttest.WiredTigerTestCase):
+ uri = 'table:test_prepare'
+
+ # Return the number of records visible to the cursor.
+ def cursor_count(self, cursor):
+ count = 0
+ for r in cursor:
+ count += 1
+ return count
+
+ def test_read_committed_default(self):
+ self.session.create(self.uri, 'key_format=S,value_format=S')
+ cursor = self.session.open_cursor(self.uri, None)
+ self.session.begin_transaction()
+ cursor['key: aaa'] = 'value: aaa'
+ msg = "/prepare_transaction is not supported/"
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.prepare_transaction(), msg)
+ self.session.commit_transaction()
+ self.session.begin_transaction()
+ cursor['key: bbb'] = 'value: bbb'
+
+ s = self.conn.open_session()
+ cursor = s.open_cursor(self.uri, None)
+ s.begin_transaction("isolation=read-committed")
+ self.assertEqual(self.cursor_count(cursor), 1)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.prepare_transaction(), msg)
+ s.commit_transaction()
+ s.begin_transaction(None)
+ self.assertEqual(self.cursor_count(cursor), 1)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.prepare_transaction(), msg)
+ s.commit_transaction()
+ s.close()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/syscall/wt2336_base/main.c b/src/third_party/wiredtiger/test/syscall/wt2336_base/main.c
index 54f9999a126..32457254950 100644
--- a/src/third_party/wiredtiger/test/syscall/wt2336_base/main.c
+++ b/src/third_party/wiredtiger/test/syscall/wt2336_base/main.c
@@ -47,9 +47,9 @@ fail(int ret) {
int
main(int argc, char *argv[])
{
- int ret;
WT_CONNECTION *conn;
WT_SESSION *session;
+ int ret;
(void)argc;
(void)argv;
diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c
index a632e832f53..69ff53c290c 100644
--- a/src/third_party/wiredtiger/test/utility/misc.c
+++ b/src/third_party/wiredtiger/test/utility/misc.c
@@ -39,6 +39,10 @@ testutil_die(int e, const char *fmt, ...)
{
va_list ap;
+ /* Flush output to be sure it doesn't mix with fatal errors. */
+ (void)fflush(stdout);
+ (void)fflush(stderr);
+
/* Allow test programs to cleanup on fatal error. */
if (custom_die != NULL)
(*custom_die)();
diff --git a/src/third_party/wiredtiger/test/utility/thread.c b/src/third_party/wiredtiger/test/utility/thread.c
index 1e1bd0bf575..4f70c562687 100644
--- a/src/third_party/wiredtiger/test/utility/thread.c
+++ b/src/third_party/wiredtiger/test/utility/thread.c
@@ -226,8 +226,8 @@ op_cursor(void *arg)
{
TEST_OPTS *opts;
TEST_PER_THREAD_OPTS *args;
- WT_SESSION *session;
WT_CURSOR *cursor;
+ WT_SESSION *session;
int ret;
args = (TEST_PER_THREAD_OPTS *)arg;
@@ -321,8 +321,8 @@ op_create_unique(void *arg)
void
op_drop(void *arg)
{
- TEST_PER_THREAD_OPTS *args;
TEST_OPTS *opts;
+ TEST_PER_THREAD_OPTS *args;
WT_RAND_STATE rnd;
WT_SESSION *session;
int ret;