summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2019-10-03 05:44:38 +0000
committer evergreen <evergreen@mongodb.com> 2019-10-03 05:44:38 +0000
commit 0d0748ae6896c7ab235dffb2a0c8a49e16fad7f8 (patch)
tree 9fcbf110eaa9ef3148ac8e64f72a38aafdf878c8 /src
parent 2961e15b5fd4534b324d78557afcf795aba056b6 (diff)
download mongo-0d0748ae6896c7ab235dffb2a0c8a49e16fad7f8.tar.gz
Import wiredtiger: e0041ca53c1c1a4a23cc7aaa7ef8137dc1c61117 from branch mongodb-4.4
ref: 0cd668bf3a..e0041ca53c
for: 4.3.1

WT-4702 Switch to ubuntu1804-test Evergreen distro
WT-4715 Workloads will stall if old transaction or timestamp pinned by thread co-opted for eviction
WT-4961 Checkpoints with cache overflow must keep history for reads
WT-5093 Enable million-collection-test working with Evergreen distro rhel80-build
WT-5094 Enable Windows compile task working with Evergreen distro windows-64-vs2017-test
WT-5122 Shut down the sweep server before doing the final checkpoint
WT-5128 Add script to run wtperf with XRay profiling
WT-5130 Enable Big-endian (s390x/zSeries) working with Evergreen distro ubuntu1804-zseries-build
WT-5135 Change lookaside file inserts to use cursor.insert
WT-5140 Fix where a cursor returning random items can use an uninitialized buffer
WT-5143 Fix typo in error message
Diffstat (limited to 'src')
-rw-r--r--src/third_party/wiredtiger/SConstruct1
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/wtperf_xray.sh122
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf.c3
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py25
-rw-r--r--src/third_party/wiredtiger/dist/filelist1
-rw-r--r--src/third_party/wiredtiger/dist/s_define.list1
-rw-r--r--src/third_party/wiredtiger/examples/c/ex_all.c2
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_random.c12
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c21
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_sync.c14
-rw-r--r--src/third_party/wiredtiger/src/btree/row_key.c12
-rw-r--r--src/third_party/wiredtiger/src/cache/cache_las.c51
-rw-r--r--src/third_party/wiredtiger/src/checksum/zseries/slicing-consts.h6
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c97
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c49
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_open.c8
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c36
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c11
-rw-r--r--src/third_party/wiredtiger/src/include/api.h4
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h38
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i6
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h2
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h26
-rw-r--r--src/third_party/wiredtiger/src/include/extern_posix.h3
-rw-r--r--src/third_party/wiredtiger/src/include/misc.i43
-rw-r--r--src/third_party/wiredtiger/src/include/reconcile.h9
-rw-r--r--src/third_party/wiredtiger/src/include/session.h7
-rw-r--r--src/third_party/wiredtiger/src/include/time.i182
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i55
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in49
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h1
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_time.c1
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_child.c5
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_visibility.c136
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c36
-rw-r--r--src/third_party/wiredtiger/src/support/time.c109
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c151
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c27
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_recover.c2
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c29
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen.yml30
-rw-r--r--src/third_party/wiredtiger/test/format/Makefile.am2
-rw-r--r--src/third_party/wiredtiger/test/format/config.h3
-rw-r--r--src/third_party/wiredtiger/test/format/format.h2
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c7
-rw-r--r--src/third_party/wiredtiger/test/format/random.c95
-rw-r--r--src/third_party/wiredtiger/test/format/wts.c3
-rw-r--r--src/third_party/wiredtiger/test/suite/test_debug_mode05.py2
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_las01.py9
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp04.py7
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp06.py26
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp11.py1
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp16.py14
-rw-r--r--src/third_party/wiredtiger/test/suite/test_txn21.py49
55 files changed, 1011 insertions, 634 deletions
diff --git a/src/third_party/wiredtiger/SConstruct b/src/third_party/wiredtiger/SConstruct
index f895a53c426..f4d0d31dab0 100644
--- a/src/third_party/wiredtiger/SConstruct
+++ b/src/third_party/wiredtiger/SConstruct
@@ -496,6 +496,7 @@ t = env.Program("t_format",
"test/format/lrt.c",
"test/format/ops.c",
"test/format/rebalance.c",
+ "test/format/random.c",
"test/format/salvage.c",
"test/format/snap.c",
"test/format/t.c",
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_xray.sh b/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_xray.sh
new file mode 100644
index 00000000000..398c6a9bcf5
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_xray.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+
+# wtperf_xray.sh - run wtperf regression tests with xray profiling and generate
+# profiling information.
+#
+# This script assumes it is running in the directory with the wtperf executable.
+#
+# Usage
+# wtperf_xray.sh <wtperf-config-file> [-h output-directory] [wtperf other args]
+#
+# This script checks the first argument after the wtperf configuration to see
+# whether a home directory is being specified with the -h flag. If so, this
+# script will write its output files to that directory. Otherwise it will
+# default to WT_TEST (wtperf's default).
+#
+# Environment variables
+# XRAY_BINARY --
+# The binary to use to inspect the xray log. (default: llvm-xray)
+# FLAME_GRAPH_PATH --
+# The path to your copy of Brendan Gregg's FlameGraph script. (optional)
+#
+# When this is complete you can find information in the following files:
+# wtperf_account.txt --
+# The top 10 functions where the workload is spending the most time along
+# with a count, min, max and some percentiles for each one.
+# wtperf_stack.txt --
+# The top 10 stack traces where the workload is spending the most time.
+# This calculation is done separately per thread.
+# wtperf_graph.svg --
+# A function call graph showing what functions call each other. The edges
+# are labelled and coloured proportionally to represent the ratio of time
+# spent in each function call.
+# wtperf_flame.svg --
+# A graph visualising stack traces and the time spent within each stack
+# frame. If FLAME_GRAPH_PATH is not specified, this graph won't be
+# generated.
+#
+if ! test -f ./wtperf; then
+ echo "$0: could not find wtperf in current working directory"
+ exit 1
+fi
+
+if test "$#" -lt "1"; then
+ echo "$0: must specify wtperf configuration to run"
+ exit 1
+fi
+
+# By default, wtperf uses WT_TEST as its home directory.
+xray_home="WT_TEST"
+if test "$2" = "-h"; then
+ if ! test -z "$3"; then
+ xray_home="$3"
+ fi
+fi
+echo "$0: using $xray_home as home directory"
+
+# Check symbols to ensure we've compiled with XRay.
+objdump_out=$(objdump -h -j xray_instr_map ./wtperf)
+if test -z "$objdump_out"; then
+ echo "$0: wtperf not compiled with xray, add '-fxray-instrument' to your CFLAGS"
+ exit 1
+fi
+
+if ! test -d "$xray_home"; then
+ echo "$0: creating directory $xray_home"
+ mkdir "$xray_home"
+fi
+
+xray_account_path="${xray_home}/wtperf_account.txt"
+xray_stack_path="${xray_home}/wtperf_stack.txt"
+xray_graph_path="${xray_home}/wtperf_graph.svg"
+xray_flame_path="${xray_home}/wtperf_flame.svg"
+
+rm xray-log.wtperf.* \
+ "$xray_account_path" \
+ "$xray_stack_path" \
+ "$xray_graph_path" \
+ "$xray_flame_path"
+
+export XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1"
+./wtperf -O "$@"
+
+xray_log=$(ls xray-log.wtperf.*)
+num_logs=$(echo "$xray_log" | wc -w)
+if test "$num_logs" -ne "1"; then
+ echo "$0: detected more than one xray log"
+ exit 1
+fi
+
+if test -z "$XRAY_BINARY"; then
+ xray_bin="llvm-xray"
+ echo "$0: XRAY_BINARY is unset, defaulting to $xray_bin"
+else
+ xray_bin="$XRAY_BINARY"
+fi
+
+$xray_bin account "$xray_log" \
+ -top=10 -sort=sum -sortorder=dsc -instr_map ./wtperf > \
+ "$xray_account_path"
+
+# Use the -per-thread-stacks option to get the top 10 stacks for each thread.
+# We could use the -aggregate-threads flag here to get the top stacks for all threads (omitting duplicates).
+$xray_bin stack -per-thread-stacks "$xray_log" \
+ -instr_map ./wtperf > \
+ "$xray_stack_path"
+
+# Generate a DOT graph.
+$xray_bin graph "$xray_log" \
+ -m ./wtperf -color-edges=sum -edge-label=sum | \
+ unflatten -f -l10 | \
+ dot -Tsvg -o "$xray_graph_path"
+
+# This file can be inspected in the Google Chrome Trace Viewer.
+# It seems to take a long time to generate this so just disable it for now.
+# $xray_bin convert -symbolize -instr_map=./wtperf -output-format=trace_event $xray_log | gzip > wtperf_trace.txt.gz
+if test -z "$FLAME_GRAPH_PATH"; then
+ echo "$0: FLAME_GRAPH_PATH is unset, skipping flame graph generation"
+else
+ $xray_bin stack "$xray_log" \
+ -instr_map ./wtperf -stack-format=flame -aggregation-type=time -all-stacks | \
+ "$FLAME_GRAPH_PATH" > "$xray_flame_path"
+fi
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c
index cf12df3f2fc..b659d83cbc7 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c
@@ -654,8 +654,7 @@ worker(void *arg)
*/
measure_latency = opts->sample_interval != 0 && trk != NULL && trk->ops != 0 &&
(trk->ops % opts->sample_rate == 0);
- if (measure_latency)
- __wt_epoch(NULL, &start);
+ __wt_epoch(NULL, &start); /* [-Werror=maybe-uninitialized] */
cursor->set_key(cursor, key_buf);
switch (*op) {
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 3bd75b7187c..6d9d4f1db3d 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -605,6 +605,13 @@ connection_runtime_config = [
Config('lsm_merge', 'true', r'''
merge LSM chunks where possible (deprecated)''',
type='boolean', undoc=True),
+ Config('operation_timeout_ms', '0', r'''
+ when non-zero, a requested limit on the number of elapsed real time milliseconds
+ application threads will take to complete database operations. Time is measured from the
+ start of each WiredTiger API call. There is no guarantee any operation will not take
+ longer than this amount of time. If WiredTiger notices the limit has been exceeded, an
+ operation may return a WT_ROLLBACK error. Default is to have no limit''',
+ min=1),
Config('operation_tracking', '', r'''
enable tracking of performance-critical functions. See
@ref operation_tracking for more information''',
@@ -1333,6 +1340,13 @@ methods = {
choices=['read-uncommitted', 'read-committed', 'snapshot']),
Config('name', '', r'''
name of the transaction for tracing and debugging'''),
+ Config('operation_timeout_ms', '0', r'''
+ when non-zero, a requested limit on the number of elapsed real time milliseconds taken
+ to complete database operations in this transaction. Time is measured from the start
+ of each WiredTiger API call. There is no guarantee any operation will not take longer
+ than this amount of time. If WiredTiger notices the limit has been exceeded, an operation
+ may return a WT_ROLLBACK error. Default is to have no limit''',
+ min=1),
Config('priority', 0, r'''
priority of the transaction for resolving conflicts.
Transactions with higher values are less likely to abort''',
@@ -1436,8 +1450,8 @@ methods = {
dropped while a hot backup is in progress or if open in
a cursor''', type='list'),
Config('force', 'false', r'''
- by default, checkpoints may be skipped if the underlying object
- has not been modified, this option forces the checkpoint''',
+ if false (the default), checkpoints may be skipped if the underlying object has not been
+ modified, if true, this option forces the checkpoint''',
type='boolean'),
Config('name', '', r'''
if set, specify a name for the checkpoint (note that checkpoints
@@ -1445,10 +1459,9 @@ methods = {
Config('target', '', r'''
if non-empty, checkpoint the list of objects''', type='list'),
Config('use_timestamp', 'true', r'''
- by default, create the checkpoint as of the last stable timestamp
- if timestamps are in use, or all current updates if there is no
- stable timestamp set. If false, this option generates a checkpoint
- with all updates including those later than the timestamp''',
+ if true (the default), create the checkpoint as of the last stable timestamp if timestamps
+ are in use, or all current updates if there is no stable timestamp set. If false, this
+ option generates a checkpoint with all updates including those later than the timestamp''',
type='boolean'),
]),
diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist
index 1d398a4aa88..9e7eb0b23ac 100644
--- a/src/third_party/wiredtiger/dist/filelist
+++ b/src/third_party/wiredtiger/dist/filelist
@@ -205,7 +205,6 @@ src/support/rand.c
src/support/scratch.c
src/support/stat.c
src/support/thread_group.c
-src/support/time.c
src/txn/txn.c
src/txn/txn_ckpt.c
src/txn/txn_ext.c
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list
index f5e3584343d..85a240550ea 100644
--- a/src/third_party/wiredtiger/dist/s_define.list
+++ b/src/third_party/wiredtiger/dist/s_define.list
@@ -75,7 +75,6 @@ WT_TIMEDIFF_US
WT_TRACK_OP
WT_TRACK_OP_END
WT_TRACK_OP_INIT
-WT_TRACK_TIME
WT_TRET_ERROR_OK
WT_UPDATE_SIZE
WT_USE_OPENAT
diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c
index b9792fbc82b..ffc3c469881 100644
--- a/src/third_party/wiredtiger/examples/c/ex_all.c
+++ b/src/third_party/wiredtiger/examples/c/ex_all.c
@@ -928,6 +928,8 @@ transaction_ops(WT_SESSION *session_arg)
error_check(conn->set_timestamp(conn, "stable_timestamp=2a"));
/*! [set stable timestamp] */
+ /* WT_CONNECTION.rollback_to_stable requires a timestamped checkpoint. */
+ error_check(session->checkpoint(session, NULL));
/*! [rollback to stable] */
error_check(conn->rollback_to_stable(conn, NULL));
/*! [rollback to stable] */
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 871a3a0366c..427cb8cd696 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "0cd668bf3ac3cdd5840d84d70205dabbb727278c",
+ "commit": "e0041ca53c1c1a4a23cc7aaa7ef8137dc1c61117",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-4.4"
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index ae2c64a126d..f3f8b31b33e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -314,8 +314,8 @@ __random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
for (i = __wt_random(&session->rnd) % WT_RANDOM_CURSOR_MOVE;;) {
ret = next ? __wt_btcur_next(cbt, false) : __wt_btcur_prev(cbt, false);
if (ret == WT_NOTFOUND) {
- next = false; /* Reverse direction from the end of the tree. */
- ret = __wt_btcur_prev(cbt, false);
+ next = !next; /* Reverse direction. */
+ ret = next ? __wt_btcur_next(cbt, false) : __wt_btcur_prev(cbt, false);
WT_RET(ret); /* An empty tree. */
}
if (i > 0)
@@ -324,8 +324,14 @@ __random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
/*
* Skip the record we returned last time, once. Clear the tracking value so we don't
* skip that record twice, it just means the tree is too small for anything reasonable.
+ *
+ * Testing WT_DATA_IN_ITEM requires explanation: the cursor temporary buffer is used to
+ * build keys for row-store searches and can point into the row-store page (which might
+ * have been freed subsequently). If a previous random call set the temporary buffer,
+ * then it will be local data. If it's local data for some other reason than a previous
+ * random call, we don't care: it won't match, and if it does we just retry.
*/
- if (cursor->key.size == cbt->tmp->size &&
+ if (WT_DATA_IN_ITEM(cbt->tmp) && cursor->key.size == cbt->tmp->size &&
memcmp(cursor->key.data, cbt->tmp->data, cbt->tmp->size) == 0) {
cbt->tmp->size = 0;
i = __wt_random(&session->rnd) % WT_RANDOM_CURSOR_MOVE;
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index b21221439f6..176ade40575 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -116,6 +116,7 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
WT_DECL_RET;
WT_ITEM las_key, las_value;
WT_PAGE *page;
+ WT_PAGE_LOOKASIDE *page_las;
WT_UPDATE *first_upd, *last_upd, *upd;
wt_timestamp_t durable_timestamp, las_timestamp;
size_t incr, total_incr;
@@ -131,7 +132,8 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
locked = false;
total_incr = 0;
current_recno = recno = WT_RECNO_OOB;
- las_pageid = ref->page_las->las_pageid;
+ page_las = ref->page_las;
+ las_pageid = page_las->las_pageid;
session_flags = 0; /* [-Werror=maybe-uninitialized] */
WT_CLEAR(las_key);
@@ -167,7 +169,7 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
* Confirm the search using the unique prefix; if not a match, we're done searching for
* records for this page.
*/
- if (las_pageid != ref->page_las->las_pageid)
+ if (las_pageid != page_las->las_pageid)
break;
/* Allocate the WT_UPDATE structure. */
@@ -265,12 +267,11 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
FLD_SET(page->modify->restore_state, WT_PAGE_RS_LOOKASIDE);
- if (ref->page_las->skew_newest && !ref->page_las->has_prepares &&
+ if (page_las->min_skipped_ts == WT_TS_MAX && !page_las->has_prepares &&
!S2C(session)->txn_global.has_stable_timestamp &&
- __wt_txn_visible_all(
- session, ref->page_las->unstable_txn, ref->page_las->unstable_durable_timestamp)) {
- page->modify->rec_max_txn = ref->page_las->max_txn;
- page->modify->rec_max_timestamp = ref->page_las->max_timestamp;
+ __wt_txn_visible_all(session, page_las->max_txn, page_las->max_ondisk_ts)) {
+ page->modify->rec_max_txn = page_las->max_txn;
+ page->modify->rec_max_timestamp = page_las->max_ondisk_ts;
__wt_page_modify_clear(session, page);
}
}
@@ -279,8 +280,8 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
* Now the lookaside history has been read into cache there is no further need to maintain a
* reference to it.
*/
- ref->page_las->eviction_to_lookaside = false;
- ref->page_las->resolved = true;
+ page_las->eviction_to_lookaside = false;
+ page_las->resolved = true;
err:
if (locked)
@@ -543,7 +544,7 @@ skip_read:
* Don't free WT_REF.page_las, there may be concurrent readers.
*/
if (final_state == WT_REF_MEM && ref->page_las != NULL &&
- (!ref->page_las->skew_newest || ref->page_las->has_prepares))
+ (ref->page_las->min_skipped_ts != WT_TS_MAX || ref->page_las->has_prepares))
WT_ERR(__wt_las_remove_block(session, ref->page_las->las_pageid));
WT_REF_SET_STATE(ref, final_state);
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 3fdaf9c240e..a988793e6e7 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -299,21 +299,9 @@ __wt_sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* cache clean but with history that cannot be
* discarded), that is not wasted effort because
* checkpoint doesn't need to write the page again.
- *
- * Once the transaction has given up it's snapshot it
- * is no longer safe to reconcile pages. That happens
- * prior to the final metadata checkpoint.
- *
- * XXX Only attempt this eviction when there are no
- * readers older than the checkpoint. Otherwise, a bug
- * in eviction can mark the page clean and discard
- * history, causing those reads to incorrectly see
- * newer versions of data than they should.
*/
if (!WT_PAGE_IS_INTERNAL(page) && page->read_gen == WT_READGEN_WONT_NEED &&
- !tried_eviction && F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT) &&
- (!F_ISSET(txn, WT_TXN_HAS_TS_READ) ||
- txn->read_timestamp == conn->txn_global.pinned_timestamp)) {
+ !tried_eviction) {
WT_ERR_BUSY_OK(__wt_page_release_evict(session, walk, 0));
walk = prev;
prev = NULL;
diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c
index c017e7c8a9c..d278a5d0496 100644
--- a/src/third_party/wiredtiger/src/btree/row_key.c
+++ b/src/third_party/wiredtiger/src/btree/row_key.c
@@ -180,17 +180,15 @@ __wt_row_leaf_key_work(
copy = WT_ROW_KEY_COPY(rip);
#ifdef HAVE_DIAGNOSTIC
/*
- * Debugging added to detect and gather information for rare hang. Detect and abort if the
- * current operation takes too long.
+ * Debugging added to detect and gather information for rare hang, WT-5043. Detect and abort
+ * if the current function call or operation takes too long (and 5 minutes is an eternity).
*/
__wt_seconds32(session, &current);
WT_ERR_ASSERT(session, (current - start) < WT_MINUTE * 5, EINVAL,
- "Current function call taking too long: current %" PRIu32 " func started %" PRIu32,
- current, start);
+ "call tracking for WT-5043: %s took longer than 5 minutes", __func__);
WT_ERR_ASSERT(session,
- session->op_start == 0 || ((current - session->op_start) < WT_MINUTE * 5), EINVAL,
- "Operation taking too long: current %" PRIu32 " started %" PRIu32, current,
- session->op_start);
+ (session->op_5043_seconds == 0 || (current - session->op_5043_seconds) < WT_MINUTE * 5),
+ EINVAL, "operation tracking for WT-5043: %s took longer than 5 minutes", session->name);
#endif
/*
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index e1edcb596fa..aa05724b406 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -396,7 +396,6 @@ bool
__wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref)
{
WT_TXN *txn;
- wt_timestamp_t unstable_timestamp;
txn = &session->txn;
@@ -425,13 +424,17 @@ __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref)
/*
* If some of the page's history overlaps with the reader's snapshot then we have to read it.
- * This is only relevant if we chose versions that were unstable when the page was written.
*/
- if (ref->page_las->skew_newest && WT_TXNID_LE(txn->snap_min, ref->page_las->unstable_txn))
+ if (WT_TXNID_LE(txn->snap_min, ref->page_las->max_txn))
return (false);
+ /*
+ * Otherwise, if not reading at a timestamp, the page's history is in the past, so the page
+ * image is correct if it contains the most recent versions of everything and nothing was
+ * prepared.
+ */
if (!F_ISSET(txn, WT_TXN_HAS_TS_READ))
- return (ref->page_las->skew_newest);
+ return (!ref->page_las->has_prepares && ref->page_las->min_skipped_ts == WT_TS_MAX);
/*
* Skip lookaside history if reading as of a timestamp, we evicted new
@@ -439,21 +442,18 @@ __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref)
* possible for prepared updates, because the commit timestamp was not
* known when the page was evicted.
*
- * Skip lookaside pages if reading as of a timestamp, we evicted old
- * versions of data and all the unstable updates are in the future.
- *
- * Checkpoint should respect durable timestamps, other reads should
- * respect ordinary visibility. Checking for just the unstable updates
- * during checkpoint would end up reading more content from lookaside
- * than necessary.
+ * Otherwise, skip reading lookaside history if everything on the page
+ * is older than the read timestamp, and the oldest update in lookaside
+ * newer than the page is in the future of the reader. This seems
+ * unlikely, but is exactly what eviction tries to do when a checkpoint
+ * is running.
*/
- unstable_timestamp = WT_SESSION_IS_CHECKPOINT(session) ?
- ref->page_las->unstable_durable_timestamp :
- ref->page_las->unstable_timestamp;
- if (ref->page_las->skew_newest && !ref->page_las->has_prepares &&
- txn->read_timestamp > unstable_timestamp)
+ if (!ref->page_las->has_prepares && ref->page_las->min_skipped_ts == WT_TS_MAX &&
+ txn->read_timestamp >= ref->page_las->max_ondisk_ts)
return (true);
- if (!ref->page_las->skew_newest && txn->read_timestamp < unstable_timestamp)
+
+ if (txn->read_timestamp >= ref->page_las->max_ondisk_ts &&
+ txn->read_timestamp < ref->page_las->min_skipped_ts)
return (true);
return (false);
@@ -586,16 +586,15 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_MULTI *
"file ID %" PRIu32 ", page ID %" PRIu64
". "
"Max txn ID %" PRIu64
- ", unstable timestamp %s,"
- " unstable durable timestamp %s, %s. "
+ ", max ondisk timestamp %s, "
+ "first skipped ts %s. "
"Entries now in lookaside file: %" PRId64
", "
"cache dirty: %2.3f%% , "
"cache use: %2.3f%%",
btree_id, multi->page_las.las_pageid, multi->page_las.max_txn,
- __wt_timestamp_to_string(multi->page_las.unstable_timestamp, ts_string[0]),
- __wt_timestamp_to_string(multi->page_las.unstable_durable_timestamp, ts_string[1]),
- multi->page_las.skew_newest ? "newest" : "not newest",
+ __wt_timestamp_to_string(multi->page_las.max_ondisk_ts, ts_string[0]),
+ __wt_timestamp_to_string(multi->page_las.min_skipped_ts, ts_string[1]),
WT_STAT_READ(conn->stats, cache_lookaside_entries), pct_dirty, pct_full);
}
@@ -746,18 +745,14 @@ __wt_las_insert_block(
if (upd == list->onpage_upd && upd->size > 0 &&
(upd->type == WT_UPDATE_STANDARD || upd->type == WT_UPDATE_MODIFY)) {
las_value.size = 0;
- WT_ASSERT(session, upd != first_upd || multi->page_las.skew_newest);
cursor->set_value(cursor, upd->txnid, upd->start_ts, upd->durable_ts,
upd->prepare_state, WT_UPDATE_BIRTHMARK, &las_value);
} else
cursor->set_value(cursor, upd->txnid, upd->start_ts, upd->durable_ts,
upd->prepare_state, upd->type, &las_value);
- /*
- * Using update looks a little strange because the keys are guaranteed to not exist, but
- * since we're appending, we want the cursor to stay positioned in between inserts.
- */
- WT_ERR(cursor->update(cursor));
+ /* Using insert so we don't keep the page pinned longer than necessary. */
+ WT_ERR(cursor->insert(cursor));
++insert_cnt;
if (upd->prepare_state == WT_PREPARE_INPROGRESS)
++prepared_insert_cnt;
diff --git a/src/third_party/wiredtiger/src/checksum/zseries/slicing-consts.h b/src/third_party/wiredtiger/src/checksum/zseries/slicing-consts.h
index dae4b9d1c1e..88ddc900243 100644
--- a/src/third_party/wiredtiger/src/checksum/zseries/slicing-consts.h
+++ b/src/third_party/wiredtiger/src/checksum/zseries/slicing-consts.h
@@ -1,4 +1,5 @@
/* CRC-32 and CRC-32C slicing-by-8 constants, for use on big-endian systems. */
+#if 0
static const unsigned int __attribute__((aligned(128))) crc32table_le[8][256] = {
{0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07, 0x8ff46a70, 0x35a563e9, 0xa395649e,
0x3288db0e, 0xa4b8dc79, 0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, 0x911dbf90,
@@ -257,7 +258,9 @@ static const unsigned int __attribute__((aligned(128))) crc32table_le[8][256] =
0x4a146bff, 0xd414c133, 0x37134ebd, 0xa913e471, 0xb01a217b, 0x2e1a8bb7, 0xcd1d0439, 0x531daef5,
0xff0f8e2c, 0x610f24e0, 0x8208ab6e, 0x1c0801a2, 0x0501c4a8, 0x9b016e64, 0x7806e1ea,
0xe6064b26}};
+#endif /* NOT CURRENTLY USED */
+#if 0
static const unsigned int __attribute__((aligned(128))) crc32table_be[8][256] = {
{0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005,
0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
@@ -516,6 +519,7 @@ static const unsigned int __attribute__((aligned(128))) crc32table_be[8][256] =
0x3548049b, 0x6ee9d851, 0x820bbd0f, 0xd9aa61c5, 0x5f0e6a04, 0x04afb6ce, 0xe84dd390, 0xb3ec0f5a,
0xe1c4d9a5, 0xba65056f, 0x56876031, 0x0d26bcfb, 0x8b82b73a, 0xd0236bf0, 0x3cc10eae,
0x6760d264}};
+#endif /* NOT CURRENTLY USED */
static const unsigned int __attribute__((aligned(128))) crc32ctable_le[8][256] = {
{0x00000000, 0x03836bf2, 0xf7703be1, 0xf4f35013, 0x1f979ac7, 0x1c14f135, 0xe8e7a126, 0xeb64cad4,
@@ -776,6 +780,7 @@ static const unsigned int __attribute__((aligned(128))) crc32ctable_le[8][256] =
0xa1354ce5, 0x864870ac, 0xefcf3477, 0xc8b2083e, 0xccb751c4, 0xebca6d8d, 0x824d2956,
0xa530151f}};
+#if 0
static const unsigned int __attribute__((aligned(128))) crc32ctable_be[8][256] = {
{0x00000000, 0x1edc6f41, 0x3db8de82, 0x2364b1c3, 0x7b71bd04, 0x65add245, 0x46c96386, 0x58150cc7,
0xf6e37a08, 0xe83f1549, 0xcb5ba48a, 0xd587cbcb, 0x8d92c70c, 0x934ea84d, 0xb02a198e, 0xaef676cf,
@@ -1034,3 +1039,4 @@ static const unsigned int __attribute__((aligned(128))) crc32ctable_be[8][256] =
0x7b80461d, 0x5de9c631, 0x37534645, 0x113ac669, 0xe22646ad, 0xc44fc681, 0xaef546f5, 0x889cc6d9,
0x5610283c, 0x7079a810, 0x1ac32864, 0x3caaa848, 0xcfb6288c, 0xe9dfa8a0, 0x836528d4,
0xa50ca8f8}};
+#endif /* NOT CURRENTLY USED */
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index e23c4dd4c5e..958c267a7ce 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -118,6 +118,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{"log", "category", NULL, NULL, confchk_WT_CONNECTION_reconfigure_log_subconfigs, 4},
{"lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2},
{"lsm_merge", "boolean", NULL, NULL, NULL, 0},
+ {"operation_timeout_ms", "int", NULL, "min=1", NULL, 0},
{"operation_tracking", "category", NULL, NULL,
confchk_wiredtiger_open_operation_tracking_subconfigs, 2},
{"shared_cache", "category", NULL, NULL, confchk_wiredtiger_open_shared_cache_subconfigs, 5},
@@ -190,7 +191,8 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_begin_transaction[] = {
"choices=[\"read-uncommitted\",\"read-committed\","
"\"snapshot\"]",
NULL, 0},
- {"name", "string", NULL, NULL, NULL, 0}, {"priority", "int", NULL, "min=-100,max=100", NULL, 0},
+ {"name", "string", NULL, NULL, NULL, 0}, {"operation_timeout_ms", "int", NULL, "min=1", NULL, 0},
+ {"priority", "int", NULL, "min=-100,max=100", NULL, 0},
{"read_timestamp", "string", NULL, NULL, NULL, 0},
{"roundup_timestamps", "category", NULL, NULL,
confchk_WT_SESSION_begin_transaction_roundup_timestamps_subconfigs, 2},
@@ -551,6 +553,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{"lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2},
{"lsm_merge", "boolean", NULL, NULL, NULL, 0}, {"mmap", "boolean", NULL, NULL, NULL, 0},
{"multiprocess", "boolean", NULL, NULL, NULL, 0},
+ {"operation_timeout_ms", "int", NULL, "min=1", NULL, 0},
{"operation_tracking", "category", NULL, NULL,
confchk_wiredtiger_open_operation_tracking_subconfigs, 2},
{"readonly", "boolean", NULL, NULL, NULL, 0}, {"salvage", "boolean", NULL, NULL, NULL, 0},
@@ -618,6 +621,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{"lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2},
{"lsm_merge", "boolean", NULL, NULL, NULL, 0}, {"mmap", "boolean", NULL, NULL, NULL, 0},
{"multiprocess", "boolean", NULL, NULL, NULL, 0},
+ {"operation_timeout_ms", "int", NULL, "min=1", NULL, 0},
{"operation_tracking", "category", NULL, NULL,
confchk_wiredtiger_open_operation_tracking_subconfigs, 2},
{"readonly", "boolean", NULL, NULL, NULL, 0}, {"salvage", "boolean", NULL, NULL, NULL, 0},
@@ -685,6 +689,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{"lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2},
{"lsm_merge", "boolean", NULL, NULL, NULL, 0}, {"mmap", "boolean", NULL, NULL, NULL, 0},
{"multiprocess", "boolean", NULL, NULL, NULL, 0},
+ {"operation_timeout_ms", "int", NULL, "min=1", NULL, 0},
{"operation_tracking", "category", NULL, NULL,
confchk_wiredtiger_open_operation_tracking_subconfigs, 2},
{"readonly", "boolean", NULL, NULL, NULL, 0}, {"salvage", "boolean", NULL, NULL, NULL, 0},
@@ -750,6 +755,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{"lsm_manager", "category", NULL, NULL, confchk_wiredtiger_open_lsm_manager_subconfigs, 2},
{"lsm_merge", "boolean", NULL, NULL, NULL, 0}, {"mmap", "boolean", NULL, NULL, NULL, 0},
{"multiprocess", "boolean", NULL, NULL, NULL, 0},
+ {"operation_timeout_ms", "int", NULL, "min=1", NULL, 0},
{"operation_tracking", "category", NULL, NULL,
confchk_wiredtiger_open_operation_tracking_subconfigs, 2},
{"readonly", "boolean", NULL, NULL, NULL, 0}, {"salvage", "boolean", NULL, NULL, NULL, 0},
@@ -815,12 +821,12 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"close_scan_interval=10),io_capacity=(total=0),log=(archive=true,"
"os_cache_dirty_pct=0,prealloc=true,zero_fill=false),"
"lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "operation_tracking=(enabled=false,path=\".\"),"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=false,on_close=false,"
- "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
+ "operation_timeout_ms=0,operation_tracking=(enabled=false,"
+ "path=\".\"),shared_cache=(chunk=10MB,name=,quota=0,reserve=0,"
+ "size=500MB),statistics=none,statistics_log=(json=false,"
+ "on_close=false,sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,verbose=",
- confchk_WT_CONNECTION_reconfigure, 26},
+ confchk_WT_CONNECTION_reconfigure, 27},
{"WT_CONNECTION.rollback_to_stable", "", NULL, 0}, {"WT_CONNECTION.set_file_system", "", NULL, 0},
{"WT_CONNECTION.set_timestamp",
"commit_timestamp=,durable_timestamp=,force=false,"
@@ -836,9 +842,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
",os_cache_max=0",
confchk_WT_SESSION_alter, 8},
{"WT_SESSION.begin_transaction",
- "ignore_prepare=false,isolation=,name=,priority=0,read_timestamp="
- ",roundup_timestamps=(prepared=false,read=false),snapshot=,sync=",
- confchk_WT_SESSION_begin_transaction, 8},
+ "ignore_prepare=false,isolation=,name=,operation_timeout_ms=0,"
+ "priority=0,read_timestamp=,roundup_timestamps=(prepared=false,"
+ "read=false),snapshot=,sync=",
+ confchk_WT_SESSION_begin_transaction, 9},
{"WT_SESSION.checkpoint", "drop=,force=false,name=,target=,use_timestamp=true",
confchk_WT_SESSION_checkpoint, 5},
{"WT_SESSION.close", "", NULL, 0},
@@ -989,16 +996,16 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
"prealloc=true,recover=on,zero_fill=false),"
"lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
- "path=\".\"),readonly=false,salvage=false,session_max=100,"
- "session_scratch_max=2MB,session_table_cache=true,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=false,on_close=false,"
- "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "timing_stress_for_test=,transaction_sync=(enabled=false,"
- "method=fsync),use_environment=true,use_environment_priv=false,"
+ "mmap=true,multiprocess=false,operation_timeout_ms=0,"
+ "operation_tracking=(enabled=false,path=\".\"),readonly=false,"
+ "salvage=false,session_max=100,session_scratch_max=2MB,"
+ "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0,"
+ "reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
+ ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
+ ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false"
+ ",method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=,write_through=",
- confchk_wiredtiger_open, 50},
+ confchk_wiredtiger_open, 51},
{"wiredtiger_open_all",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
",builtin_extension_config=,cache_cursors=true,"
@@ -1019,16 +1026,16 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
"prealloc=true,recover=on,zero_fill=false),"
"lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
- "path=\".\"),readonly=false,salvage=false,session_max=100,"
- "session_scratch_max=2MB,session_table_cache=true,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=false,on_close=false,"
- "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "timing_stress_for_test=,transaction_sync=(enabled=false,"
- "method=fsync),use_environment=true,use_environment_priv=false,"
+ "mmap=true,multiprocess=false,operation_timeout_ms=0,"
+ "operation_tracking=(enabled=false,path=\".\"),readonly=false,"
+ "salvage=false,session_max=100,session_scratch_max=2MB,"
+ "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0,"
+ "reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
+ ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
+ ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false"
+ ",method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_all, 51},
+ confchk_wiredtiger_open_all, 52},
{"wiredtiger_open_basecfg",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
",builtin_extension_config=,cache_cursors=true,"
@@ -1047,15 +1054,15 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
"prealloc=true,recover=on,zero_fill=false),"
"lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
- "path=\".\"),readonly=false,salvage=false,session_max=100,"
- "session_scratch_max=2MB,session_table_cache=true,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=false,on_close=false,"
- "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "timing_stress_for_test=,transaction_sync=(enabled=false,"
- "method=fsync),verbose=,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_basecfg, 45},
+ "mmap=true,multiprocess=false,operation_timeout_ms=0,"
+ "operation_tracking=(enabled=false,path=\".\"),readonly=false,"
+ "salvage=false,session_max=100,session_scratch_max=2MB,"
+ "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0,"
+ "reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
+ ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
+ ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false"
+ ",method=fsync),verbose=,version=(major=0,minor=0),write_through=",
+ confchk_wiredtiger_open_basecfg, 46},
{"wiredtiger_open_usercfg",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
",builtin_extension_config=,cache_cursors=true,"
@@ -1074,15 +1081,15 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
"prealloc=true,recover=on,zero_fill=false),"
"lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
- "path=\".\"),readonly=false,salvage=false,session_max=100,"
- "session_scratch_max=2MB,session_table_cache=true,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=false,on_close=false,"
- "path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "timing_stress_for_test=,transaction_sync=(enabled=false,"
- "method=fsync),verbose=,write_through=",
- confchk_wiredtiger_open_usercfg, 44},
+ "mmap=true,multiprocess=false,operation_timeout_ms=0,"
+ "operation_tracking=(enabled=false,path=\".\"),readonly=false,"
+ "salvage=false,session_max=100,session_scratch_max=2MB,"
+ "session_table_cache=true,shared_cache=(chunk=10MB,name=,quota=0,"
+ "reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
+ ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
+ ",wait=0),timing_stress_for_test=,transaction_sync=(enabled=false"
+ ",method=fsync),verbose=,write_through=",
+ confchk_wiredtiger_open_usercfg, 45},
{NULL, NULL, NULL, 0}};
int
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index c7e776c62c4..9e7964758ff 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -1010,7 +1010,6 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config)
WT_SESSION *wt_session;
WT_SESSION_IMPL *s, *session;
uint32_t i;
- const char *ckpt_cfg;
conn = (WT_CONNECTION_IMPL *)wt_conn;
@@ -1074,47 +1073,24 @@ err:
WT_TRET(__wt_lsm_manager_destroy(session));
/*
- * After the async and LSM threads have exited, we shouldn't opening any more files.
+ * After the async and LSM threads have exited, we won't open more files for the application.
+ * However, the sweep server is still running and it can close file handles at the same time the
+ * final checkpoint is reviewing open data handles (forcing checkpoint to reopen handles). Shut
+ * down the sweep server and then flag the system should not open anything new.
*/
+ WT_TRET(__wt_sweep_destroy(session));
F_SET(conn, WT_CONN_CLOSING_NO_MORE_OPENS);
WT_FULL_BARRIER();
- /* The default session is used to access data handles during close. */
- F_CLR(session, WT_SESSION_NO_DATA_HANDLES);
-
/*
- * Perform a system-wide checkpoint so that all tables are consistent with each other. All
- * transactions are resolved but ignore timestamps to make sure all data gets to disk. Do this
- * before shutting down all the subsystems. We have shut down all user sessions, but send in
- * true for waiting for internal races.
+ * Shut down the checkpoint and capacity server threads: we don't want to throttle writes and
+ * we're about to do a final checkpoint separately from the checkpoint server.
*/
- WT_TRET(__wt_config_gets(session, cfg, "use_timestamp", &cval));
- ckpt_cfg = "use_timestamp=false";
- if (cval.val != 0) {
- ckpt_cfg = "use_timestamp=true";
- if (conn->txn_global.has_stable_timestamp)
- F_SET(conn, WT_CONN_CLOSING_TIMESTAMP);
- }
- if (!F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) {
- s = NULL;
- WT_TRET(__wt_open_internal_session(conn, "close_ckpt", true, 0, &s));
- if (s != NULL) {
- const char *checkpoint_cfg[] = {
- WT_CONFIG_BASE(session, WT_SESSION_checkpoint), ckpt_cfg, NULL};
- wt_session = &s->iface;
- WT_TRET(__wt_txn_checkpoint(s, checkpoint_cfg, true));
+ WT_TRET(__wt_capacity_server_destroy(session));
+ WT_TRET(__wt_checkpoint_server_destroy(session));
- /*
- * Mark the metadata dirty so we flush it on close, allowing recovery to be skipped.
- */
- WT_WITH_DHANDLE(s, WT_SESSION_META_DHANDLE(s), __wt_tree_modify_set(s));
-
- WT_TRET(wt_session->close(wt_session, config));
- }
- }
-
- /* Shut down the global transaction state. */
- __wt_txn_global_shutdown(session);
+ /* Perform a final checkpoint and shut down the global transaction state. */
+ WT_TRET(__wt_txn_global_shutdown(session, config, cfg));
if (ret != 0) {
__wt_err(session, ret, "failure during close, disabling further writes");
@@ -2574,6 +2550,9 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *c
WT_ERR(__wt_config_gets(session, cfg, "mmap", &cval));
conn->mmap = cval.val != 0;
+ WT_ERR(__wt_config_gets(session, cfg, "operation_timeout_ms", &cval));
+ conn->operation_timeout_us = (uint64_t)(cval.val * WT_THOUSAND);
+
WT_ERR(__wt_config_gets(session, cfg, "salvage", &cval));
if (cval.val) {
if (F_ISSET(conn, WT_CONN_READONLY))
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index fc352bbf821..f7e338ac9bb 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -82,10 +82,12 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
F_SET(conn, WT_CONN_CLOSING);
WT_FULL_BARRIER();
+ /* The default session is used to access data handles during close. */
+ F_CLR(session, WT_SESSION_NO_DATA_HANDLES);
+
/*
- * Shut down server threads other than the eviction server, which is needed later to close btree
- * handles. Some of these threads access btree handles, so take care in ordering shutdown to
- * make sure they exit before files are closed.
+ * Shut down server threads. Some of these threads access btree handles and eviction, shut them
+ * down before the eviction server, and shut all servers down before closing open data handles.
*/
WT_TRET(__wt_capacity_server_destroy(session));
WT_TRET(__wt_checkpoint_server_destroy(session));
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index c224a3b7b11..00d02886920 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -277,10 +277,12 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread)
conn = S2C(session);
cache = conn->cache;
- /*
- * The thread group code calls us repeatedly. So each call is one pass through eviction.
- */
- WT_TRACK_TIME(session);
+/*
+ * The thread group code calls us repeatedly. So each call is one pass through eviction.
+ */
+#ifdef HAVE_DIAGNOSTIC
+ __wt_seconds32(session, &session->op_5043_seconds);
+#endif
if (conn->evict_server_running && __wt_spin_trylock(session, &cache->evict_pass_lock) == 0) {
/*
* Cannot use WT_WITH_PASS_LOCK because this is a try lock. Fix when that is supported. We
@@ -2295,21 +2297,23 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, bool readonly, d
if (timer)
time_start = __wt_clock(session);
- WT_TRACK_TIME(session);
+#ifdef HAVE_DIAGNOSTIC
+ __wt_seconds32(session, &session->op_5043_seconds);
+#endif
for (initial_progress = cache->eviction_progress;; ret = 0) {
/*
- * A pathological case: if we're the oldest transaction in the
- * system and the eviction server is stuck trying to find space
- * (and we're not in recovery, because those transactions can't
- * be rolled back), abort the transaction to give up all hazard
- * pointers before trying again.
+ * If eviction is stuck, check if this thread is likely causing problems and should be
+ * rolled back. Ignore if in recovery, those transactions can't be rolled back.
*/
- if (__wt_cache_stuck(session) && __wt_txn_am_oldest(session) &&
- !F_ISSET(conn, WT_CONN_RECOVERING)) {
- --cache->evict_aggressive_score;
- WT_STAT_CONN_INCR(session, txn_fail_cache);
- WT_ERR(
- __wt_txn_rollback_required(session, "oldest transaction rolled back for eviction"));
+ if (!F_ISSET(conn, WT_CONN_RECOVERING) && __wt_cache_stuck(session)) {
+ ret = __wt_txn_is_blocking_old(session);
+ if (ret == 0)
+ ret = __wt_txn_is_blocking_pin(session);
+ if (ret == WT_ROLLBACK) {
+ --cache->evict_aggressive_score;
+ WT_STAT_CONN_INCR(session, txn_fail_cache);
+ }
+ WT_ERR(ret);
}
/*
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 41ecfb40242..785c6219c6b 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -678,15 +678,8 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool
/* Reconcile the page. */
ret = __wt_reconcile(session, ref, NULL, flags, lookaside_retryp);
-
- /*
- * If attempting eviction during a checkpoint, we may successfully reconcile but then find that
- * there are updates on the page too new to evict. Give up evicting in that case: checkpoint
- * will include the reconciled page when it visits the parent.
- */
- if (WT_SESSION_BTREE_SYNC(session) && !__wt_page_is_modified(page) &&
- !__wt_txn_visible_all(session, page->modify->rec_max_txn, page->modify->rec_max_timestamp))
- return (__wt_set_return(session, EBUSY));
+ WT_ASSERT(session, __wt_page_is_modified(page) ||
+ __wt_txn_visible_all(session, page->modify->rec_max_txn, page->modify->rec_max_timestamp));
/*
* If reconciliation fails but reports it might succeed if we use the lookaside table, try again
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index 817ccbae553..36cefa8dc68 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -27,11 +27,9 @@
#define WT_SINGLE_THREAD_CHECK_STOP(s) \
if (--(s)->api_enter_refcnt == 0) \
WT_PUBLISH((s)->api_tid, 0);
-#define WT_TRACK_TIME(s) __wt_seconds32((s), &(s)->op_start)
#else
#define WT_SINGLE_THREAD_CHECK_START(s)
#define WT_SINGLE_THREAD_CHECK_STOP(s)
-#define WT_TRACK_TIME(s) (s)->op_start = 0
#endif
/* Standard entry points to the API: declares/initializes local variables. */
@@ -46,8 +44,8 @@
* correct. \
*/ \
WT_TRACK_OP_INIT(s); \
- (s)->op_start = 0; \
WT_SINGLE_THREAD_CHECK_START(s); \
+ __wt_op_timer_start(s); \
WT_ERR(WT_SESSION_CHECK_PANIC(s)); \
/* Reset wait time if this isn't an API reentry. */ \
if (__oldname == NULL) \
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 68d6f53c0f3..d168d10593c 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -230,21 +230,31 @@ struct __wt_ovfl_reuse {
/*
* WT_PAGE_LOOKASIDE --
- * Related information for on-disk pages with lookaside entries.
+ * Information for on-disk pages with lookaside entries.
+ *
+ * This information is used to decide whether history evicted to lookaside is
+ * needed for a read, and when it is no longer needed at all. We track the
+ * newest update written to the disk image in `max_ondisk_ts`, and the oldest
+ * update skipped to choose the on-disk version in `min_skipped_ts`. If no
+ * updates were skipped, then the disk image contains the newest versions of
+ * all updates and `min_skipped_ts == WT_TS_MAX`.
+ *
+ * For reads without a timestamp, we check that there are no skipped updates
+ * and that the reader's snapshot can see everything on disk.
+ *
+ * For readers with a timestamp, it is safe to ignore lookaside if either
+ * (a) there are no skipped updates and everything on disk is visible, or
+ * (b) everything on disk is visible, and the minimum skipped update is in
+ * the future of the reader.
*/
struct __wt_page_lookaside {
- uint64_t las_pageid; /* Page ID in lookaside */
- uint64_t max_txn; /* Maximum transaction ID */
- uint64_t unstable_txn; /* First transaction ID not on page */
- wt_timestamp_t max_timestamp; /* Maximum timestamp */
- wt_timestamp_t unstable_timestamp; /* First timestamp not on page */
- wt_timestamp_t unstable_durable_timestamp;
- /* First durable timestamp not on
- * page */
- bool eviction_to_lookaside; /* Revert to lookaside on eviction */
- bool has_prepares; /* One or more updates are prepared */
- bool resolved; /* History has been read into cache */
- bool skew_newest; /* Page image has newest versions */
+ uint64_t las_pageid; /* Page ID in lookaside */
+ uint64_t max_txn; /* Maximum transaction ID */
+ wt_timestamp_t max_ondisk_ts; /* Maximum timestamp on disk */
+ wt_timestamp_t min_skipped_ts; /* Skipped in favor of disk version */
+ bool eviction_to_lookaside; /* Revert to lookaside on eviction */
+ bool has_prepares; /* One or more updates are prepared */
+ bool resolved; /* History has been read into cache */
};
/*
@@ -909,7 +919,7 @@ struct __wt_ref {
WT_SESSION_IMPL *session;
const char *name;
const char *func;
- uint32_t time_sec; /* DEBUGGING field for rare hang. */
+ uint32_t time_sec;
uint16_t line;
uint16_t state;
} hist[WT_REF_SAVE_STATE_MAX];
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 3f80ee5cda7..2fa3e0d94d3 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1160,12 +1160,10 @@ __wt_page_las_active(WT_SESSION_IMPL *session, WT_REF *ref)
return (false);
if (page_las->resolved)
return (false);
- if (!page_las->skew_newest || page_las->has_prepares)
+ if (page_las->min_skipped_ts != WT_TS_MAX || page_las->has_prepares)
return (true);
- if (__wt_txn_visible_all(session, page_las->max_txn, page_las->max_timestamp))
- return (false);
- return (true);
+ return (!__wt_txn_visible_all(session, page_las->max_txn, page_las->max_ondisk_ts));
}
/*
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index 174263c3949..32becc05467 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -204,6 +204,8 @@ struct __wt_connection_impl {
/* Configuration */
const WT_CONFIG_ENTRY **config_entries;
+ uint64_t operation_timeout_us; /* Maximum operation period before rollback */
+
const char *optrack_path; /* Directory for operation logs */
WT_FH *optrack_map_fh; /* Name to id translation file. */
WT_SPINLOCK optrack_map_spinlock; /* Translation file spinlock. */
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index d02b4dca326..4844a88380c 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -1421,8 +1421,14 @@ extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char *config, const char **cfg)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_is_blocking_old(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_is_blocking_pin(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
@@ -1532,8 +1538,6 @@ extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint32_t __wt_split_page_size(int split_pct, uint32_t maxpagesize, uint32_t allocsize)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern uint64_t __wt_clock_to_nsec(uint64_t end, uint64_t begin)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint64_t __wt_ext_transaction_oldest(WT_EXTENSION_API *wt_api)
@@ -1607,8 +1611,6 @@ extern void __wt_curtable_set_key(WT_CURSOR *cursor, ...);
extern void __wt_curtable_set_value(WT_CURSOR *cursor, ...);
extern void __wt_encrypt_size(
WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep);
-extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
- WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_err_func(
WT_SESSION_IMPL *session, int error, const char *func, int line, const char *fmt, ...)
WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 5, 6)))
@@ -1687,9 +1689,6 @@ extern void __wt_root_ref_init(
extern void __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern void __wt_schema_destroy_colgroup(WT_SESSION_IMPL *session, WT_COLGROUP **colgroupp);
extern void __wt_scr_discard(WT_SESSION_IMPL *session);
-extern void __wt_seconds(WT_SESSION_IMPL *session, uint64_t *secondsp)
- WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
-extern void __wt_seconds32(WT_SESSION_IMPL *session, uint32_t *secondsp);
extern void __wt_session_close_cache(WT_SESSION_IMPL *session);
extern void __wt_session_gen_enter(WT_SESSION_IMPL *session, int which);
extern void __wt_session_gen_leave(WT_SESSION_IMPL *session, int which);
@@ -1722,7 +1721,6 @@ extern void __wt_txn_clear_timestamp_queues(WT_SESSION_IMPL *session);
extern void __wt_txn_destroy(WT_SESSION_IMPL *session);
extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session);
extern void __wt_txn_global_destroy(WT_SESSION_IMPL *session);
-extern void __wt_txn_global_shutdown(WT_SESSION_IMPL *session);
extern void __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session);
extern void __wt_txn_op_free(WT_SESSION_IMPL *session, WT_TXN_OP *op);
extern void __wt_txn_publish_read_timestamp(WT_SESSION_IMPL *session);
@@ -1770,6 +1768,8 @@ static inline bool __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *p
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_off_page(WT_PAGE *page, const void *p)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_op_timer_fired(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_page_del_active(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
@@ -1797,10 +1797,6 @@ static inline bool __wt_session_can_wait(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_split_descent_race(WT_SESSION_IMPL *session, WT_REF *ref,
WT_PAGE_INDEX *saved_pindex) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_txn_am_oldest(WT_SESSION_IMPL *session)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline bool __wt_txn_upd_durable(WT_SESSION_IMPL *session, WT_UPDATE *upd)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_txn_upd_visible_all(WT_SESSION_IMPL *session, WT_UPDATE *upd)
@@ -2073,6 +2069,8 @@ static inline uint64_t __wt_cell_rle(WT_CELL_UNPACK *unpack)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline uint64_t __wt_clock(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline uint64_t __wt_clock_to_nsec(uint64_t end, uint64_t begin)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline uint64_t __wt_rdtsc(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline uint64_t __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -2110,6 +2108,8 @@ static inline void __wt_cond_wait(
WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *));
static inline void __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session);
static inline void __wt_cursor_dhandle_incr_use(WT_SESSION_IMPL *session);
+static inline void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
+static inline void __wt_op_timer_start(WT_SESSION_IMPL *session);
static inline void __wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref);
static inline void __wt_page_modify_clear(WT_SESSION_IMPL *session, WT_PAGE *page);
static inline void __wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page);
@@ -2137,6 +2137,8 @@ static inline void __wt_row_leaf_value_cell(WT_SESSION_IMPL *session, WT_PAGE *p
WT_CELL_UNPACK *kpack, WT_CELL_UNPACK *vpack);
static inline void __wt_row_leaf_value_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack);
static inline void __wt_scr_free(WT_SESSION_IMPL *session, WT_ITEM **bufp);
+static inline void __wt_seconds(WT_SESSION_IMPL *session, uint64_t *secondsp);
+static inline void __wt_seconds32(WT_SESSION_IMPL *session, uint32_t *secondsp);
static inline void __wt_spin_backoff(uint64_t *yield_count, uint64_t *sleep_usecs);
static inline void __wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t);
static inline void __wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t);
diff --git a/src/third_party/wiredtiger/src/include/extern_posix.h b/src/third_party/wiredtiger/src/include/extern_posix.h
index 189bc948714..a2280aefa4f 100644
--- a/src/third_party/wiredtiger/src/include/extern_posix.h
+++ b/src/third_party/wiredtiger/src/include/extern_posix.h
@@ -51,7 +51,8 @@ extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs,
bool (*run_func)(WT_SESSION_IMPL *), bool *signalled);
-extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp);
+extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds)
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_stream_set_line_buffer(FILE *fp)
diff --git a/src/third_party/wiredtiger/src/include/misc.i b/src/third_party/wiredtiger/src/include/misc.i
index 7b908ac3871..e937858ba5b 100644
--- a/src/third_party/wiredtiger/src/include/misc.i
+++ b/src/third_party/wiredtiger/src/include/misc.i
@@ -30,49 +30,6 @@ __wt_hex(int c)
}
/*
- * __wt_rdtsc --
- * Get a timestamp from CPU registers.
- */
-static inline uint64_t
-__wt_rdtsc(void)
-{
-#if defined(__i386)
- {
- uint64_t x;
-
- __asm__ volatile("rdtsc" : "=A"(x));
- return (x);
- }
-#elif defined(__amd64)
- {
- uint64_t a, d;
-
- __asm__ volatile("rdtsc" : "=a"(a), "=d"(d));
- return ((d << 32) | a);
- }
-#else
- return (0);
-#endif
-}
-
-/*
- * __wt_clock --
- * Obtain a timestamp via either a CPU register or via a system call on platforms where
- * obtaining it directly from the hardware register is not supported.
- */
-static inline uint64_t
-__wt_clock(WT_SESSION_IMPL *session)
-{
- struct timespec tsp;
-
- if (__wt_process.use_epochtime) {
- __wt_epoch(session, &tsp);
- return ((uint64_t)(tsp.tv_sec * WT_BILLION + tsp.tv_nsec));
- }
- return (__wt_rdtsc());
-}
-
-/*
* __wt_strdup --
* ANSI strdup function.
*/
diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h
index 22f63ae4ff4..8403097e03a 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.h
+++ b/src/third_party/wiredtiger/src/include/reconcile.h
@@ -33,12 +33,9 @@ struct __wt_reconcile {
/* Track the page's min/maximum transactions. */
uint64_t max_txn;
- wt_timestamp_t max_timestamp;
-
- /* Lookaside boundary tracking. */
- uint64_t unstable_txn;
- wt_timestamp_t unstable_durable_timestamp;
- wt_timestamp_t unstable_timestamp;
+ wt_timestamp_t max_ts;
+ wt_timestamp_t max_ondisk_ts;
+ wt_timestamp_t min_skipped_ts;
u_int updates_seen; /* Count of updates seen. */
u_int updates_unstable; /* Count of updates not visible_all. */
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index b1da78f4668..01eae24cb44 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -61,7 +61,12 @@ struct __wt_session_impl {
const char *name; /* Name */
const char *lastop; /* Last operation */
uint32_t id; /* UID, offset in session array */
- uint32_t op_start; /* DEBUGGING: Operation start time (seconds) */
+
+ uint64_t operation_start_us; /* Operation start */
+ uint64_t operation_timeout_us; /* Maximum operation period before rollback */
+#ifdef HAVE_DIAGNOSTIC
+ uint32_t op_5043_seconds; /* Temporary debugging to catch WT-5043, discard after 01/2020. */
+#endif
WT_EVENT_HANDLER *event_handler; /* Application's event handlers */
diff --git a/src/third_party/wiredtiger/src/include/time.i b/src/third_party/wiredtiger/src/include/time.i
new file mode 100644
index 00000000000..bad2f0417ad
--- /dev/null
+++ b/src/third_party/wiredtiger/src/include/time.i
@@ -0,0 +1,182 @@
+/*-
+ * Copyright (c) 2014-2019 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_rdtsc --
+ * Get a timestamp from CPU registers.
+ */
+static inline uint64_t
+__wt_rdtsc(void)
+{
+#if defined(__i386)
+ {
+ uint64_t x;
+
+ __asm__ volatile("rdtsc" : "=A"(x));
+ return (x);
+ }
+#elif defined(__amd64)
+ {
+ uint64_t a, d;
+
+ __asm__ volatile("rdtsc" : "=a"(a), "=d"(d));
+ return ((d << 32) | a);
+ }
+#else
+ return (0);
+#endif
+}
+
+/*
+ * __time_check_monotonic --
+ * Check and prevent time running backward. If we detect that it has, we set the time structure
+ * to the previous values, making time stand still until we see a time in the future of the
+ * highest value seen so far.
+ */
+static inline void
+__time_check_monotonic(WT_SESSION_IMPL *session, struct timespec *tsp)
+{
+ /*
+ * Detect time going backward. If so, use the last saved timestamp.
+ */
+ if (session == NULL)
+ return;
+
+ if (tsp->tv_sec < session->last_epoch.tv_sec ||
+ (tsp->tv_sec == session->last_epoch.tv_sec && tsp->tv_nsec < session->last_epoch.tv_nsec)) {
+ WT_STAT_CONN_INCR(session, time_travel);
+ *tsp = session->last_epoch;
+ } else
+ session->last_epoch = *tsp;
+}
+
+/*
+ * __wt_epoch --
+ * Return the time since the Epoch.
+ */
+static inline void
+__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
+{
+ struct timespec tmp;
+
+ /*
+ * Read into a local variable, then check for monotonically increasing time, ensuring single
+ * threads never see time move backward. We don't prevent multiple threads from seeing time move
+ * backwards (even when reading time serially, the saved last-read time is per thread, not per
+ * timer, so multiple threads can race the time). Nor do we prevent multiple threads
+ * simultaneously reading the time from seeing random time or time moving backwards (assigning
+ * the time structure to the returned memory location implies multicycle writes to memory).
+ */
+ __wt_epoch_raw(session, &tmp);
+ __time_check_monotonic(session, &tmp);
+ *tsp = tmp;
+}
+
+/*
+ * __wt_clock --
+ * Obtain a timestamp via either a CPU register or via a system call on platforms where
+ * obtaining it directly from the hardware register is not supported.
+ */
+static inline uint64_t
+__wt_clock(WT_SESSION_IMPL *session)
+{
+ struct timespec tsp;
+
+ /*
+ * In one case we return nanoseconds, in the other we return clock ticks. That looks wrong, but
+ * it's not. When simply comparing before and after values, which is returned doesn't matter.
+ * When trying to calculate wall-clock time (that is, comparing a starting time with an ending
+ * time), we'll subtract the two values and then call a function to convert the result of the
+ * subtraction into nanoseconds. In the case where we already have nanoseconds, that function
+ * has a conversion constant of 1 and we'll skip the conversion, in the case where we have clock
+ * ticks, the conversion constant will be real. Doing it that way avoids a floating-point
+ * operation per wall-clock time calculation.
+ */
+ if (__wt_process.use_epochtime) {
+ __wt_epoch(session, &tsp);
+ return ((uint64_t)(tsp.tv_sec * WT_BILLION + tsp.tv_nsec));
+ }
+ return (__wt_rdtsc());
+}
+
+/*
+ * __wt_seconds --
+ * Return the seconds since the Epoch.
+ */
+static inline void
+__wt_seconds(WT_SESSION_IMPL *session, uint64_t *secondsp)
+{
+ struct timespec t;
+
+ __wt_epoch(session, &t);
+
+ *secondsp = (uint64_t)(t.tv_sec + t.tv_nsec / WT_BILLION);
+}
+
+/*
+ * __wt_seconds32 --
+ * Return the seconds since the Epoch in 32 bits.
+ */
+static inline void
+__wt_seconds32(WT_SESSION_IMPL *session, uint32_t *secondsp)
+{
+ uint64_t seconds;
+
+ /* This won't work in 2038. But for now allow it. */
+ __wt_seconds(session, &seconds);
+ *secondsp = (uint32_t)seconds;
+}
+
+/*
+ * __wt_clock_to_nsec --
+ * Convert from clock ticks to nanoseconds.
+ */
+static inline uint64_t
+__wt_clock_to_nsec(uint64_t end, uint64_t begin)
+{
+ double clock_diff;
+
+ /*
+ * If the ticks were reset, consider it an invalid check and just return zero as the time
+ * difference because we cannot compute anything meaningful.
+ */
+ if (end < begin)
+ return (0);
+ clock_diff = (double)(end - begin);
+ return ((uint64_t)(clock_diff / __wt_process.tsc_nsec_ratio));
+}
+
+/*
+ * __wt_op_timer_start --
+ * Start the operations timer.
+ */
+static inline void
+__wt_op_timer_start(WT_SESSION_IMPL *session)
+{
+ session->operation_start_us = session->operation_timeout_us == 0 ? 0 : __wt_clock(session);
+}
+
+/*
+ * __wt_op_timer_fired --
+ * Check the operations timers.
+ */
+static inline bool
+__wt_op_timer_fired(WT_SESSION_IMPL *session)
+{
+ uint64_t diff, now;
+
+ /* Check for both a timeout and a start time to avoid any future configuration races. */
+ if (session->operation_timeout_us == 0 || session->operation_start_us == 0)
+ return (false);
+
+ now = __wt_clock(session);
+ diff = WT_CLOCKDIFF_US(now, session->operation_start_us);
+ return (diff > session->operation_timeout_us);
+}
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 3e5d2bfd850..6d7ead93201 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -793,19 +793,6 @@ __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd)
}
/*
- * __wt_txn_upd_durable --
- * Can the current transaction make the given update durable.
- */
-static inline bool
-__wt_txn_upd_durable(WT_SESSION_IMPL *session, WT_UPDATE *upd)
-{
- /* If update is visible then check if it is durable. */
- if (__wt_txn_upd_visible_type(session, upd) != WT_VISIBLE_TRUE)
- return (false);
- return (__wt_txn_visible(session, upd->txnid, upd->durable_ts));
-}
-
-/*
* __wt_txn_upd_visible --
* Can the current transaction see the given update.
*/
@@ -871,8 +858,12 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
if (session->ncursors > 0)
WT_RET(__wt_session_copy_values(session));
- /* Stall here if the cache is completely full. */
- WT_RET(__wt_cache_eviction_check(session, false, true, NULL));
+ /*
+ * Stall here if the cache is completely full. We have allocated a transaction ID which
+ * makes it possible for eviction to decide we're contributing to the problem and return
+ * WT_ROLLBACK. The WT_SESSION.begin_transaction API can't return rollback, so continue on.
+ */
+ WT_RET_ERROR_OK(__wt_cache_eviction_check(session, false, true, NULL), WT_ROLLBACK);
__wt_txn_get_snapshot(session);
}
@@ -1145,40 +1136,6 @@ __wt_txn_cursor_op(WT_SESSION_IMPL *session)
}
/*
- * __wt_txn_am_oldest --
- * Am I the oldest transaction in the system?
- */
-static inline bool
-__wt_txn_am_oldest(WT_SESSION_IMPL *session)
-{
- WT_CONNECTION_IMPL *conn;
- WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *s;
- uint64_t id;
- uint32_t i, session_cnt;
-
- conn = S2C(session);
- txn = &session->txn;
- txn_global = &conn->txn_global;
-
- if (txn->id == WT_TXN_NONE || F_ISSET(txn, WT_TXN_PREPARE))
- return (false);
-
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (i = 0, s = txn_global->states; i < session_cnt; i++, s++)
- /*
- * We are checking if the transaction is oldest one in the system. It is safe to ignore any
- * sessions that are allocating transaction IDs, since we already have an ID, they are
- * guaranteed to be newer.
- */
- if (!s->is_allocating && (id = s->id) != WT_TXN_NONE && WT_TXNID_LT(id, txn->id))
- return (false);
-
- return (true);
-}
-
-/*
* __wt_txn_activity_check --
* Check whether there are any running transactions.
*/
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index b9fed57f9ad..892d78b89a4 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -1723,6 +1723,12 @@ struct __wt_session {
* \c "read-committed"\, \c "snapshot"; default empty.}
* @config{name, name of the transaction for tracing and debugging., a string; default
* empty.}
+ * @config{operation_timeout_ms, when non-zero\, a requested limit on the number of elapsed
+ * real time milliseconds taken to complete database operations in this transaction. Time
+ * is measured from the start of each WiredTiger API call. There is no guarantee any
+ * operation will not take longer than this amount of time. If WiredTiger notices the limit
+ * has been exceeded\, an operation may return a WT_ROLLBACK error. Default is to have no
+ * limit., an integer greater than or equal to 1; default \c 0.}
* @config{priority, priority of the transaction for resolving conflicts. Transactions with
* higher values are less likely to abort., an integer between -100 and 100; default \c 0.}
* @config{read_timestamp, read using the specified timestamp. The supplied value must not
@@ -1906,16 +1912,17 @@ struct __wt_session {
* "to=<checkpoint>" to drop all checkpoints before and including the named checkpoint.
* Checkpoints cannot be dropped while a hot backup is in progress or if open in a cursor.,
* a list of strings; default empty.}
- * @config{force, by default\, checkpoints may be skipped if the underlying object has not
- * been modified\, this option forces the checkpoint., a boolean flag; default \c false.}
+ * @config{force, if false (the default)\, checkpoints may be skipped if the underlying
+ * object has not been modified\, if true\, this option forces the checkpoint., a boolean
+ * flag; default \c false.}
* @config{name, if set\, specify a name for the checkpoint (note that checkpoints including
* LSM trees may not be named)., a string; default empty.}
* @config{target, if non-empty\, checkpoint the list of objects., a list of strings;
* default empty.}
- * @config{use_timestamp, by default\, create the checkpoint as of the last stable timestamp
- * if timestamps are in use\, or all current updates if there is no stable timestamp set.
- * If false\, this option generates a checkpoint with all updates including those later than
- * the timestamp., a boolean flag; default \c true.}
+ * @config{use_timestamp, if true (the default)\, create the checkpoint as of the last
+ * stable timestamp if timestamps are in use\, or all current updates if there is no stable
+ * timestamp set. If false\, this option generates a checkpoint with all updates including
+ * those later than the timestamp., a boolean flag; default \c true.}
* @configend
* @errors
*/
@@ -2259,6 +2266,12 @@ struct __wt_connection {
* database. Each worker thread uses a session handle from the configured session_max., an
* integer between 3 and 20; default \c 4.}
* @config{ ),,}
+ * @config{operation_timeout_ms, when non-zero\, a requested limit on the number of elapsed
+ * real time milliseconds application threads will take to complete database operations.
+ * Time is measured from the start of each WiredTiger API call. There is no guarantee any
+ * operation will not take longer than this amount of time. If WiredTiger notices the limit
+ * has been exceeded\, an operation may return a WT_ROLLBACK error. Default is to have no
+ * limit., an integer greater than or equal to 1; default \c 0.}
* @config{operation_tracking = (, enable tracking of performance-critical functions. See
* @ref operation_tracking for more information., a set of related configuration options
* defined below.}
@@ -2493,18 +2506,16 @@ struct __wt_connection {
/*!
* Rollback in-memory non-logged state to an earlier point in time.
*
- * This method uses a timestamp to define the rollback point, and thus
- * requires that the application uses timestamps and that the
- * stable_timestamp must have been set via a call to
- * WT_CONNECTION::set_timestamp. Any updates to checkpoint durable
- * tables that are more recent than the stable timestamp are removed.
+ * This method uses a timestamp to define the rollback point, and requires that the
+ * application uses timestamps, that the stable_timestamp has been set via a call to
+ * WT_CONNECTION::set_timestamp, and that a checkpoint operating on the last stable timestamp
+ * has completed. Any updates to checkpoint durable tables that are more recent than the
+ * stable timestamp are removed.
*
- * This method requires that there are no active operations for the
- * duration of the call.
+ * This method requires that there are no active operations for the duration of the call.
*
- * Any updates made to logged tables will not be rolled back. Any
- * updates made without an associated timestamp will not be rolled
- * back. See @ref transaction_timestamps.
+ * Any updates made to logged tables will not be rolled back. Any updates made without an
+ * associated timestamp will not be rolled back. See @ref transaction_timestamps.
*
* @snippet ex_all.c rollback to stable
*
@@ -2912,6 +2923,12 @@ struct __wt_connection {
* @config{multiprocess, permit sharing between processes (will automatically start an RPC server
* for primary processes and use RPC for secondary processes). <b>Not yet supported in
* WiredTiger</b>., a boolean flag; default \c false.}
+ * @config{operation_timeout_ms, when non-zero\, a requested limit on the number of elapsed real
+ * time milliseconds application threads will take to complete database operations. Time is
+ * measured from the start of each WiredTiger API call. There is no guarantee any operation will
+ * not take longer than this amount of time. If WiredTiger notices the limit has been exceeded\, an
+ * operation may return a WT_ROLLBACK error. Default is to have no limit., an integer greater than
+ * or equal to 1; default \c 0.}
* @config{operation_tracking = (, enable tracking of performance-critical functions. See @ref
* operation_tracking for more information., a set of related configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable operation tracking subsystem., a boolean flag;
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 3bc4f02c258..2b281443f21 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -430,6 +430,7 @@ typedef uint64_t wt_timestamp_t;
#include "packing.i"
#include "reconcile.i"
#include "serial.i"
+#include "time.i"
#if defined(__cplusplus)
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_time.c b/src/third_party/wiredtiger/src/os_posix/os_time.c
index 9b4729994df..6009a532c8c 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_time.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_time.c
@@ -14,6 +14,7 @@
*/
void
__wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp)
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
WT_DECL_RET;
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_child.c b/src/third_party/wiredtiger/src/reconcile/rec_child.c
index 99342d8ed94..b1d696e2ac6 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_child.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_child.c
@@ -94,10 +94,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref, WT_C
if (F_ISSET(r, WT_REC_EVICT))
return (__wt_set_return(session, EBUSY));
- /*
- * If there are deleted child pages we can't discard immediately, keep the page dirty so they
- * are eventually freed.
- */
+ /* The page cannot be marked clean; leave it dirty. */
r->leave_dirty = true;
/*
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 9f3150d362b..06dcf73fbb5 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -9,6 +9,19 @@
#include "wt_internal.h"
/*
+ * __rec_update_durable --
+ * Return whether an update is suitable for writing to a disk image.
+ */
+static bool
+__rec_update_durable(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_UPDATE *upd)
+{
+ return (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
+ __wt_txn_upd_visible_all(session, upd) :
+ __wt_txn_upd_visible_type(session, upd) == WT_VISIBLE_TRUE &&
+ __wt_txn_visible(session, upd->txnid, upd->durable_ts));
+}
+
+/*
* __rec_update_save --
* Save a WT_UPDATE list for later restoration.
*/
@@ -111,11 +124,11 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
WT_CELL_UNPACK *vpack, WT_UPDATE_SELECT *upd_select)
{
WT_PAGE *page;
- WT_UPDATE *first_ts_upd, *first_txn_upd, *first_upd, *upd;
- wt_timestamp_t timestamp, ts;
+ WT_UPDATE *first_txn_upd, *first_upd, *upd;
+ wt_timestamp_t max_ts;
size_t upd_memsize;
uint64_t max_txn, txnid;
- bool all_visible, list_prepared, list_uncommitted, skipped_birthmark;
+ bool all_stable, list_prepared, list_uncommitted, skipped_birthmark;
/*
* The "saved updates" return value is used independently of returning an update we can write,
@@ -125,8 +138,9 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
upd_select->upd_saved = false;
page = r->page;
- first_ts_upd = first_txn_upd = NULL;
+ first_txn_upd = NULL;
upd_memsize = 0;
+ max_ts = WT_TS_NONE;
max_txn = WT_TXN_NONE;
list_prepared = list_uncommitted = skipped_birthmark = false;
@@ -152,8 +166,6 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
*/
if (first_txn_upd == NULL)
first_txn_upd = upd;
-
- /* Track the largest transaction ID seen. */
if (WT_TXNID_LT(max_txn, txnid))
max_txn = txnid;
@@ -170,21 +182,23 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
* prepared transaction IDs are globally visible, need to check the update state as well.
*/
if (F_ISSET(r, WT_REC_EVICT)) {
- if (upd->prepare_state == WT_PREPARE_LOCKED ||
- upd->prepare_state == WT_PREPARE_INPROGRESS) {
- list_prepared = true;
- continue;
- }
if (F_ISSET(r, WT_REC_VISIBLE_ALL) ? WT_TXNID_LE(r->last_running, txnid) :
!__txn_visible_id(session, txnid)) {
r->update_uncommitted = list_uncommitted = true;
continue;
}
+ if (upd->prepare_state == WT_PREPARE_LOCKED ||
+ upd->prepare_state == WT_PREPARE_INPROGRESS) {
+ list_prepared = true;
+ if (upd->start_ts > max_ts)
+ max_ts = upd->start_ts;
+ continue;
+ }
}
- /* Track the first update with non-zero timestamp. */
- if (first_ts_upd == NULL && upd->start_ts != WT_TS_NONE)
- first_ts_upd = upd;
+ /* Track the largest timestamp seen. */
+ if (upd->durable_ts > max_ts)
+ max_ts = upd->durable_ts;
/*
* Select the update to write to the disk image.
@@ -202,8 +216,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
if (upd_select->upd == NULL && r->las_skew_newest)
upd_select->upd = upd;
- if (F_ISSET(r, WT_REC_VISIBLE_ALL) ? !__wt_txn_upd_visible_all(session, upd) :
- !__wt_txn_upd_durable(session, upd)) {
+ if (!__rec_update_durable(session, r, upd)) {
if (F_ISSET(r, WT_REC_EVICT))
++r->updates_unstable;
@@ -214,21 +227,29 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
* discard an uncommitted update.
*/
if (F_ISSET(r, WT_REC_UPDATE_RESTORE) && upd_select->upd != NULL &&
- (list_prepared || list_uncommitted)) {
- r->leave_dirty = true;
+ (list_prepared || list_uncommitted))
return (__wt_set_return(session, EBUSY));
- }
if (upd->type == WT_UPDATE_BIRTHMARK)
skipped_birthmark = true;
+ /*
+ * Track the oldest update not on the page.
+ *
+ * This is used to decide whether reads can use the
+ * page image, hence using the start rather than the
+ * durable timestamp.
+ */
+ if (upd_select->upd == NULL && upd->start_ts < r->min_skipped_ts)
+ r->min_skipped_ts = upd->start_ts;
+
continue;
}
/*
* Lookaside without stable timestamp was taken care of above
- * (set to the first uncommitted transaction). Lookaside with
- * stable timestamp always takes the first stable update.
+ * (set to the first uncommitted transaction). All other
+ * reconciliation takes the first stable update.
*/
if (upd_select->upd == NULL)
upd_select->upd = upd;
@@ -262,6 +283,9 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
if (upd == first_txn_upd)
r->update_used = true;
+ if (upd != NULL && upd->durable_ts > r->max_ondisk_ts)
+ r->max_ondisk_ts = upd->durable_ts;
+
/*
* TIMESTAMP-FIXME The start timestamp is determined by the commit timestamp when the key is
* first inserted (or last updated). The end timestamp is set when a key/value pair becomes
@@ -308,8 +332,8 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
r->max_txn = max_txn;
/* Update the maximum timestamp. */
- if (first_ts_upd != NULL && r->max_timestamp < first_ts_upd->durable_ts)
- r->max_timestamp = first_ts_upd->durable_ts;
+ if (max_ts > r->max_ts)
+ r->max_ts = max_ts;
/*
* If the update we chose was a birthmark, or we are doing update-restore and we skipped a
@@ -327,19 +351,15 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
}
/*
- * Check if all updates on the page are visible. If not, it must stay
- * dirty unless we are saving updates to the lookaside table.
+ * Check if all updates on the page are visible; if not, it must stay dirty.
*
- * Updates can be out of transaction ID order (but not out of timestamp
- * order), so we track the maximum transaction ID and the newest update
- * with a timestamp (if any).
+ * Updates can be out of transaction ID order (but not out of timestamp order), so we track the
+ * maximum transaction ID and the maximum timestamp seen in the update list.
*/
- timestamp = first_ts_upd == NULL ? 0 : first_ts_upd->durable_ts;
- all_visible = upd == first_txn_upd && !list_prepared && !list_uncommitted &&
- (F_ISSET(r, WT_REC_VISIBLE_ALL) ? __wt_txn_visible_all(session, max_txn, timestamp) :
- __wt_txn_visible(session, max_txn, timestamp));
+ all_stable = upd == first_txn_upd && !list_prepared && !list_uncommitted &&
+ __wt_txn_visible_all(session, max_txn, max_ts);
- if (all_visible)
+ if (all_stable)
goto check_original_value;
r->leave_dirty = true;
@@ -347,9 +367,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
if (F_ISSET(r, WT_REC_VISIBILITY_ERR))
WT_PANIC_RET(session, EINVAL, "reconciliation error, update not visible");
- /*
- * If not trying to evict the page, we know what we'll write and we're done.
- */
+ /* If not trying to evict the page, we know what we'll write and we're done. */
if (!F_ISSET(r, WT_REC_EVICT))
goto check_original_value;
@@ -382,54 +400,6 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
WT_RET(__rec_update_save(session, r, ins, ripcip, upd_select->upd, upd_memsize));
upd_select->upd_saved = true;
- /*
- * Track the first off-page update when saving history in the lookaside table. When skewing
- * newest, we want the first (non-aborted) update after the one stored on the page. Otherwise,
- * we want the update before the on-page update.
- */
- if (F_ISSET(r, WT_REC_LOOKASIDE) && r->las_skew_newest) {
- if (WT_TXNID_LT(r->unstable_txn, first_upd->txnid))
- r->unstable_txn = first_upd->txnid;
- if (first_ts_upd != NULL) {
- WT_ASSERT(session, first_ts_upd->prepare_state == WT_PREPARE_INPROGRESS ||
- first_ts_upd->start_ts <= first_ts_upd->durable_ts);
-
- if (r->unstable_timestamp < first_ts_upd->start_ts)
- r->unstable_timestamp = first_ts_upd->start_ts;
-
- if (r->unstable_durable_timestamp < first_ts_upd->durable_ts)
- r->unstable_durable_timestamp = first_ts_upd->durable_ts;
- }
- } else if (F_ISSET(r, WT_REC_LOOKASIDE)) {
- for (upd = first_upd; upd != upd_select->upd; upd = upd->next) {
- if (upd->txnid == WT_TXN_ABORTED)
- continue;
-
- if (upd->txnid != WT_TXN_NONE && WT_TXNID_LT(upd->txnid, r->unstable_txn))
- r->unstable_txn = upd->txnid;
-
- /*
- * The durable timestamp is always set by commit, and usually the same as the start
- * timestamp, which makes it OK to use the two independently and be confident both will
- * be set.
- */
- WT_ASSERT(session,
- upd->prepare_state == WT_PREPARE_INPROGRESS || upd->durable_ts >= upd->start_ts);
-
- if (r->unstable_timestamp > upd->start_ts)
- r->unstable_timestamp = upd->start_ts;
-
- /*
- * An in-progress prepared update will always have a zero durable timestamp. Checkpoints
- * can only skip reading lookaside history if all updates are in the future, including
- * the prepare, so including the prepare timestamp instead.
- */
- ts = upd->prepare_state == WT_PREPARE_INPROGRESS ? upd->start_ts : upd->durable_ts;
- if (r->unstable_durable_timestamp > ts)
- r->unstable_durable_timestamp = ts;
- }
- }
-
check_original_value:
/*
* Paranoia: check that we didn't choose an update that has since been rolled back.
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 6bd67f329e1..26b1849693a 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -404,7 +404,7 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
* discard its history).
*/
mod->rec_max_txn = r->max_txn;
- mod->rec_max_timestamp = r->max_timestamp;
+ mod->rec_max_timestamp = r->max_ts;
/*
* Track the tree's maximum transaction ID (used to decide if it's safe to discard the
@@ -416,8 +416,8 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
if (!F_ISSET(r, WT_REC_EVICT)) {
if (WT_TXNID_LT(btree->rec_max_txn, r->max_txn))
btree->rec_max_txn = r->max_txn;
- if (btree->rec_max_timestamp < r->max_timestamp)
- btree->rec_max_timestamp = r->max_timestamp;
+ if (btree->rec_max_timestamp < r->max_ts)
+ btree->rec_max_timestamp = r->max_ts;
}
/*
@@ -651,22 +651,8 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO
/* Track the page's min/maximum transaction */
r->max_txn = WT_TXN_NONE;
- r->max_timestamp = 0;
-
- /*
- * Track the first unstable transaction (when skewing newest this is the newest update,
- * otherwise the newest update not on the page). This is the boundary between the on-page
- * information and the history stored in the lookaside table.
- */
- if (r->las_skew_newest) {
- r->unstable_txn = WT_TXN_NONE;
- r->unstable_timestamp = WT_TS_NONE;
- r->unstable_durable_timestamp = WT_TS_NONE;
- } else {
- r->unstable_txn = WT_TXN_ABORTED;
- r->unstable_timestamp = WT_TS_MAX;
- r->unstable_durable_timestamp = WT_TS_MAX;
- }
+ r->max_ondisk_ts = r->max_ts = WT_TS_NONE;
+ r->min_skipped_ts = WT_TS_MAX;
/* Track if updates were used and/or uncommitted. */
r->updates_seen = r->updates_unstable = 0;
@@ -1649,17 +1635,9 @@ __rec_split_write_supd(
done:
if (F_ISSET(r, WT_REC_LOOKASIDE)) {
/* Track the oldest lookaside timestamp seen so far. */
- multi->page_las.skew_newest = r->las_skew_newest;
multi->page_las.max_txn = r->max_txn;
- multi->page_las.unstable_txn = r->unstable_txn;
- WT_ASSERT(session, r->unstable_txn != WT_TXN_NONE);
- multi->page_las.max_timestamp = r->max_timestamp;
-
- WT_ASSERT(session, r->all_upd_prepare_in_prog == true ||
- r->unstable_durable_timestamp >= r->unstable_timestamp);
-
- multi->page_las.unstable_timestamp = r->unstable_timestamp;
- multi->page_las.unstable_durable_timestamp = r->unstable_durable_timestamp;
+ multi->page_las.max_ondisk_ts = r->max_ondisk_ts;
+ multi->page_las.min_skipped_ts = r->min_skipped_ts;
}
err:
diff --git a/src/third_party/wiredtiger/src/support/time.c b/src/third_party/wiredtiger/src/support/time.c
deleted file mode 100644
index 61cebb71b51..00000000000
--- a/src/third_party/wiredtiger/src/support/time.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*-
- * Copyright (c) 2014-2019 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- * All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __time_check_monotonic --
- * Check and prevent time running backward. If we detect that it has, we set the time structure
- * to the previous values, making time stand still until we see a time in the future of the
- * highest value seen so far.
- */
-static void
-__time_check_monotonic(WT_SESSION_IMPL *session, struct timespec *tsp)
-{
- /*
- * Detect time going backward. If so, use the last saved timestamp.
- */
- if (session == NULL)
- return;
-
- if (tsp->tv_sec < session->last_epoch.tv_sec ||
- (tsp->tv_sec == session->last_epoch.tv_sec && tsp->tv_nsec < session->last_epoch.tv_nsec)) {
- WT_STAT_CONN_INCR(session, time_travel);
- *tsp = session->last_epoch;
- } else
- session->last_epoch = *tsp;
-}
-
-/*
- * __wt_epoch --
- * Return the time since the Epoch.
- */
-void
-__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
-{
- struct timespec tmp;
-
- /*
- * Read into a local variable, then check for monotonically increasing time, ensuring single
- * threads never see time move backward. We don't prevent multiple threads from seeing time move
- * backwards (even when reading time serially, the saved last-read time is per thread, not per
- * timer, so multiple threads can race the time). Nor do we prevent multiple threads
- * simultaneously reading the time from seeing random time or time moving backwards (assigning
- * the time structure to the returned memory location implies multicycle writes to memory).
- */
- __wt_epoch_raw(session, &tmp);
- __time_check_monotonic(session, &tmp);
- *tsp = tmp;
-}
-
-/*
- * __wt_seconds --
- * Return the seconds since the Epoch.
- */
-void
-__wt_seconds(WT_SESSION_IMPL *session, uint64_t *secondsp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
-{
- struct timespec t;
-
- __wt_epoch(session, &t);
-
- /*
- * A time_t isn't guaranteed to fit into a uint64_t, but it's asserted when WiredTiger builds.
- */
- *secondsp = (uint64_t)t.tv_sec;
-}
-
-/*
- * __wt_seconds32 --
- * Return the seconds since the Epoch in 32 bits.
- */
-void
-__wt_seconds32(WT_SESSION_IMPL *session, uint32_t *secondsp)
-{
- struct timespec t;
-
- __wt_epoch(session, &t);
-
- /*
- * This won't work in 2038. But for now allow it.
- */
- *secondsp = (uint32_t)t.tv_sec;
-}
-
-/*
- * __wt_clock_to_nsec --
- * Convert from clock ticks to nanoseconds.
- */
-uint64_t
-__wt_clock_to_nsec(uint64_t end, uint64_t begin)
-{
- double clock_diff;
-
- /*
- * If the ticks were reset, consider it an invalid check and just return zero as the time
- * difference because we cannot compute anything meaningful.
- */
- if (end < begin)
- return (0);
- clock_diff = (double)(end - begin);
- return ((uint64_t)(clock_diff / __wt_process.tsc_nsec_ratio));
-}
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 0374732dfa7..09caef4345e 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -468,6 +468,12 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
WT_STRING_MATCH("read-committed", cval.str, cval.len) ? WT_ISO_READ_COMMITTED :
WT_ISO_READ_UNCOMMITTED;
+ /* Retrieve the maximum operation time, defaulting to the database-wide configuration. */
+ WT_RET(__wt_config_gets(session, cfg, "operation_timeout_ms", &cval));
+ session->operation_timeout_us = (uint64_t)(cval.val * WT_THOUSAND);
+ if (session->operation_timeout_us == 0)
+ session->operation_timeout_us = S2C(session)->operation_timeout_us;
+
/*
* The default sync setting is inherited from the connection, but can
* be overridden by an explicit "sync" setting for this transaction.
@@ -615,6 +621,9 @@ __wt_txn_release(WT_SESSION_IMPL *session)
*/
txn->flags = 0;
txn->prepare_timestamp = WT_TS_NONE;
+
+ /* Clear operation timer. */
+ session->operation_timeout_us = 0;
}
/*
@@ -1501,19 +1510,143 @@ __wt_txn_activity_drain(WT_SESSION_IMPL *session)
* __wt_txn_global_shutdown --
* Shut down the global transaction state.
*/
-void
-__wt_txn_global_shutdown(WT_SESSION_IMPL *session)
+int
+__wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char *config, const char **cfg)
 {
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+ WT_SESSION_IMPL *s;
+ const char *ckpt_cfg;
+
+ conn = S2C(session);
+
+ /*
+ * Perform a system-wide checkpoint so that all tables are consistent with each other. All
+ * transactions are resolved; timestamps are ignored so all data gets to disk unless
+ * "use_timestamp" is set in cfg. Do this before shutting down all the subsystems. We have shut
+ * down all user sessions, but pass true to the checkpoint call to wait for internal races.
+ */
+ WT_TRET(__wt_config_gets(session, cfg, "use_timestamp", &cval));
+ ckpt_cfg = "use_timestamp=false";
+ if (cval.val != 0) {
+ ckpt_cfg = "use_timestamp=true";
+ if (conn->txn_global.has_stable_timestamp)
+ F_SET(conn, WT_CONN_CLOSING_TIMESTAMP);
+ }
+ if (!F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) {
+ s = NULL;
+ WT_TRET(__wt_open_internal_session(conn, "close_ckpt", true, 0, &s));
+ if (s != NULL) {
+ const char *checkpoint_cfg[] = {
+ WT_CONFIG_BASE(session, WT_SESSION_checkpoint), ckpt_cfg, NULL};
+ wt_session = &s->iface;
+ WT_TRET(__wt_txn_checkpoint(s, checkpoint_cfg, true));
+
+ /*
+ * Mark the metadata dirty so we flush it on close, allowing recovery to be skipped.
+ */
+ WT_WITH_DHANDLE(s, WT_SESSION_META_DHANDLE(s), __wt_tree_modify_set(s));
+
+ WT_TRET(wt_session->close(wt_session, config));
+ }
+ }
+
 /*
- * All application transactions have completed, ignore the pinned
- * timestamp so that updates can be evicted from the cache during
- * connection close.
+ * All application transactions have completed, ignore the pinned timestamp so that updates can
+ * be evicted from the cache during connection close.
 *
- * Note that we are relying on a special case in __wt_txn_visible_all
- * that returns true during close when there is no pinned timestamp
- * set.
+ * Note that we are relying on a special case in __wt_txn_visible_all that returns true during
+ * close when there is no pinned timestamp set.
 */
- S2C(session)->txn_global.has_pinned_timestamp = false;
+ conn->txn_global.has_pinned_timestamp = false;
+
+ return (ret);
+}
+
+/*
+ * __wt_txn_is_blocking_old --
+ * Called by eviction to determine if a worker thread should be released from eviction: if this
+ * transaction has the oldest ID in the system, return the result of
+ * __wt_txn_rollback_required, otherwise return 0.
+ */
+int
+__wt_txn_is_blocking_old(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *state;
+ uint64_t id;
+ uint32_t i, session_cnt;
+
+ conn = S2C(session);
+ txn = &session->txn;
+ txn_global = &conn->txn_global;
+
+ /* Transactions without an ID, or that are prepared, are never reported as blocking. */
+ if (txn->id == WT_TXN_NONE || F_ISSET(txn, WT_TXN_PREPARE))
+ return (0);
+
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+
+ /*
+ * Check if the transaction is the oldest one in the system. It's safe to ignore sessions
+ * allocating transaction IDs, since we already have an ID, they are guaranteed to be newer.
+ */
+ for (i = 0, state = txn_global->states; i < session_cnt; i++, state++) {
+ if (state->is_allocating)
+ continue;
+
+ WT_ORDERED_READ(id, state->id);
+ if (id != WT_TXN_NONE && WT_TXNID_LT(id, txn->id))
+ break;
+ }
+
+ /* The scan completing without a break means no session holds an older transaction ID. */
+ return (i == session_cnt ?
+ __wt_txn_rollback_required(session, "oldest transaction ID rolled back for eviction") :
+ 0);
+}
+
+/*
+ * __wt_txn_is_blocking_pin --
+ * Called by eviction to determine if a worker thread should be released from eviction: if this
+ * transaction is likely blocking eviction because it holds the oldest pinned transaction ID,
+ * return the result of __wt_txn_rollback_required, otherwise return 0.
+ */
+int
+__wt_txn_is_blocking_pin(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_SESSION_IMPL *s;
+ WT_TXN *txn;
+ uint64_t snap_min;
+ uint32_t i, session_cnt;
+
+ conn = S2C(session);
+ txn = &session->txn;
+
+ /*
+ * Check if we hold the oldest pinned transaction ID in the system. This potentially means
+ * rolling back a read-only transaction, which MongoDB can't (yet) handle. For this reason,
+ * don't check unless we're configured to time out thread operations, a way to confirm our
+ * caller is prepared for rollback.
+ */
+ if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) || txn->snap_min == WT_TXN_NONE)
+ return (0);
+ if (!__wt_op_timer_fired(session))
+ return (0);
+
+ WT_ORDERED_READ(session_cnt, conn->session_cnt);
+
+ /*
+ * Look for a non-internal session whose snapshot pins an older ID than ours; stop at the first
+ * one found.
+ */
+ for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) {
+ if (F_ISSET(s, WT_SESSION_INTERNAL) || !F_ISSET(&s->txn, WT_TXN_HAS_SNAPSHOT))
+ continue;
+
+ WT_ORDERED_READ(snap_min, s->txn.snap_min);
+ if (snap_min != WT_TXN_NONE && snap_min < txn->snap_min)
+ break;
+ }
+ return (i == session_cnt ? __wt_txn_rollback_required(
+ session, "oldest pinned transaction ID rolled back for eviction") :
+ 0);
 }
/*
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 072406a25cc..ccfd378b3b7 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -618,7 +618,7 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, bool *trackingp, const char *cfg[
} else if (!F_ISSET(conn, WT_CONN_RECOVERING))
txn_global->meta_ckpt_timestamp = txn_global->recovery_timestamp;
} else if (!F_ISSET(conn, WT_CONN_RECOVERING))
- txn_global->meta_ckpt_timestamp = 0;
+ txn_global->meta_ckpt_timestamp = WT_TS_NONE;
__wt_writeunlock(session, &txn_global->rwlock);
@@ -949,13 +949,26 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
__checkpoint_stats(session);
/*
- * If timestamps were used to define the content of the checkpoint update the saved last
- * checkpoint timestamp, otherwise leave it alone. If a checkpoint is taken without
- * timestamps, it's likely a bug, but we don't want to clear the saved last checkpoint
- * timestamp regardless.
+ * If timestamps defined the checkpoint's content, set the saved last checkpoint timestamp,
+ * otherwise clear it. We clear it for a couple of reasons: applications can query it and we
+ * don't want to lie, and we use it to decide if WT_CONNECTION.rollback_to_stable is an
+ * allowed operation. When the checkpoint timestamp is itself WT_TS_NONE, fall back to the
+ * recovery timestamp (see below) rather than recording WT_TS_NONE unconditionally.
*/
- if (use_timestamp)
- conn->txn_global.last_ckpt_timestamp = ckpt_tmp_ts;
+ if (use_timestamp) {
+ conn->txn_global.last_ckpt_timestamp = use_timestamp ? ckpt_tmp_ts : WT_TS_NONE;
+ /*
+ * MongoDB assumes the checkpoint timestamp will be initialized with WT_TS_NONE. In such
+ * cases it queries the recovery timestamp to determine the last stable recovery
+ * timestamp. So, if the recovery timestamp is valid, set the last checkpoint timestamp
+ * to recovery timestamp. This should never be a problem, as checkpoint timestamp should
+ * never be less than recovery timestamp. This could potentially avoid MongoDB making
+ * two calls to determine last stable recovery timestamp.
+ */
+ if (conn->txn_global.last_ckpt_timestamp == WT_TS_NONE)
+ conn->txn_global.last_ckpt_timestamp = conn->txn_global.recovery_timestamp;
+ } else
+ conn->txn_global.last_ckpt_timestamp = WT_TS_NONE;
}
err:
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index 17e0b61c904..6ccb7625108 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -536,7 +536,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
r.session = session;
WT_MAX_LSN(&r.max_ckpt_lsn);
WT_MAX_LSN(&r.max_rec_lsn);
- conn->txn_global.recovery_timestamp = conn->txn_global.meta_ckpt_timestamp = 0;
+ conn->txn_global.recovery_timestamp = conn->txn_global.meta_ckpt_timestamp = WT_TS_NONE;
F_SET(conn, WT_CONN_RECOVERING);
WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config));
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 97c83c47414..0b2ec12a47a 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -222,21 +222,20 @@ __txn_abort_newer_updates(WT_SESSION_IMPL *session, WT_REF *ref, wt_timestamp_t
{
WT_DECL_RET;
WT_PAGE *page;
+ WT_PAGE_LOOKASIDE *page_las;
uint32_t read_flags;
bool local_read;
/*
- * If we created a page image with updates the need to be rolled back,
+ * If we created a page image with updates that need to be rolled back,
* read the history into cache now and make sure the page is marked
* dirty. Otherwise, the history we need could be swept from the
* lookaside table before the page is read because the lookaside sweep
* code has no way to tell that the page image is invalid.
*
* So, if there is lookaside history for a page, first check if the
- * history needs to be rolled back make sure that history is loaded
- * into cache. That is, if skew_newest is true, so the disk image
- * potentially contained unstable updates, and the history is more
- * recent than the rollback timestamp.
+ * history needs to be rolled back then ensure the history is loaded
+ * into cache.
*
* Also, we have separately discarded any lookaside history more recent
* than the rollback timestamp. For page_las structures in cache,
@@ -247,9 +246,8 @@ __txn_abort_newer_updates(WT_SESSION_IMPL *session, WT_REF *ref, wt_timestamp_t
*/
local_read = false;
read_flags = WT_READ_WONT_NEED;
- if (ref->page_las != NULL) {
- if (ref->page_las->skew_newest &&
- rollback_timestamp < ref->page_las->unstable_durable_timestamp) {
+ if ((page_las = ref->page_las) != NULL) {
+ if (rollback_timestamp < page_las->max_ondisk_ts) {
/*
* Make sure we get back a page with history, not a limbo page.
*/
@@ -258,13 +256,10 @@ __txn_abort_newer_updates(WT_SESSION_IMPL *session, WT_REF *ref, wt_timestamp_t
WT_ASSERT(session,
ref->state != WT_REF_LIMBO && ref->page != NULL && __wt_page_is_modified(ref->page));
local_read = true;
+ page_las->max_ondisk_ts = rollback_timestamp;
}
- if (ref->page_las->max_timestamp > rollback_timestamp)
- ref->page_las->max_timestamp = rollback_timestamp;
- if (ref->page_las->unstable_durable_timestamp > rollback_timestamp)
- ref->page_las->unstable_durable_timestamp = rollback_timestamp;
- if (ref->page_las->unstable_timestamp > rollback_timestamp)
- ref->page_las->unstable_timestamp = rollback_timestamp;
+ if (rollback_timestamp < page_las->min_skipped_ts)
+ page_las->min_skipped_ts = rollback_timestamp;
}
/* Review deleted page saved to the ref */
@@ -436,8 +431,10 @@ __txn_rollback_to_stable_check(WT_SESSION_IMPL *session)
conn = S2C(session);
txn_global = &conn->txn_global;
- if (!txn_global->has_stable_timestamp)
- WT_RET_MSG(session, EINVAL, "rollback_to_stable requires a stable timestamp");
+
+ if (!txn_global->has_stable_timestamp || txn_global->last_ckpt_timestamp == WT_TS_NONE)
+ WT_RET_MSG(
+ session, EINVAL, "rollback_to_stable requires a checkpoint with a stable timestamp");
/*
* Help the user comply with the requirement that there are no concurrent operations. Protect
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index bf20d7568bc..c6131ac0e77 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -54,7 +54,8 @@ functions:
set -o errexit
set -o verbose
if [ "Windows_NT" = "$OS" ]; then
- scons.bat --enable-python=c:\\swigwin-3.0.2\\swig.exe --enable-diagnostic ${smp_command|}
+ pip install scons==3.1.1
+ scons-3.1.1.bat --enable-python=c:\\swigwin-3.0.2\\swig.exe --enable-diagnostic ${smp_command|}
else
cd build_posix
sh ./reconf
@@ -991,7 +992,8 @@ tasks:
set -o errexit
set -o verbose
- scons.bat ${smp_command|} "CFLAGS=/Gv /wd4090 /wd4996 /we4047 /we4024 /TC /we4100 /we4133" wiredtiger.dll libwiredtiger.lib
+ pip install scons==3.1.1
+ scons-3.1.1.bat ${smp_command|} "CFLAGS=/Gv /wd4090 /wd4996 /we4047 /we4024 /TC /we4100 /we4133" wiredtiger.dll libwiredtiger.lib
- name: fops
depends_on:
@@ -1025,9 +1027,6 @@ tasks:
cmd.exe /c "cd test\\format && ..\\..\\t_format.exe reverse=0 encryption=none logging_compression=none runs=20"
- name: million-collection-test
- depends_on: []
- run_on:
- - rhel62-large
commands:
- func: "fetch source"
- func: "fetch mongo-tests repo"
@@ -1035,6 +1034,7 @@ tasks:
params:
working_dir: mongo-tests
script: |
+ sudo su
set -o errexit
set -o verbose
ulimit -n 1000000
@@ -1176,10 +1176,10 @@ tasks:
buildvariants:
-- name: ubuntu1404
- display_name: Ubuntu 14.04
+- name: ubuntu1804
+ display_name: Ubuntu 18.04
run_on:
- - ubuntu1404-test
+ - ubuntu1804-test
expansions:
# It's ugly, but we need the absolute path here, not the relative
test_env_vars: PATH=/opt/mongodbtoolchain/v3/bin:$PATH LD_LIBRARY_PATH=$(pwd)/.libs top_srcdir=$(pwd)/.. top_builddir=$(pwd)
@@ -1244,10 +1244,10 @@ buildvariants:
- name: unit-test-bucket07
- name: fops
-- name: ubuntu1404-python3
- display_name: Ubuntu 14.04 (Python3)
+- name: ubuntu1804-python3
+ display_name: Ubuntu 18.04 (Python3)
run_on:
- - ubuntu1404-test
+ - ubuntu1804-test
expansions:
test_env_vars: PATH=/opt/mongodbtoolchain/v3/bin:$PATH LD_LIBRARY_PATH=$(pwd)/.libs top_srcdir=$(pwd)/.. top_builddir=$(pwd)
smp_command: -j $(grep -c ^processor /proc/cpuinfo)
@@ -1270,7 +1270,7 @@ buildvariants:
display_name: Large scale testing
batchtime: 1440 # 1 day
run_on:
- - rhel62-large
+ - rhel80-build
expansions:
configure_env_vars: CC=/opt/mongodbtoolchain/v3/bin/gcc CXX=/opt/mongodbtoolchain/v3/bin/g++
tasks:
@@ -1280,14 +1280,14 @@ buildvariants:
display_name: Compatibility tests
batchtime: 10080 # 7 days
run_on:
- - ubuntu1404-test
+ - ubuntu1804-test
tasks:
- name: compatibility-test-for-mongodb-releases
- name: windows-64
display_name: Windows 64-bit
run_on:
- - windows-64-vs2013-test
+ - windows-64-vs2017-test
tasks:
- name: compile
- name: compile-windows-alt
@@ -1337,7 +1337,7 @@ buildvariants:
modules:
- enterprise
run_on:
- - ubuntu1604-zseries-small
+ - ubuntu1804-zseries-build
batchtime: 10080 # 7 days
expansions:
smp_command: -j $(grep -c ^processor /proc/cpuinfo)
diff --git a/src/third_party/wiredtiger/test/format/Makefile.am b/src/third_party/wiredtiger/test/format/Makefile.am
index da55ffece4c..2d9bbf21eb8 100644
--- a/src/third_party/wiredtiger/test/format/Makefile.am
+++ b/src/third_party/wiredtiger/test/format/Makefile.am
@@ -4,7 +4,7 @@ AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = t
t_SOURCES =\
- backup.c bulk.c compact.c config.c lrt.c ops.c rebalance.c \
+ backup.c bulk.c compact.c config.c lrt.c ops.c random.c rebalance.c \
salvage.c snap.c t.c util.c wts.c
t_LDADD = $(top_builddir)/test/utility/libtest_util.la
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index 58decce75af..492d5124a1c 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -222,6 +222,9 @@ static CONFIG c[] = {{"abort", "if timed run should drop core", /* 0% */
{"quiet", "quiet run (same as -q)", C_IGNORE | C_BOOL, 0, 0, 1, &g.c_quiet, NULL},
+ {"random_cursor", "if random cursor reads configured", /* 10% */
+ C_BOOL, 10, 0, 0, &g.c_random_cursor, NULL},
+
{"read_pct", "percent operations that are reads", C_IGNORE, 0, 0, 100, &g.c_read_pct, NULL},
{"rebalance", "rebalance testing", /* 100% */
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index e90bbf86998..890f03c845c 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -178,6 +178,7 @@ typedef struct {
uint32_t c_prefix_compression_min;
uint32_t c_prepare;
uint32_t c_quiet;
+ uint32_t c_random_cursor;
uint32_t c_read_pct;
uint32_t c_rebalance;
uint32_t c_repeat_data_pct;
@@ -345,6 +346,7 @@ void key_gen_insert(WT_RAND_STATE *, WT_ITEM *, uint64_t);
void key_gen_teardown(WT_ITEM *);
void key_init(void);
WT_THREAD_RET lrt(void *);
+WT_THREAD_RET random_kv(void *);
void path_setup(const char *);
int read_row_worker(WT_CURSOR *, uint64_t, WT_ITEM *, WT_ITEM *, bool);
uint32_t rng(WT_RAND_STATE *);
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index a03b42e427b..6f5e7943c83 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -87,7 +87,7 @@ wts_ops(bool lastrun)
TINFO *tinfo, total;
WT_CONNECTION *conn;
WT_SESSION *session;
- wt_thread_t alter_tid, backup_tid, checkpoint_tid, compact_tid, lrt_tid;
+ wt_thread_t alter_tid, backup_tid, checkpoint_tid, compact_tid, lrt_tid, random_tid;
wt_thread_t timestamp_tid;
int64_t fourths, quit_fourths, thread_ops;
uint32_t i;
@@ -101,6 +101,7 @@ wts_ops(bool lastrun)
memset(&checkpoint_tid, 0, sizeof(checkpoint_tid));
memset(&compact_tid, 0, sizeof(compact_tid));
memset(&lrt_tid, 0, sizeof(lrt_tid));
+ memset(&random_tid, 0, sizeof(random_tid));
memset(&timestamp_tid, 0, sizeof(timestamp_tid));
modify_repl_init();
@@ -183,6 +184,8 @@ wts_ops(bool lastrun)
testutil_check(__wt_thread_create(NULL, &compact_tid, compact, NULL));
if (!SINGLETHREADED && g.c_long_running_txn)
testutil_check(__wt_thread_create(NULL, &lrt_tid, lrt, NULL));
+ if (g.c_random_cursor)
+ testutil_check(__wt_thread_create(NULL, &random_tid, random_kv, NULL));
if (g.c_txn_timestamps)
testutil_check(__wt_thread_create(NULL, &timestamp_tid, timestamp, tinfo_list));
@@ -267,6 +270,8 @@ wts_ops(bool lastrun)
testutil_check(__wt_thread_join(NULL, &compact_tid));
if (!SINGLETHREADED && g.c_long_running_txn)
testutil_check(__wt_thread_join(NULL, &lrt_tid));
+ if (g.c_random_cursor)
+ testutil_check(__wt_thread_join(NULL, &random_tid));
if (g.c_txn_timestamps)
testutil_check(__wt_thread_join(NULL, &timestamp_tid));
g.workers_finished = false;
diff --git a/src/third_party/wiredtiger/test/format/random.c b/src/third_party/wiredtiger/test/format/random.c
new file mode 100644
index 00000000000..131cb0bd258
--- /dev/null
+++ b/src/third_party/wiredtiger/test/format/random.c
@@ -0,0 +1,95 @@
+/*-
+ * Public Domain 2014-2019 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "format.h"
+
+/*
+ * random_kv --
+ * Thread routine: repeatedly open a next_random cursor and read some key/value pairs, pausing
+ * between rounds, until g.workers_finished is set.
+ */
+WT_THREAD_RET
+random_kv(void *arg)
+{
+ WT_CONNECTION *conn;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_ITEM key, value;
+ WT_SESSION *session;
+ uint32_t i;
+ u_int period;
+ const char *config;
+ bool simple;
+
+ (void)(arg); /* Unused parameter */
+
+ conn = g.wts_conn;
+
+ /* Random cursor ops are only supported on row-store. */
+ if (g.type != ROW)
+ return (WT_THREAD_RET_VALUE);
+
+ /* Open a session. */
+ testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+ for (simple = false;;) {
+ /* Alternate between simple random cursors and sample-size random cursors. */
+ config = simple ? "next_random=true" : "next_random=true,next_random_sample_size=37";
+ simple = !simple;
+
+ /*
+ * open_cursor can return EBUSY if concurrent with a metadata operation, retry in that case.
+ */
+ while ((ret = session->open_cursor(session, g.uri, NULL, config, &cursor)) == EBUSY)
+ __wt_yield();
+ testutil_check(ret);
+
+ /* This is just a smoke-test, get a random number of key/value pairs (possibly none). */
+ for (i = mmrand(NULL, 0, 1000); i > 0; --i) {
+ testutil_check(cursor->next(cursor));
+ testutil_check(cursor->get_key(cursor, &key));
+ testutil_check(cursor->get_value(cursor, &value));
+ }
+
+ testutil_check(cursor->close(cursor));
+
+ /* Pick a random pause length, in seconds, before the next round. */
+ period = mmrand(NULL, 1, 10);
+
+ /* Sleep one period unit at a time, rechecking for shutdown, so we don't make the run wait. */
+ while (period > 0 && !g.workers_finished) {
+ --period;
+ __wt_sleep(1, 0);
+ }
+ if (g.workers_finished)
+ break;
+ }
+
+ testutil_check(session->close(session, NULL));
+
+ return (WT_THREAD_RET_VALUE);
+}
diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c
index 89a72f090e7..f3482861573 100644
--- a/src/third_party/wiredtiger/test/format/wts.c
+++ b/src/third_party/wiredtiger/test/format/wts.c
@@ -162,7 +162,8 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp)
",cache_size=%" PRIu32
"MB"
",checkpoint_sync=false"
- ",error_prefix=\"%s\"",
+ ",error_prefix=\"%s\""
+ ",operation_timeout_ms=2000",
g.c_cache, progname);
/* In-memory configuration. */
diff --git a/src/third_party/wiredtiger/test/suite/test_debug_mode05.py b/src/third_party/wiredtiger/test/suite/test_debug_mode05.py
index f248a05e646..09597e7a38f 100644
--- a/src/third_party/wiredtiger/test/suite/test_debug_mode05.py
+++ b/src/third_party/wiredtiger/test/suite/test_debug_mode05.py
@@ -43,9 +43,11 @@ class test_debug_mode05(wttest.WiredTigerTestCase):
def test_table_logging_rollback_to_stable(self):
self.session.create(self.uri, 'key_format=i,value_format=u')
+
cursor = self.session.open_cursor(self.uri, None)
self.conn.set_timestamp('stable_timestamp=' + timestamp_str(100))
+ self.session.checkpoint()
# Try doing a normal prepared txn and then rollback to stable.
self.session.begin_transaction()
diff --git a/src/third_party/wiredtiger/test/suite/test_las01.py b/src/third_party/wiredtiger/test/suite/test_las01.py
index 76f19b51768..679d01ae06d 100755
--- a/src/third_party/wiredtiger/test/suite/test_las01.py
+++ b/src/third_party/wiredtiger/test/suite/test_las01.py
@@ -83,10 +83,11 @@ class test_las01(wttest.WiredTigerTestCase):
# Skip the initial rows, which were not updated
for i in range(0, nrows+1):
self.assertEqual(cursor.next(), 0)
- if (check_value != cursor.get_value()):
- print("Check value : " + str(check_value))
- print("value : " + str(cursor.get_value()))
- self.assertTrue(check_value == cursor.get_value())
+ if check_value != cursor.get_value():
+ session.breakpoint()
+ self.assertTrue(check_value == cursor.get_value(),
+ "for key " + str(i) + ", expected " + str(check_value) +
+ ", got " + str(cursor.get_value()))
cursor.close()
session.close()
conn.close()
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp04.py b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
index acbad7e02a4..9e0e4a0cec0 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp04.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
@@ -78,7 +78,8 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
# Search for the expected items as well as iterating.
for k, v in expected.items():
if missing == False:
- self.assertEqual(cur[k], v, "for key " + str(k))
+ self.assertEqual(cur[k], v, "for key " + str(k) +
+ " expected " + str(v) + ", got " + str(cur[k]))
else:
cur.set_key(k)
if self.empty:
@@ -162,7 +163,11 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
# Roll back half timestamps.
stable_ts = timestamp_str(key_range // 2)
self.conn.set_timestamp('stable_timestamp=' + stable_ts)
+
+ # We're about to test rollback-to-stable which requires a checkpoint to which we can roll back.
+ self.session.checkpoint()
self.conn.rollback_to_stable()
+
stat_cursor = self.session.open_cursor('statistics:', None, None)
calls = stat_cursor[stat.conn.txn_rollback_to_stable][2]
upd_aborted = (stat_cursor[stat.conn.txn_rollback_upd_aborted][2] +
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp06.py b/src/third_party/wiredtiger/test/suite/test_timestamp06.py
index 55981f67a98..fd004a23703 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp06.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp06.py
@@ -157,7 +157,7 @@ class test_timestamp06(wttest.WiredTigerTestCase, suite_subprocess):
# Scenario: 1
# Check that we see all the latest values (i.e. 3) as per transaction
- # visibility when reading with out the read timestamp.
+ # visibility when reading without the read timestamp.
# All tables should see all the values.
self.check(self.session, "", self.table_ts_log,
dict((k, 3) for k in orig_keys))
@@ -204,8 +204,12 @@ class test_timestamp06(wttest.WiredTigerTestCase, suite_subprocess):
self.ckpt_backup(2, (nkeys - valcnt_ts_log), (nkeys - valcnt_ts_nolog))
# Scenario: 3
- # Check that we see all the data values correctly after rollback
+ # Check we see all the data values correctly after rollback. Skip the case where the most
+ # recent checkpoint wasn't based on the last stable timestamp, those can't be rolled back.
+ if self.ckpt_ts == False:
+ return
self.conn.rollback_to_stable()
+
# All tables should see the values correctly when read with
# read timestamp as stable timestamp.
self.check(self.session, 'read_timestamp=' + stable_ts,
@@ -214,7 +218,7 @@ class test_timestamp06(wttest.WiredTigerTestCase, suite_subprocess):
self.table_ts_log, dict((k, 2) for k in orig_keys))
# Scenario: 4
- # Check that we see the values correctly when read with out any
+ # Check that we see the values correctly when read without any
# timestamp.
if self.using_log == True:
# For logged table we should see latest values (i.e. 3) when logging
@@ -224,21 +228,13 @@ class test_timestamp06(wttest.WiredTigerTestCase, suite_subprocess):
else:
# When logging is disabled, we should not see the values beyond the
# stable timestamp with timestamped checkpoints.
- if self.ckpt_ts == True:
- self.check(self.session, "",
- self.table_ts_log, dict((k, 2) for k in orig_keys))
- else:
- self.check(self.session, "",
- self.table_ts_log, dict((k, 3) for k in orig_keys))
+ self.check(self.session, "",
+ self.table_ts_log, dict((k, 2) for k in orig_keys))
# For non-logged table we should not see the values beyond the
# stable timestamp with timestamped checkpoints.
- if self.ckpt_ts == True:
- self.check(self.session, "",
- self.table_ts_nolog, dict((k, 2) for k in orig_keys))
- else:
- self.check(self.session, "",
- self.table_ts_nolog, dict((k, 3) for k in orig_keys))
+ self.check(self.session, "",
+ self.table_ts_nolog, dict((k, 2) for k in orig_keys))
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp11.py b/src/third_party/wiredtiger/test/suite/test_timestamp11.py
index 1256a544d78..f3d03cd8fa5 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp11.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp11.py
@@ -83,6 +83,7 @@ class test_timestamp11(wttest.WiredTigerTestCase, suite_subprocess):
#
stable_ts = timestamp_str(2)
self.conn.set_timestamp('stable_timestamp=' + stable_ts)
+ self.session.checkpoint()
self.conn.rollback_to_stable()
c = self.session.open_cursor(uri)
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp16.py b/src/third_party/wiredtiger/test/suite/test_timestamp16.py
index bef116d62a9..20663889450 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp16.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp16.py
@@ -50,25 +50,21 @@ class test_timestamp16(wttest.WiredTigerTestCase, suite_subprocess):
self.session.begin_transaction('read_timestamp=100')
self.session.rollback_transaction()
self.session.checkpoint('use_timestamp=true')
- self.assertTimestampsEqual('0',
- self.conn.query_timestamp('get=last_checkpoint'))
+ self.assertTimestampsEqual('0', self.conn.query_timestamp('get=last_checkpoint'))
- # Set a stable and make sure that we still checkpoint at
- # the stable.
- self.conn.set_timestamp('stable_timestamp=1')
+ # Set a stable and make sure that we still checkpoint at the stable.
+ self.conn.set_timestamp('stable_timestamp=2')
self.session.begin_transaction('read_timestamp=100')
self.session.rollback_transaction()
self.session.checkpoint('use_timestamp=true')
- self.assertTimestampsEqual('1',
- self.conn.query_timestamp('get=last_checkpoint'))
+ self.assertTimestampsEqual('2', self.conn.query_timestamp('get=last_checkpoint'))
# Finally make sure that commit also resets the read timestamp.
self.session.create(self.uri, 'key_format=i,value_format=i')
self.session.begin_transaction('read_timestamp=150')
self.session.commit_transaction()
self.session.checkpoint('use_timestamp=true')
- self.assertTimestampsEqual('1',
- self.conn.query_timestamp('get=last_checkpoint'))
+ self.assertTimestampsEqual('2', self.conn.query_timestamp('get=last_checkpoint'))
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_txn21.py b/src/third_party/wiredtiger/test/suite/test_txn21.py
new file mode 100644
index 00000000000..212a4d321b6
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_txn21.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2019 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_txn21.py
+# Transactions: smoke test the operation timeout API
+#
+
+import wiredtiger, wttest
+
+class test_txn21(wttest.WiredTigerTestCase):
+
+ # Connection-level configuration: wiredtiger_open is called with operation_timeout_ms.
+ def test_operation_timeout_conn(self):
+ # Close the automatically opened connection and open one with the timeout configuration.
+ conn_config = 'operation_timeout_ms=2000'
+ self.conn.close()
+ self.conn = wiredtiger.wiredtiger_open(self.home, conn_config)
+
+ # Transaction-level configuration: begin_transaction is called with operation_timeout_ms.
+ def test_operation_timeout_txn(self):
+ self.session.begin_transaction('operation_timeout_ms=2000')
+
+if __name__ == '__main__':
+ wttest.run()