summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py5
-rw-r--r--src/third_party/wiredtiger/dist/s_clang-scan.diff12
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok1
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py1
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/lang/java/wiredtiger.i5
-rw-r--r--src/third_party/wiredtiger/lang/python/wiredtiger.i4
-rw-r--r--src/third_party/wiredtiger/src/async/async_op.c2
-rw-r--r--src/third_party/wiredtiger/src/async/async_worker.c2
-rw-r--r--src/third_party/wiredtiger/src/block/block_ckpt.c24
-rw-r--r--src/third_party/wiredtiger/src/block/block_ext.c26
-rw-r--r--src/third_party/wiredtiger/src/block/block_mgr.c10
-rw-r--r--src/third_party/wiredtiger/src/block/block_open.c6
-rw-r--r--src/third_party/wiredtiger/src/block/block_read.c2
-rw-r--r--src/third_party/wiredtiger/src/block/block_write.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curnext.c138
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curprev.c134
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_cursor.c169
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c61
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_delete.c8
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_io.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_ovfl.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_page.c59
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_random.c62
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c1
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_rebalance.c8
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_ret.c241
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_slvg.c31
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c69
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_sync.c23
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy.c236
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c141
-rw-r--r--src/third_party/wiredtiger/src/btree/col_modify.c6
-rw-r--r--src/third_party/wiredtiger/src/btree/col_srch.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/row_modify.c6
-rw-r--r--src/third_party/wiredtiger/src/btree/row_srch.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_capacity.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_ckpt.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_dhandle.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_log.c8
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_open.c21
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_stat.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_sweep.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_backup.c6
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_bulk.c4
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_file.c6
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_index.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_join.c4
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_json.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_stat.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_std.c18
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_table.c8
-rw-r--r--src/third_party/wiredtiger/src/docs/backup.dox7
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c4
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c7
-rw-r--r--src/third_party/wiredtiger/src/history/hs.c244
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h82
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i35
-rw-r--r--src/third_party/wiredtiger/src/include/cell.h27
-rw-r--r--src/third_party/wiredtiger/src/include/cell.i456
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h11
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.h8
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.i23
-rw-r--r--src/third_party/wiredtiger/src/include/error.h43
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h138
-rw-r--r--src/third_party/wiredtiger/src/include/meta.h8
-rw-r--r--src/third_party/wiredtiger/src/include/mutex.i4
-rw-r--r--src/third_party/wiredtiger/src/include/reconcile.h23
-rw-r--r--src/third_party/wiredtiger/src/include/reconcile.i72
-rw-r--r--src/third_party/wiredtiger/src/include/schema.h4
-rw-r--r--src/third_party/wiredtiger/src/include/session.h51
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h1
-rw-r--r--src/third_party/wiredtiger/src/include/timestamp.h54
-rw-r--r--src/third_party/wiredtiger/src/include/timestamp.i225
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h22
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i211
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in34
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h22
-rw-r--r--src/third_party/wiredtiger/src/log/log.c8
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_cursor.c18
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c6
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_manager.c2
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_merge.c2
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_tree.c2
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_worker.c2
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_ckpt.c69
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_track.c2
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_turtle.c6
-rw-r--r--src/third_party/wiredtiger/src/optrack/optrack.c2
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_errno.c3
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_fs.c10
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c10
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_time.c4
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_mtx_cond.c4
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_thread.c5
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_child.c2
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_col.c309
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_row.c200
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_visibility.c129
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c74
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_util.c4
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c10
-rw-r--r--src/third_party/wiredtiger/src/support/err.c128
-rw-r--r--src/third_party/wiredtiger/src/support/generation.c4
-rw-r--r--src/third_party/wiredtiger/src/support/hazard.c2
-rw-r--r--src/third_party/wiredtiger/src/support/modify.c105
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c6
-rw-r--r--src/third_party/wiredtiger/src/support/thread_group.c4
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c192
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c82
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_log.c7
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_recover.c70
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c149
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c53
-rwxr-xr-xsrc/third_party/wiredtiger/src/utilities/util_dump.c12
-rw-r--r--src/third_party/wiredtiger/test/csuite/Makefile.am3
-rw-r--r--src/third_party/wiredtiger/test/csuite/random_abort/main.c12
-rwxr-xr-xsrc/third_party/wiredtiger/test/csuite/random_abort/smoke.sh4
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c2
-rw-r--r--src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c4
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen.yml714
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh24
-rw-r--r--src/third_party/wiredtiger/test/fops/Makefile.am3
-rw-r--r--src/third_party/wiredtiger/test/format/bulk.c17
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c24
-rw-r--r--src/third_party/wiredtiger/test/suite/test_backup01.py24
-rw-r--r--src/third_party/wiredtiger/test/suite/test_checkpoint05.py84
-rw-r--r--src/third_party/wiredtiger/test/suite/test_debug_mode05.py2
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_durable_ts03.py3
-rw-r--r--src/third_party/wiredtiger/test/suite/test_hs06.py3
-rw-r--r--src/third_party/wiredtiger/test/suite/test_hs08.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_hs09.py5
-rw-r--r--src/third_party/wiredtiger/test/suite/test_hs10.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_hs12.py105
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare_hs01.py34
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare_hs03.py3
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_rollback_to_stable05.py2
-rwxr-xr-xsrc/third_party/wiredtiger/test/suite/test_txn19.py20
138 files changed, 3280 insertions, 2925 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 30a47fa3201..6bef5f96b93 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -1523,8 +1523,9 @@ methods = {
including the named checkpoint, or
\c "to=<checkpoint>" to drop all checkpoints before and
including the named checkpoint. Checkpoints cannot be
- dropped while a hot backup is in progress or if open in
- a cursor''', type='list'),
+ dropped if open in a cursor. While a hot backup is in
+ progress, checkpoints created prior to the start of the
+ backup cannot be dropped''', type='list'),
Config('force', 'false', r'''
if false (the default), checkpoints may be skipped if the underlying object has not been
modified, if true, this option forces the checkpoint''',
diff --git a/src/third_party/wiredtiger/dist/s_clang-scan.diff b/src/third_party/wiredtiger/dist/s_clang-scan.diff
index d7177e94279..3c0bd823a4c 100644
--- a/src/third_party/wiredtiger/dist/s_clang-scan.diff
+++ b/src/third_party/wiredtiger/dist/s_clang-scan.diff
@@ -1,11 +1,11 @@
In file included from src/block/block_write.c:9:
-In file included from ./src/include/wt_internal.h:418:
+In file included from ./src/include/wt_internal.h:420:
./src/include/intpack.i:193:7: warning: Assigned value is garbage or undefined
p = *pp;
^ ~~~
-1 warning generated.
+1 warning generated
In file included from src/btree/col_modify.c:9:
-In file included from ./src/include/wt_internal.h:423:
+In file included from ./src/include/wt_internal.h:425:
./src/include/mutex.i:158:13: warning: Null pointer passed as an argument to a 'nonnull' parameter
return (pthread_mutex_trylock(&t->lock));
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -14,12 +14,8 @@ src/conn/conn_capacity.c:291:5: warning: Value stored to 'capacity' is never rea
capacity = steal_capacity = 0;
^ ~~~~~~~~~~~~~~~~~~
1 warning generated.
-src/reconcile/rec_col.c:1111:25: warning: Null pointer passed as an argument to a 'nonnull' parameter
- memcmp(last.value->data, data, size) == 0))) {
- ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-1 warning generated.
In file included from src/reconcile/rec_write.c:9:
-In file included from ./src/include/wt_internal.h:423:
+In file included from ./src/include/wt_internal.h:425:
./src/include/mutex.i:184:16: warning: Null pointer passed as an argument to a 'nonnull' parameter
if ((ret = pthread_mutex_unlock(&t->lock)) != 0)
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 29f43d94ef7..e9422174821 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -503,6 +503,7 @@ autoconf
automake
bInheritHandle
backoff
+backport
bal
basecfg
basho
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index d519fa820b9..181a3c29847 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -823,6 +823,7 @@ dsrc_stats = [
RecStat('rec_overflow_key_internal', 'internal-page overflow keys'),
RecStat('rec_overflow_key_leaf', 'leaf-page overflow keys'),
RecStat('rec_overflow_value', 'overflow values written'),
+ RecStat('rec_prepare_value', 'prepared values written'),
RecStat('rec_page_delete', 'pages deleted'),
RecStat('rec_page_delete_fast', 'fast-path pages deleted'),
RecStat('rec_page_match', 'page checksum matches'),
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 477909db5c1..4fcc4a17180 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "a707df12a2503ad39ccdd82a84062faa6a07e082"
+ "commit": "18dfb9e58e39927696affcd8e362364e23e1aa59"
}
diff --git a/src/third_party/wiredtiger/lang/java/wiredtiger.i b/src/third_party/wiredtiger/lang/java/wiredtiger.i
index cd1174e6e48..e544951909e 100644
--- a/src/third_party/wiredtiger/lang/java/wiredtiger.i
+++ b/src/third_party/wiredtiger/lang/java/wiredtiger.i
@@ -2264,8 +2264,7 @@ err: if (ret != 0)
if ((ret = $self->open_cursor($self, uri, to_dup, config, &cursor)) != 0)
goto err;
- if ((ret = __wt_calloc_def((WT_SESSION_IMPL *)cursor->session,
- 1, &jcb)) != 0)
+ if ((ret = __wt_calloc_def(CUR2S(cursor), 1, &jcb)) != 0)
goto err;
if ((cursor->flags & WT_CURSTD_RAW) != 0)
@@ -2274,7 +2273,7 @@ err: if (ret != 0)
cursor->flags |= WT_CURSTD_RAW;
jcb->jnienv = jenv;
- jcb->session = (WT_SESSION_IMPL *)cursor->session;
+ jcb->session = CUR2S(cursor);
cursor->lang_private = jcb;
err: if (ret != 0)
diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger.i b/src/third_party/wiredtiger/lang/python/wiredtiger.i
index 7f5f6d17e96..249cde15837 100644
--- a/src/third_party/wiredtiger/lang/python/wiredtiger.i
+++ b/src/third_party/wiredtiger/lang/python/wiredtiger.i
@@ -1347,7 +1347,7 @@ cursorCloseHandler(WT_CURSOR *cursor)
cursor->lang_private = NULL;
if (pcb != NULL)
ret = pythonClose(pcb);
- __wt_free((WT_SESSION_IMPL *)cursor->session, pcb);
+ __wt_free(CUR2S(cursor), pcb);
return (ret);
}
@@ -1374,7 +1374,7 @@ cursorFreeHandler(WT_CURSOR *cursor)
pcb = (PY_CALLBACK *)cursor->lang_private;
cursor->lang_private = NULL;
- __wt_free((WT_SESSION_IMPL *)cursor->session, pcb);
+ __wt_free(CUR2S(cursor), pcb);
return (0);
}
diff --git a/src/third_party/wiredtiger/src/async/async_op.c b/src/third_party/wiredtiger/src/async/async_op.c
index b696d2cc366..3faa53808c8 100644
--- a/src/third_party/wiredtiger/src/async/async_op.c
+++ b/src/third_party/wiredtiger/src/async/async_op.c
@@ -291,7 +291,7 @@ __wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op)
#ifdef HAVE_DIAGNOSTIC
WT_ORDERED_READ(my_op, async->async_queue[my_slot]);
if (my_op != NULL)
- return (__wt_panic(session));
+ return (__wt_panic(session, WT_PANIC, "async failure"));
#endif
WT_PUBLISH(async->async_queue[my_slot], op);
op->state = WT_ASYNCOP_ENQUEUED;
diff --git a/src/third_party/wiredtiger/src/async/async_worker.c b/src/third_party/wiredtiger/src/async/async_worker.c
index 23098c5b8c5..eb301b2cdfa 100644
--- a/src/third_party/wiredtiger/src/async/async_worker.c
+++ b/src/third_party/wiredtiger/src/async/async_worker.c
@@ -309,7 +309,7 @@ __wt_async_worker(void *arg)
if (0) {
err:
- WT_PANIC_MSG(session, ret, "async worker error");
+ WT_IGNORE_RET(__wt_panic(session, ret, "async worker error"));
}
/*
* Worker thread cleanup, close our cached cursors and free all the WT_ASYNC_CURSOR structures.
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c
index 6cbe80a9317..292917be5d6 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt.c
@@ -196,11 +196,11 @@ __wt_block_checkpoint_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
case WT_CKPT_INPROGRESS:
case WT_CKPT_PANIC_ON_FAILURE:
case WT_CKPT_SALVAGE:
- __wt_err(session, EINVAL,
+ ret = __wt_panic(session, EINVAL,
"%s: an unexpected checkpoint start: the checkpoint "
"has already started or was configured for salvage",
block->name);
- ret = __wt_block_panic(session);
+ __wt_block_set_readonly(session);
break;
case WT_CKPT_NONE:
block->ckpt_state = WT_CKPT_INPROGRESS;
@@ -389,11 +389,11 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase)
break;
case WT_CKPT_NONE:
case WT_CKPT_PANIC_ON_FAILURE:
- __wt_err(session, EINVAL,
+ ret = __wt_panic(session, EINVAL,
"%s: an unexpected checkpoint attempt: the checkpoint "
"was never started or has already completed",
block->name);
- ret = __wt_block_panic(session);
+ __wt_block_set_readonly(session);
break;
case WT_CKPT_SALVAGE:
/* Salvage doesn't use the standard checkpoint APIs. */
@@ -638,8 +638,8 @@ live_update:
err:
if (ret != 0 && fatal) {
- __wt_err(session, ret, "%s: fatal checkpoint failure", block->name);
- ret = __wt_block_panic(session);
+ ret = __wt_panic(session, ret, "%s: fatal checkpoint failure", block->name);
+ __wt_block_set_readonly(session);
}
if (locked)
@@ -860,26 +860,26 @@ __wt_block_checkpoint_resolve(WT_SESSION_IMPL *session, WT_BLOCK *block, bool fa
goto done;
case WT_CKPT_NONE:
case WT_CKPT_SALVAGE:
- __wt_err(session, EINVAL,
+ ret = __wt_panic(session, EINVAL,
"%s: an unexpected checkpoint resolution: the checkpoint "
"was never started or completed, or configured for salvage",
block->name);
- ret = __wt_block_panic(session);
+ __wt_block_set_readonly(session);
break;
case WT_CKPT_PANIC_ON_FAILURE:
if (!failed)
break;
- __wt_err(
+ ret = __wt_panic(
session, EINVAL, "%s: the checkpoint failed, the system must restart", block->name);
- ret = __wt_block_panic(session);
+ __wt_block_set_readonly(session);
break;
}
WT_ERR(ret);
if ((ret = __wt_block_extlist_merge(session, block, &ci->ckpt_avail, &ci->avail)) != 0) {
- __wt_err(
+ ret = __wt_panic(
session, ret, "%s: fatal checkpoint failure during extent list merge", block->name);
- ret = __wt_block_panic(session);
+ __wt_block_set_readonly(session);
}
__wt_spin_unlock(session, &block->live_lock);
diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c
index 632908d6b0a..8e854da15c1 100644
--- a/src/third_party/wiredtiger/src/block/block_ext.c
+++ b/src/third_party/wiredtiger/src/block/block_ext.c
@@ -13,11 +13,12 @@
* Handle extension list errors that would normally panic the system but
* which should fail gracefully when verifying.
*/
-#define WT_BLOCK_RET(session, block, v, ...) \
- do { \
- int __ret = (v); \
- __wt_err(session, __ret, __VA_ARGS__); \
- return ((block)->verify ? __ret : __wt_panic(session)); \
+#define WT_BLOCK_RET(session, block, v, ...) \
+ do { \
+ int __ret = (v); \
+ __wt_err(session, __ret, __VA_ARGS__); \
+ return ((block)->verify ? __ret : __wt_panic(session, WT_PANIC, \
+ "block manager extension list failure")); \
} while (0)
static int __block_append(WT_SESSION_IMPL *, WT_BLOCK *, WT_EXTLIST *, wt_off_t, wt_off_t);
@@ -297,13 +298,10 @@ __wt_block_misplaced(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *list
else if (live && __block_off_match(&block->live.discard, offset, size))
name = "discard";
__wt_spin_unlock(session, &block->live_lock);
- if (name != NULL) {
- __wt_errx(session, "%s failed: %" PRIuMAX "/%" PRIu32
- " is on the %s list "
- "(%s, %d)",
- list, (uintmax_t)offset, size, name, func, line);
- return (__wt_panic(session));
- }
+ if (name != NULL)
+ return (__wt_panic(session, WT_PANIC,
+ "%s failed: %" PRIuMAX "/%" PRIu32 " is on the %s list (%s, %d)", list, (uintmax_t)offset,
+ size, name, func, line));
return (0);
}
#endif
@@ -336,7 +334,7 @@ __block_off_remove(
__block_size_srch(el->sz, ext->size, sstack);
szp = *sstack[0];
if (szp == NULL || szp->size != ext->size)
- WT_PANIC_RET(session, EINVAL, "extent not found in by-size list during remove");
+ WT_RET_PANIC(session, EINVAL, "extent not found in by-size list during remove");
__block_off_srch(szp->off, off, astack, true);
ext = *astack[0];
if (ext == NULL || ext->off != off)
@@ -643,7 +641,7 @@ __wt_block_extlist_check(WT_SESSION_IMPL *session, WT_EXTLIST *al, WT_EXTLIST *b
b = b->next[0];
continue;
}
- WT_PANIC_RET(session, EINVAL, "checkpoint merge check: %s list overlaps the %s list",
+ WT_RET_PANIC(session, EINVAL, "checkpoint merge check: %s list overlaps the %s list",
al->name, bl->name);
}
return (0);
diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c
index c689e3848bf..07a368b0350 100644
--- a/src/third_party/wiredtiger/src/block/block_mgr.c
+++ b/src/third_party/wiredtiger/src/block/block_mgr.c
@@ -630,14 +630,12 @@ err:
}
/*
- * __wt_block_panic --
- * Report an error, then panic the handle and the system.
+ * __wt_block_set_readonly --
+ * Set the block API to read-only.
*/
-int
-__wt_block_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_ATTRIBUTE((cold))
+void
+__wt_block_set_readonly(WT_SESSION_IMPL *session) WT_GCC_FUNC_ATTRIBUTE((cold))
{
/* Switch the handle into read-only mode. */
__bm_method_set(S2BT(session)->bm, true);
-
- return (__wt_panic(session));
}
diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c
index 638796e4459..e6b59847fcf 100644
--- a/src/third_party/wiredtiger/src/block/block_open.c
+++ b/src/third_party/wiredtiger/src/block/block_open.c
@@ -326,9 +326,9 @@ __desc_read(WT_SESSION_IMPL *session, uint32_t allocsize, WT_BLOCK *block)
* In the general case, we should return a generic error and signal that we've detected data
* corruption.
*
- * FIXME: MongoDB relies heavily on the error codes reported when opening cursors (which hits
- * this logic if the relevant data handle isn't already open). However this code gets run in
- * rollback to stable as part of recovery where we want to skip any corrupted data files
+ * FIXME-WT-5832: MongoDB relies heavily on the error codes reported when opening cursors (which
+ * hits this logic if the relevant data handle isn't already open). However this code gets run
+ * in rollback to stable as part of recovery where we want to skip any corrupted data files
* temporarily to allow MongoDB to initiate salvage. This is why we've been forced into this
* situation. We should address this as part of WT-5832 and clarify what error codes we expect
* to be returning across the API boundary.
diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c
index 9c0d17fe28f..0226ede60e1 100644
--- a/src/third_party/wiredtiger/src/block/block_read.c
+++ b/src/third_party/wiredtiger/src/block/block_read.c
@@ -289,5 +289,5 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_
F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
if (block->verify || F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
return (WT_ERROR);
- WT_PANIC_RET(session, WT_ERROR, "%s: fatal read error", block->name);
+ WT_RET_PANIC(session, WT_ERROR, "%s: fatal read error", block->name);
}
diff --git a/src/third_party/wiredtiger/src/block/block_write.c b/src/third_party/wiredtiger/src/block/block_write.c
index d69371ee533..a8a0091d854 100644
--- a/src/third_party/wiredtiger/src/block/block_write.c
+++ b/src/third_party/wiredtiger/src/block/block_write.c
@@ -39,7 +39,7 @@ __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
* backups, which only copies log files, or targeted backups, stops all block truncation
* unnecessarily). We may want a more targeted solution at some point.
*/
- if (!conn->hot_backup) {
+ if (conn->hot_backup_start == 0) {
WT_WITH_HOTBACKUP_READ_LOCK(session, ret = __wt_ftruncate(session, block->fh, len), NULL);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index d6c89dacd33..9ea91c6f421 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -16,9 +16,8 @@ static inline int
__cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/* If restarting after a prepare conflict, jump to the right spot. */
if (restart)
@@ -58,28 +57,14 @@ __cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
cbt->iface.value.data = &cbt->v;
} else {
restart_read:
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
- if (upd == NULL) {
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID) {
cbt->v = 0;
cbt->iface.value.data = &cbt->v;
- } else {
- /*
- * If this update has been restored from the disk, it needs to be freed after copying it
- * to the user cursor.
- */
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK)) {
- switch (upd->type) {
- case WT_UPDATE_TOMBSTONE:
- cbt->iface.value.data = upd->data;
- __wt_free_update_list(session, &upd);
- break;
- default:
- return (__wt_value_return(cbt, upd));
- }
- }
- if (upd != NULL)
- cbt->iface.value.data = upd->data;
- }
+ } else if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
+ cbt->iface.value.data = cbt->upd_value->buf.data;
+ else
+ WT_RET(__wt_value_return(cbt, cbt->upd_value));
}
cbt->iface.value.size = 1;
return (0);
@@ -95,12 +80,10 @@ __cursor_fix_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
WT_BTREE *btree;
WT_PAGE *page;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
btree = S2BT(session);
page = cbt->ref->page;
- upd = NULL;
/* If restarting after a prepare conflict, jump to the right spot. */
if (restart)
@@ -127,33 +110,20 @@ new_page:
if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
cbt->ins = NULL;
/*
- * FIXME-PM-1523: Now we only do transaction read if we have an update chain and it doesn't work
+ * FIXME-WT-6127: Now we only do transaction read if we have an update chain and it doesn't work
* in durable history. Review this when we have a plan for fixed-length column store.
*/
+ __wt_upd_value_clear(cbt->upd_value);
if (cbt->ins != NULL)
restart_read:
- WT_RET(__wt_txn_read(session, cbt, NULL, cbt->recno, cbt->ins->upd, NULL, &upd));
- if (upd == NULL) {
+ WT_RET(__wt_txn_read(session, cbt, NULL, cbt->recno, cbt->ins->upd, NULL));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID) {
cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
cbt->iface.value.data = &cbt->v;
- } else {
- /*
- * If this update has been restored from the disk, it needs to be freed after copying it to
- * the user cursor.
- */
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK)) {
- switch (upd->type) {
- case WT_UPDATE_TOMBSTONE:
- cbt->iface.value.data = upd->data;
- __wt_free_update_list(session, &upd);
- break;
- default:
- return (__wt_value_return(cbt, upd));
- }
- }
- if (upd != NULL)
- cbt->iface.value.data = upd->data;
- }
+ } else if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
+ cbt->iface.value.data = cbt->upd_value->buf.data;
+ else
+ WT_RET(__wt_value_return(cbt, cbt->upd_value));
cbt->iface.value.size = 1;
return (0);
}
@@ -166,9 +136,8 @@ static inline int
__cursor_var_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/* If restarting after a prepare conflict, jump to the right spot. */
if (restart)
@@ -187,18 +156,17 @@ new_page:
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
restart_read:
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
- if (upd == NULL)
+ if (cbt->upd_value->type == WT_UPDATE_INVALID)
continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/* NOTREACHED */
}
@@ -216,10 +184,9 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
WT_INSERT *ins;
WT_PAGE *page;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
uint64_t rle, rle_start;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = cbt->ref->page;
rle_start = 0; /* -Werror=maybe-uninitialized */
@@ -258,18 +225,17 @@ restart_read:
/* Check any insert list for a matching record. */
cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
- upd = NULL;
+ __wt_upd_value_clear(cbt->upd_value);
if (cbt->ins != NULL)
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
+ if (cbt->upd_value->type != WT_UPDATE_INVALID) {
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/*
@@ -309,8 +275,9 @@ restart_read:
continue;
}
- WT_RET(__wt_bt_col_var_cursor_walk_txn_read(session, cbt, page, &unpack, cip, &upd));
- if (upd == NULL)
+ WT_RET(__wt_bt_col_var_cursor_walk_txn_read(session, cbt, page, &unpack, cip));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID ||
+ cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
continue;
return (0);
}
@@ -334,10 +301,9 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
WT_PAGE *page;
WT_ROW *rip;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
bool kpack_used;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = cbt->ref->page;
key = &cbt->iface.key;
@@ -386,17 +352,16 @@ restart_read_insert:
if ((ins = cbt->ins) != NULL) {
key->data = WT_INSERT_KEY(ins);
key->size = WT_INSERT_KEY_SIZE(ins);
- WT_RET(__wt_txn_read_upd_list(session, ins->upd, &upd));
- if (upd == NULL)
+ WT_RET(__wt_txn_read_upd_list(session, cbt, ins->upd));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID)
continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/* Check for the end of the page. */
@@ -422,17 +387,16 @@ restart_read_page:
rip = &page->pg_row[cbt->slot];
WT_RET(__cursor_row_slot_key_return(cbt, rip, &kpack, &kpack_used));
WT_RET(__wt_txn_read(
- session, cbt, &cbt->iface.key, WT_RECNO_OOB, WT_ROW_UPDATE(page, rip), NULL, &upd));
- if (upd == NULL)
+ session, cbt, &cbt->iface.key, WT_RECNO_OOB, WT_ROW_UPDATE(page, rip), NULL));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID)
continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/* NOTREACHED */
}
@@ -461,7 +425,7 @@ __cursor_key_order_check_col(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, boo
return (0);
}
- WT_PANIC_RET(session, EINVAL, "WT_CURSOR.%s out-of-order returns: returned key %" PRIu64
+ WT_RET_PANIC(session, EINVAL, "WT_CURSOR.%s out-of-order returns: returned key %" PRIu64
" then "
"key %" PRIu64,
next ? "next" : "prev", cbt->lastrecno, cbt->recno);
@@ -494,7 +458,7 @@ __cursor_key_order_check_row(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, boo
WT_ERR(__wt_scr_alloc(session, 512, &a));
WT_ERR(__wt_scr_alloc(session, 512, &b));
- WT_PANIC_ERR(session, EINVAL,
+ WT_ERR_PANIC(session, EINVAL,
"WT_CURSOR.%s out-of-order returns: returned key %.1024s then "
"key %.1024s",
next ? "next" : "prev", __wt_buf_set_printable_format(session, cbt->lastkey->data,
@@ -536,7 +500,7 @@ __wt_cursor_key_order_init(WT_CURSOR_BTREE *cbt)
{
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/*
* Cursor searches set the position for cursor movements, set the last-key value for diagnostic
@@ -648,7 +612,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
bool newpage, restart;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
WT_STAT_CONN_INCR(session, cursor_next);
WT_STAT_DATA_INCR(session, cursor_next);
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 0099d1ae594..f8db9cd6233 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -40,7 +40,7 @@ __cursor_skip_prev(WT_CURSOR_BTREE *cbt)
uint64_t recno;
int i;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
restart:
/*
@@ -123,9 +123,8 @@ static inline int
__cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/* If restarting after a prepare conflict, jump to the right spot. */
if (restart)
@@ -198,28 +197,14 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
cbt->iface.value.data = &cbt->v;
} else {
restart_read:
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
- if (upd == NULL) {
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID) {
cbt->v = 0;
cbt->iface.value.data = &cbt->v;
- } else {
- /*
- * If this update has been restored from the disk, it needs to be freed after copying it
- * to the user cursor.
- */
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK)) {
- switch (upd->type) {
- case WT_UPDATE_TOMBSTONE:
- cbt->iface.value.data = upd->data;
- __wt_free_update_list(session, &upd);
- break;
- default:
- return (__wt_value_return(cbt, upd));
- }
- }
- if (upd != NULL)
- cbt->iface.value.data = upd->data;
- }
+ } else if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
+ cbt->iface.value.data = cbt->upd_value->buf.data;
+ else
+ WT_RET(__wt_value_return(cbt, cbt->upd_value));
}
cbt->iface.value.size = 1;
return (0);
@@ -235,9 +220,8 @@ __cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
WT_BTREE *btree;
WT_PAGE *page;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = cbt->ref->page;
btree = S2BT(session);
@@ -265,35 +249,21 @@ new_page:
cbt->ins = __col_insert_search(cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno);
if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
cbt->ins = NULL;
- upd = NULL;
/*
- * FIXME-PM-1523: Now we only do transaction read if we have an update chain and it doesn't work
+ * FIXME-WT-6127: Now we only do transaction read if we have an update chain and it doesn't work
* in durable history. Review this when we have a plan for fixed-length column store.
*/
+ __wt_upd_value_clear(cbt->upd_value);
if (cbt->ins != NULL)
restart_read:
- WT_RET(__wt_txn_read(session, cbt, NULL, cbt->recno, cbt->ins->upd, NULL, &upd));
- if (upd == NULL) {
+ WT_RET(__wt_txn_read(session, cbt, NULL, cbt->recno, cbt->ins->upd, NULL));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID) {
cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
cbt->iface.value.data = &cbt->v;
- } else {
- /*
- * If this update has been restored from the disk, it needs to be freed after copying it to
- * the user cursor.
- */
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK)) {
- switch (upd->type) {
- case WT_UPDATE_TOMBSTONE:
- cbt->iface.value.data = upd->data;
- __wt_free_update_list(session, &upd);
- break;
- default:
- return (__wt_value_return(cbt, upd));
- }
- }
- if (upd != NULL)
- cbt->iface.value.data = upd->data;
- }
+ } else if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
+ cbt->iface.value.data = cbt->upd_value->buf.data;
+ else
+ WT_RET(__wt_value_return(cbt, cbt->upd_value));
cbt->iface.value.size = 1;
return (0);
}
@@ -306,9 +276,8 @@ static inline int
__cursor_var_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/* If restarting after a prepare conflict, jump to the right spot. */
if (restart)
@@ -327,17 +296,16 @@ new_page:
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
restart_read:
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
- if (upd == NULL)
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID)
continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK) && upd->type != WT_UPDATE_TOMBSTONE)
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/* NOTREACHED */
}
@@ -355,10 +323,9 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
WT_INSERT *ins;
WT_PAGE *page;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
uint64_t rle_start;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = cbt->ref->page;
rle_start = 0; /* -Werror=maybe-uninitialized */
@@ -398,18 +365,17 @@ restart_read:
/* Check any insert list for a matching record. */
cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
- upd = NULL;
+ __wt_upd_value_clear(cbt->upd_value);
if (cbt->ins != NULL)
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
+ if (cbt->upd_value->type != WT_UPDATE_INVALID) {
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/*
@@ -449,8 +415,9 @@ restart_read:
continue;
}
- WT_RET(__wt_bt_col_var_cursor_walk_txn_read(session, cbt, page, &unpack, cip, &upd));
- if (upd == NULL)
+ WT_RET(__wt_bt_col_var_cursor_walk_txn_read(session, cbt, page, &unpack, cip));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID ||
+ cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
continue;
return (0);
}
@@ -474,10 +441,9 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
WT_PAGE *page;
WT_ROW *rip;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
bool kpack_used;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = cbt->ref->page;
key = &cbt->iface.key;
@@ -536,17 +502,16 @@ restart_read_insert:
if ((ins = cbt->ins) != NULL) {
key->data = WT_INSERT_KEY(ins);
key->size = WT_INSERT_KEY_SIZE(ins);
- WT_RET(__wt_txn_read_upd_list(session, ins->upd, &upd));
- if (upd == NULL)
+ WT_RET(__wt_txn_read_upd_list(session, cbt, ins->upd));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID)
continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/* Check for the beginning of the page. */
@@ -574,17 +539,16 @@ restart_read_page:
rip = &page->pg_row[cbt->slot];
WT_RET(__cursor_row_slot_key_return(cbt, rip, &kpack, &kpack_used));
WT_RET(__wt_txn_read(
- session, cbt, &cbt->iface.key, WT_RECNO_OOB, WT_ROW_UPDATE(page, rip), NULL, &upd));
- if (upd == NULL)
+ session, cbt, &cbt->iface.key, WT_RECNO_OOB, WT_ROW_UPDATE(page, rip), NULL));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID)
continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/* NOTREACHED */
}
@@ -604,7 +568,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
bool newpage, restart;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
WT_STAT_CONN_INCR(session, cursor_prev);
WT_STAT_DATA_INCR(session, cursor_prev);
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 61a0a2653f6..ccec03700d0 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -60,7 +60,7 @@ __cursor_page_pinned(WT_CURSOR_BTREE *cbt, bool search_operation)
WT_SESSION_IMPL *session;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
/*
* Check the page active flag, asserting the page reference with any external key.
@@ -171,21 +171,18 @@ __cursor_fix_implicit(WT_BTREE *btree, WT_CURSOR_BTREE *cbt)
* Return if the cursor references an valid key/value pair.
*/
int
-__wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, WT_UPDATE **updp, bool *valid)
+__wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, bool *valid)
{
WT_BTREE *btree;
WT_CELL *cell;
WT_COL *cip;
WT_PAGE *page;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- if (updp != NULL)
- *updp = NULL;
*valid = false;
btree = cbt->btree;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/*
* We may be pointing to an insert object, and we may have a page with
@@ -232,22 +229,22 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, WT_UPDATE
* update that's been deleted is not a valid key/value pair).
*/
if (cbt->ins != NULL) {
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- WT_ASSERT(session, !F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK));
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
+ if (cbt->upd_value->type != WT_UPDATE_INVALID) {
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
return (0);
- }
- if (updp != NULL)
- *updp = upd;
- else if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
*valid = true;
return (0);
}
}
/*
+ * Clean out any stale value here. Calling a transaction read helper automatically clears this
+ * but we have some code paths that don't do this (fixed length column store is one example).
+ */
+ __wt_upd_value_clear(cbt->upd_value);
+
+ /*
* If we don't have an insert object, or in the case of column-store, there's an insert object
* but no update was visible to us and the key on the page is the same as the insert object's
* key, and the slot as set by the search function is valid, we can use the original page
@@ -299,17 +296,10 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, WT_UPDATE
* Check for an update ondisk or in the history store. For column store, an insert object
* can have the same key as an on-page or history store object.
*/
- WT_RET(__wt_txn_read(session, cbt, key, recno, NULL, NULL, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
+ WT_RET(__wt_txn_read(session, cbt, key, recno, NULL, NULL));
+ if (cbt->upd_value->type != WT_UPDATE_INVALID) {
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
return (0);
- }
- if (updp != NULL)
- *updp = upd;
- else if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
*valid = true;
}
break;
@@ -335,17 +325,10 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, WT_UPDATE
(page->modify != NULL && page->modify->mod_row_update != NULL) ?
page->modify->mod_row_update[cbt->slot] :
NULL,
- NULL, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
+ NULL));
+ if (cbt->upd_value->type != WT_UPDATE_INVALID) {
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
return (0);
- }
- if (updp != NULL)
- *updp = upd;
- else if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
*valid = true;
}
break;
@@ -363,7 +346,7 @@ __cursor_col_search(WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool *leaf_foundp)
WT_DECL_RET;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
WT_WITH_PAGE_INDEX(
session, ret = __wt_col_search(cbt, cbt->iface.recno, leaf, false, leaf_foundp));
return (ret);
@@ -379,7 +362,7 @@ __cursor_row_search(WT_CURSOR_BTREE *cbt, bool insert, WT_REF *leaf, bool *leaf_
WT_DECL_RET;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
WT_WITH_PAGE_INDEX(
session, ret = __wt_row_search(cbt, &cbt->iface.key, insert, leaf, false, leaf_foundp));
return (ret);
@@ -429,7 +412,7 @@ __wt_btcur_reset(WT_CURSOR_BTREE *cbt)
WT_SESSION_IMPL *session;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
WT_STAT_CONN_INCR(session, cursor_reset);
WT_STAT_DATA_INCR(session, cursor_reset);
@@ -440,11 +423,11 @@ __wt_btcur_reset(WT_CURSOR_BTREE *cbt)
}
/*
- * __wt_btcur_search_uncommitted --
- * Search and return exact matching records only, including uncommitted ones.
+ * __wt_btcur_search_prepared --
+ * Search and return exact matching records only.
*/
int
-__wt_btcur_search_uncommitted(WT_CURSOR *cursor, WT_UPDATE **updp)
+__wt_btcur_search_prepared(WT_CURSOR *cursor, WT_UPDATE **updp)
{
WT_BTREE *btree;
WT_CURSOR_BTREE *cbt;
@@ -500,12 +483,6 @@ __wt_btcur_search_uncommitted(WT_CURSOR *cursor, WT_UPDATE **updp)
break;
}
- /*
- * Like regular uncommitted updates, pages with prepared updates are pinned to the cache and can
- * never be written to the history store. Therefore, there is no need to do a search here for
- * uncommitted updates.
- */
-
*updp = upd;
return (0);
}
@@ -522,13 +499,11 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
- upd = NULL; /* -Wuninitialized */
+ session = CUR2S(cbt);
WT_STAT_CONN_INCR(session, cursor_search);
WT_STAT_DATA_INCR(session, cursor_search);
@@ -557,11 +532,11 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
if (btree->type == BTREE_ROW) {
WT_ERR(__cursor_row_search(cbt, false, cbt->ref, &leaf_found));
if (leaf_found && cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
} else {
WT_ERR(__cursor_col_search(cbt, cbt->ref, &leaf_found));
if (leaf_found && cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, &valid));
}
}
if (!valid) {
@@ -570,16 +545,16 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
if (btree->type == BTREE_ROW) {
WT_ERR(__cursor_row_search(cbt, false, NULL, NULL));
if (cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
} else {
WT_ERR(__cursor_col_search(cbt, NULL, NULL));
if (cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, &valid));
}
}
if (valid)
- ret = __cursor_kv_return(cbt, upd);
+ ret = __cursor_kv_return(cbt, cbt->upd_value);
else if (__cursor_fix_implicit(btree, cbt)) {
/*
* Creating a record past the end of the tree in a fixed-length column-store implicitly
@@ -619,14 +594,12 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
int exact;
bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
- upd = NULL; /* -Wuninitialized */
+ session = CUR2S(cbt);
exact = 0;
WT_STAT_CONN_INCR(session, cursor_search_near);
@@ -671,7 +644,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
*/
if (leaf_found &&
(cbt->compare == 0 || (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)))
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
}
if (!valid) {
WT_ERR(__cursor_func_init(cbt, true));
@@ -682,10 +655,10 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
*/
if (btree->type == BTREE_ROW) {
WT_ERR(__cursor_row_search(cbt, true, NULL, NULL));
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
} else {
WT_ERR(__cursor_col_search(cbt, NULL, NULL));
- WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, &valid));
}
}
@@ -706,7 +679,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
*/
if (valid) {
exact = cbt->compare;
- ret = __cursor_kv_return(cbt, upd);
+ ret = __cursor_kv_return(cbt, cbt->upd_value);
} else if (__cursor_fix_implicit(btree, cbt)) {
cbt->recno = cursor->recno;
cbt->v = 0;
@@ -781,9 +754,12 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
btree = cbt->btree;
cursor = &cbt->iface;
insert_bytes = cursor->key.size + cursor->value.size;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
yield_count = sleep_usecs = 0;
+ WT_RET_PANIC_ASSERT(
+ session, S2BT(session) == btree, WT_PANIC, "btree differs unexpectedly from session's btree");
+
WT_STAT_CONN_INCR(session, cursor_insert);
WT_STAT_DATA_INCR(session, cursor_insert);
WT_STAT_CONN_INCRV(session, cursor_insert_bytes, insert_bytes);
@@ -793,9 +769,6 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
WT_RET(__cursor_size_chk(session, &cursor->key));
WT_RET(__cursor_size_chk(session, &cursor->value));
- WT_RET_ASSERT(
- session, S2BT(session) == btree, WT_PANIC, "btree differs unexpectedly from session's btree");
-
/* It's no longer possible to bulk-load into the tree. */
__wt_cursor_disable_bulk(session);
@@ -859,7 +832,9 @@ retry:
* If not overwriting, fail if the key exists, else insert the key/value pair.
*/
if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && cbt->compare == 0) {
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, NULL, &valid));
+ WT_WITH_UPDATE_VALUE_SKIP_BUF(
+ ret = __wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
+ WT_ERR(ret);
if (valid)
WT_ERR(WT_DUPLICATE_KEY);
}
@@ -885,7 +860,9 @@ retry:
*/
if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
if (cbt->compare == 0) {
- WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, NULL, &valid));
+ WT_WITH_UPDATE_VALUE_SKIP_BUF(
+ ret = __wt_cursor_valid(cbt, NULL, cbt->recno, &valid));
+ WT_ERR(ret);
if (valid)
WT_ERR(WT_DUPLICATE_KEY);
} else if (__cursor_fix_implicit(btree, cbt))
@@ -932,7 +909,7 @@ __curfile_update_check(WT_CURSOR_BTREE *cbt)
btree = cbt->btree;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
upd = NULL;
if (cbt->compare != 0)
@@ -964,7 +941,7 @@ __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt)
uint64_t yield_count, sleep_usecs;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
yield_count = sleep_usecs = 0;
WT_ASSERT(session, cbt->btree->type == BTREE_ROW);
@@ -1015,7 +992,7 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned)
btree = cbt->btree;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
yield_count = sleep_usecs = 0;
iterating = F_ISSET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV);
searched = false;
@@ -1085,7 +1062,8 @@ retry:
if (cbt->compare != 0)
goto search_notfound;
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, NULL, &valid));
+ WT_WITH_UPDATE_VALUE_SKIP_BUF(ret = __wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
+ WT_ERR(ret);
if (!valid)
goto search_notfound;
@@ -1103,8 +1081,10 @@ retry:
/* Remove the record if it exists. */
valid = false;
- if (cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, NULL, &valid));
+ if (cbt->compare == 0) {
+ WT_WITH_UPDATE_VALUE_SKIP_BUF(ret = __wt_cursor_valid(cbt, NULL, cbt->recno, &valid));
+ WT_ERR(ret);
+ }
if (cbt->compare != 0 || !valid) {
if (!__cursor_fix_implicit(btree, cbt))
goto search_notfound;
@@ -1203,10 +1183,10 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
btree = cbt->btree;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
yield_count = sleep_usecs = 0;
- WT_RET_ASSERT(
+ WT_RET_PANIC_ASSERT(
session, S2BT(session) == btree, WT_PANIC, "btree differs unexpectedly from session's btree");
/* It's no longer possible to bulk-load into the tree. */
@@ -1287,7 +1267,9 @@ update_local:
WT_ERR(__curfile_update_check(cbt));
if (cbt->compare != 0)
WT_ERR(WT_NOTFOUND);
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, NULL, &valid));
+ WT_WITH_UPDATE_VALUE_SKIP_BUF(
+ ret = __wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
+ WT_ERR(ret);
if (!valid)
WT_ERR(WT_NOTFOUND);
}
@@ -1302,8 +1284,11 @@ update_local:
if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
WT_ERR(__curfile_update_check(cbt));
valid = false;
- if (cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, NULL, &valid));
+ if (cbt->compare == 0) {
+ WT_WITH_UPDATE_VALUE_SKIP_BUF(
+ ret = __wt_cursor_valid(cbt, NULL, cbt->recno, &valid));
+ WT_ERR(ret);
+ }
if ((cbt->compare != 0 || !valid) && !__cursor_fix_implicit(btree, cbt))
WT_ERR(WT_NOTFOUND);
}
@@ -1375,7 +1360,7 @@ __cursor_chain_exceeded(WT_CURSOR_BTREE *cbt)
cursor = &cbt->iface;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
upd = NULL;
if (cbt->ins != NULL)
@@ -1429,7 +1414,7 @@ __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries)
bool overwrite;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
/* Save the cursor state. */
__cursor_state_save(cursor, &state);
@@ -1465,7 +1450,7 @@ __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries)
WT_ERR(__wt_modify_pack(cursor, entries, nentries, &modify));
orig = cursor->value.size;
- WT_ERR(__wt_modify_apply(cursor, modify->data));
+ WT_ERR(__wt_modify_apply_item(session, cursor->value_format, &cursor->value, modify->data));
new = cursor->value.size;
WT_ERR(__cursor_size_chk(session, &cursor->value));
@@ -1515,7 +1500,7 @@ __wt_btcur_reserve(WT_CURSOR_BTREE *cbt)
bool overwrite;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
WT_STAT_CONN_INCR(session, cursor_reserve);
WT_STAT_DATA_INCR(session, cursor_reserve);
@@ -1542,7 +1527,7 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt)
btree = cbt->btree;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
WT_STAT_CONN_INCR(session, cursor_update);
WT_STAT_DATA_INCR(session, cursor_update);
@@ -1568,7 +1553,7 @@ __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp)
a = (WT_CURSOR *)a_arg;
b = (WT_CURSOR *)b_arg;
- session = (WT_SESSION_IMPL *)a->session;
+ session = CUR2S(a_arg);
/* Confirm both cursors reference the same object. */
if (a_arg->btree != b_arg->btree)
@@ -1640,8 +1625,8 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp)
a = (WT_CURSOR *)a_arg;
b = (WT_CURSOR *)b_arg;
+ session = CUR2S(a_arg);
cmp = 0;
- session = (WT_SESSION_IMPL *)a->session;
/* Confirm both cursors reference the same object. */
if (a_arg->btree != b_arg->btree)
@@ -1673,7 +1658,7 @@ __cursor_truncate(
WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
- session = (WT_SESSION_IMPL *)start->iface.session;
+ session = CUR2S(start);
yield_count = sleep_usecs = 0;
/*
@@ -1729,7 +1714,7 @@ __cursor_truncate_fix(
uint64_t yield_count, sleep_usecs;
const uint8_t *value;
- session = (WT_SESSION_IMPL *)start->iface.session;
+ session = CUR2S(start);
yield_count = sleep_usecs = 0;
/*
@@ -1786,8 +1771,8 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
WT_DECL_RET;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)start->iface.session;
btree = start->btree;
+ session = CUR2S(start);
WT_STAT_DATA_INCR(session, cursor_truncate);
WT_RET(__wt_txn_autocommit_check(session));
@@ -1852,6 +1837,8 @@ __wt_btcur_open(WT_CURSOR_BTREE *cbt)
{
cbt->row_key = &cbt->_row_key;
cbt->tmp = &cbt->_tmp;
+ cbt->modify_update = &cbt->_modify_update;
+ cbt->upd_value = &cbt->_upd_value;
#ifdef HAVE_DIAGNOSTIC
cbt->lastkey = &cbt->_lastkey;
@@ -1869,7 +1856,7 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel)
WT_DECL_RET;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/*
* The in-memory split and history store table code creates low-level btree cursors to
@@ -1879,6 +1866,8 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel)
if (!lowlevel)
ret = __cursor_reset(cbt);
+ __wt_buf_free(session, &cbt->_modify_update.buf);
+ __wt_buf_free(session, &cbt->_upd_value.buf);
__wt_buf_free(session, &cbt->_row_key);
__wt_buf_free(session, &cbt->_tmp);
#ifdef HAVE_DIAGNOSTIC
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index e3e89620fd5..a835e593022 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -156,18 +156,16 @@ __debug_item_value(WT_DBG *ds, const char *tag, const void *data_arg, size_t siz
}
/*
- * __debug_time_pairs --
+ * __debug_time_window --
* Dump a set of start and stop time pairs, with an optional tag.
*/
static inline int
-__debug_time_pairs(WT_DBG *ds, const char *tag, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn)
+__debug_time_window(WT_DBG *ds, const char *tag, WT_TIME_WINDOW *tw)
{
- char tp_string[2][WT_TP_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
- return (ds->f(ds, "\t%s%s%s,%s\n", tag == NULL ? "" : tag, tag == NULL ? "" : " ",
- __wt_time_pair_to_string(start_ts, start_txn, tp_string[0]),
- __wt_time_pair_to_string(stop_ts, stop_txn, tp_string[1])));
+ return (ds->f(ds, "\t%s%s%s\n", tag == NULL ? "" : tag, tag == NULL ? "" : " ",
+ __wt_time_window_to_string(tw, time_string)));
}
/*
@@ -711,15 +709,13 @@ int
__wt_debug_cursor_tree_hs(void *cursor_arg, const char *ofile)
WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_CURSOR *cursor;
WT_CURSOR_BTREE *cbt;
WT_DECL_RET;
WT_SESSION_IMPL *session;
uint32_t session_flags;
bool is_owner;
- cursor = cursor_arg;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor_arg);
session_flags = 0; /* [-Werror=maybe-uninitialized] */
WT_RET(__wt_hs_cursor(session, &session_flags, &is_owner));
@@ -741,26 +737,24 @@ __wt_debug_cursor_hs(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor)
WT_DECL_ITEM(hs_key);
WT_DECL_ITEM(hs_value);
WT_DECL_RET;
- WT_TIME_PAIR start, stop;
+ WT_TIME_WINDOW tw;
WT_UPDATE *upd;
- wt_timestamp_t hs_durable_ts;
- uint64_t hs_upd_type_full;
+ uint64_t hs_counter, hs_upd_type_full;
uint32_t hs_btree_id;
- uint8_t hs_prep_state, hs_upd_type;
+ uint8_t hs_upd_type;
ds = &_ds;
+ __wt_time_window_init(&tw);
WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
WT_ERR(__wt_scr_alloc(session, 0, &hs_value));
WT_ERR(__debug_config(session, ds, NULL));
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &start.timestamp, &start.txnid,
- &stop.timestamp, &stop.txnid));
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &tw.start_ts, &hs_counter));
+ WT_ERR(hs_cursor->get_value(
+ hs_cursor, &tw.stop_ts, &tw.durable_start_ts, &hs_upd_type_full, hs_value));
+ WT_ERR(__debug_time_window(ds, "T", &tw));
- WT_ERR(__debug_time_pairs(ds, "T", start.timestamp, start.txnid, stop.timestamp, stop.txnid));
-
- WT_ERR(
- hs_cursor->get_value(hs_cursor, &hs_durable_ts, &hs_prep_state, &hs_upd_type_full, hs_value));
hs_upd_type = (uint8_t)hs_upd_type_full;
switch (hs_upd_type) {
case WT_UPDATE_MODIFY:
@@ -806,8 +800,7 @@ __wt_debug_key_value(
WT_ERR(ds->f(ds, "\tK {%" PRIu64 " %" PRIu64 "}", recno, rle));
else
WT_ERR(__debug_item_key(ds, "K", key->data, key->size));
- WT_ERR(__debug_time_pairs(
- ds, "T", value->start_ts, value->start_txn, value->stop_ts, value->stop_txn));
+ WT_ERR(__debug_time_window(ds, "T", &value->tw));
WT_ERR(__debug_cell_data(ds, NULL, value != NULL ? value->type : 0, "V", value));
err:
@@ -1350,8 +1343,7 @@ __debug_ref(WT_DBG *ds, WT_REF *ref)
{
WT_ADDR_COPY addr;
WT_SESSION_IMPL *session;
- char tp_string[2][WT_TP_STRING_SIZE];
- char ts_string[2][WT_TS_INT_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
session = ds->session;
@@ -1365,13 +1357,7 @@ __debug_ref(WT_DBG *ds, WT_REF *ref)
WT_RET(ds->f(ds, ", %s", "reading"));
if (__wt_ref_addr_copy(session, ref, &addr))
- WT_RET(ds->f(ds,
- ", start/stop durable ts %s,%s, start/stop ts/txn %s,%s, prepared updates: %s, %s",
- __wt_timestamp_to_string(addr.newest_start_durable_ts, ts_string[0]),
- __wt_timestamp_to_string(addr.newest_stop_durable_ts, ts_string[1]),
- __wt_time_pair_to_string(addr.oldest_start_ts, addr.oldest_start_txn, tp_string[0]),
- __wt_time_pair_to_string(addr.newest_stop_ts, addr.newest_stop_txn, tp_string[1]),
- addr.prepare ? "true" : "false",
+ WT_RET(ds->f(ds, "%s, %s", __wt_time_aggregate_to_string(&addr.ta, time_string),
__wt_addr_string(session, addr.addr, addr.size, ds->t1)));
return (ds->f(ds, "\n"));
}
@@ -1386,8 +1372,7 @@ __debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
WT_DECL_ITEM(buf);
WT_DECL_RET;
WT_SESSION_IMPL *session;
- char tp_string[2][WT_TP_STRING_SIZE];
- char ts_string[2][WT_TS_INT_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
session = ds->session;
@@ -1429,11 +1414,7 @@ __debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
- WT_RET(ds->f(ds, ", ts/txn %s,%s,%s,%s",
- __wt_timestamp_to_string(unpack->newest_start_durable_ts, ts_string[0]),
- __wt_timestamp_to_string(unpack->newest_stop_durable_ts, ts_string[1]),
- __wt_time_pair_to_string(unpack->oldest_start_ts, unpack->oldest_start_txn, tp_string[0]),
- __wt_time_pair_to_string(unpack->newest_stop_ts, unpack->newest_stop_txn, tp_string[1])));
+ WT_RET(ds->f(ds, ", %s", __wt_time_aggregate_to_string(&unpack->ta, time_string)));
break;
case WT_CELL_DEL:
case WT_CELL_VALUE:
@@ -1441,9 +1422,7 @@ __debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_OVFL_RM:
case WT_CELL_VALUE_SHORT:
- WT_RET(ds->f(ds, ", ts/txn %s,%s",
- __wt_time_pair_to_string(unpack->start_ts, unpack->start_txn, tp_string[0]),
- __wt_time_pair_to_string(unpack->stop_ts, unpack->stop_txn, tp_string[1])));
+ WT_RET(ds->f(ds, ", %s", __wt_time_window_to_string(&unpack->tw, time_string)));
break;
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index 94b544f6bc2..b9a3eed1c93 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -115,7 +115,7 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
goto err;
if (addr.type != WT_ADDR_LEAF_NO)
goto err;
- if (!__wt_txn_visible(session, addr.oldest_start_txn, addr.oldest_start_ts))
+ if (!__wt_txn_visible(session, addr.ta.oldest_start_txn, addr.ta.oldest_start_ts))
goto err;
/*
@@ -292,7 +292,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
WT_PAGE *page;
WT_PAGE_DELETED *page_del;
WT_ROW *rip;
- WT_TIME_PAIR start, stop;
+ WT_TIME_WINDOW tw;
WT_UPDATE **upd_array, *upd;
size_t size;
uint32_t count, i;
@@ -382,8 +382,8 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
* Retrieve the stop time pair from the page's row. If we find an existing stop time pair we
* don't need to append a tombstone.
*/
- __wt_read_row_time_pairs(session, page, rip, &start, &stop);
- if (stop.timestamp == WT_TS_MAX && stop.txnid == WT_TXN_MAX) {
+ __wt_read_row_time_window(session, page, rip, &tw);
+ if (tw.stop_ts == WT_TS_MAX && tw.stop_txn == WT_TXN_MAX) {
WT_ERR(__tombstone_update_alloc(session, page_del, &upd, &size));
upd->next = upd_array[WT_ROW_SLOT(page, rip)];
upd_array[WT_ROW_SLOT(page, rip)] = upd;
diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c
index d4d83783a1b..7fac3deabd5 100644
--- a/src/third_party/wiredtiger/src/btree/bt_io.c
+++ b/src/third_party/wiredtiger/src/btree/bt_io.c
@@ -140,7 +140,7 @@ corrupt:
F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
if (!F_ISSET(btree, WT_BTREE_VERIFY) && !F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) {
WT_TRET(bm->corrupt(bm, session, addr, addr_size));
- WT_PANIC_ERR(session, ret, "%s: fatal read error: %s", btree->dhandle->name, fail_msg);
+ WT_ERR_PANIC(session, ret, "%s: fatal read error: %s", btree->dhandle->name, fail_msg);
}
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_ovfl.c b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
index 72523b695de..cccd2c628a3 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ovfl.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
@@ -65,7 +65,7 @@ __wt_ovfl_read(
*/
__wt_readlock(session, &S2BT(session)->ovfl_lock);
if (__wt_cell_type_raw(unpack->cell) == WT_CELL_VALUE_OVFL_RM) {
- WT_ASSERT(session, __wt_txn_visible_all(session, unpack->stop_txn, unpack->stop_ts));
+ WT_ASSERT(session, __wt_txn_visible_all(session, unpack->tw.stop_txn, unpack->tw.stop_ts));
ret = __wt_buf_setstr(session, store, "WT_CELL_VALUE_OVFL_RM");
*decoded = true;
} else
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index ac588bf901d..1a690b24804 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -544,13 +544,23 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_BTREE *btree;
WT_CELL_UNPACK unpack;
+ WT_ITEM buf;
WT_ROW *rip;
+ WT_UPDATE **upd_array, *upd;
+ size_t size, total_size;
+ uint32_t i;
+ bool instantiate_prepared, prepare;
btree = S2BT(session);
+ prepare = false;
+
+ instantiate_prepared = F_ISSET_ATOMIC(page, WT_PAGE_INSTANTIATE_PREPARE_UPDATE);
/* Walk the page, building indices. */
rip = page->pg_row;
WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ if (instantiate_prepared && !prepare && F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE))
+ prepare = true;
switch (unpack.type) {
case WT_CELL_KEY_OVFL:
__wt_row_leaf_key_set_cell(page, rip, unpack.cell);
@@ -575,9 +585,9 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
* The visibility information is not referenced on the page so we need to ensure that
* the value is globally visible at the point in time where we read the page into cache.
*/
- if (!btree->huffman_value && unpack.stop_txn == WT_TXN_MAX &&
- unpack.stop_ts == WT_TS_MAX &&
- __wt_txn_visible_all(session, unpack.start_txn, unpack.start_ts))
+ if (!btree->huffman_value && unpack.tw.stop_txn == WT_TXN_MAX &&
+ unpack.tw.stop_ts == WT_TS_MAX && !F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE) &&
+ __wt_txn_visible_all(session, unpack.tw.start_txn, unpack.tw.start_ts))
__wt_row_leaf_value_set(page, rip - 1, &unpack);
break;
case WT_CELL_VALUE_OVFL:
@@ -589,8 +599,47 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_CELL_FOREACH_END;
/*
- * We do not currently instantiate keys on leaf pages when the page is loaded, they're
- * instantiated on demand.
+ * Instantiate prepared updates on leaf pages when the page is loaded. For in-memory databases,
+ * all non obsolete updates will retain on the page as part of __split_multi_inmem function.
*/
+ if (prepare && !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) {
+ WT_RET(__wt_page_modify_init(session, page));
+ if (!F_ISSET(btree, WT_BTREE_READONLY))
+ __wt_page_modify_set(session, page);
+
+ /* Allocate the per-page update array if one doesn't already exist. */
+ if (page->entries != 0 && page->modify->mod_row_update == NULL)
+ WT_RET(__wt_calloc_def(session, page->entries, &page->modify->mod_row_update));
+
+ /* For each entry in the page */
+ size = total_size = 0;
+ upd_array = page->modify->mod_row_update;
+ WT_ROW_FOREACH (page, rip, i) {
+ /* Unpack the on-page value cell. */
+ __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
+ if (F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE)) {
+ if (unpack.tw.stop_ts == WT_TS_MAX && unpack.tw.stop_txn == WT_TXN_MAX) {
+ /* Take the value from the original page cell. */
+ WT_RET(__wt_page_cell_data_ref(session, page, &unpack, &buf));
+
+ WT_RET(__wt_upd_alloc(session, &buf, WT_UPDATE_STANDARD, &upd, &size));
+ upd->durable_ts = WT_TS_NONE;
+ upd->start_ts = unpack.tw.start_ts;
+ upd->txnid = unpack.tw.start_txn;
+ } else {
+ WT_RET(__wt_upd_alloc_tombstone(session, &upd, &size));
+ upd->durable_ts = WT_TS_NONE;
+ upd->start_ts = unpack.tw.stop_ts;
+ upd->txnid = unpack.tw.stop_txn;
+ }
+ upd->prepare_state = WT_PREPARE_INPROGRESS;
+ upd_array[WT_ROW_SLOT(page, rip)] = upd;
+ total_size += size;
+ }
+ }
+
+ __wt_cache_page_inmem_incr(session, page, total_size);
+ }
+
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index b3a8985fbe4..3f113e4b2dc 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -13,10 +13,8 @@
* Check if the inserted key/value pair is valid.
*/
static int
-__random_insert_valid(
- WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_INSERT *ins, WT_UPDATE **updp, bool *validp)
+__random_insert_valid(WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_INSERT *ins, bool *validp)
{
- *updp = NULL;
*validp = false;
__cursor_pos_clear(cbt);
@@ -27,7 +25,7 @@ __random_insert_valid(
cbt->tmp->data = WT_INSERT_KEY(ins);
cbt->tmp->size = WT_INSERT_KEY_SIZE(ins);
- return (__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, updp, validp));
+ return (__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, validp));
}
/*
@@ -35,16 +33,15 @@ __random_insert_valid(
* Check if the slot key/value pair is valid.
*/
static int
-__random_slot_valid(WT_CURSOR_BTREE *cbt, uint32_t slot, WT_UPDATE **updp, bool *validp)
+__random_slot_valid(WT_CURSOR_BTREE *cbt, uint32_t slot, bool *validp)
{
- *updp = NULL;
*validp = false;
__cursor_pos_clear(cbt);
cbt->slot = slot;
cbt->compare = 0;
- return (__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, updp, validp));
+ return (__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, validp));
}
/* Magic constant: 5000 entries in a skip list is enough to forcibly evict. */
@@ -64,7 +61,7 @@ __random_skip_entries(WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head)
uint32_t entries;
int level;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
entries = 0; /* [-Wconditional-uninitialized] */
if (ins_head == NULL)
@@ -106,18 +103,16 @@ __random_skip_entries(WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head)
* Return a random key/value from a skip list.
*/
static int
-__random_leaf_skip(
- WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, uint32_t entries, WT_UPDATE **updp, bool *validp)
+__random_leaf_skip(WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, uint32_t entries, bool *validp)
{
WT_INSERT *ins, *saved_ins;
WT_SESSION_IMPL *session;
uint32_t i;
int retry;
- *updp = NULL;
*validp = false;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/* This is a relatively expensive test, try a few times then quit. */
for (retry = 0; retry < WT_RANDOM_SKIP_RETRY; ++retry) {
@@ -136,7 +131,7 @@ __random_leaf_skip(
/* Try and return our selected record. */
if (ins != NULL) {
- WT_RET(__random_insert_valid(cbt, ins_head, ins, updp, validp));
+ WT_RET(__random_insert_valid(cbt, ins_head, ins, validp));
if (*validp)
return (0);
}
@@ -148,7 +143,7 @@ __random_leaf_skip(
ins = saved_ins;
}
for (; --i > 0 && ins != NULL; ins = WT_SKIP_NEXT(ins)) {
- WT_RET(__random_insert_valid(cbt, ins_head, ins, updp, validp));
+ WT_RET(__random_insert_valid(cbt, ins_head, ins, validp));
if (*validp)
return (0);
}
@@ -166,24 +161,23 @@ __random_leaf_skip(
* Look for a large insert list from which we can select a random item.
*/
static int
-__random_leaf_insert(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *validp)
+__random_leaf_insert(WT_CURSOR_BTREE *cbt, bool *validp)
{
WT_INSERT_HEAD *ins_head;
WT_PAGE *page;
WT_SESSION_IMPL *session;
uint32_t entries, slot, start;
- *updp = NULL;
*validp = false;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/* Check for a large insert list with no items, that's common when tables are newly created. */
ins_head = WT_ROW_INSERT_SMALLEST(page);
entries = __random_skip_entries(cbt, ins_head);
if (entries >= WT_RANDOM_SKIP_INSERT_SMALLEST_ENOUGH) {
- WT_RET(__random_leaf_skip(cbt, ins_head, entries, updp, validp));
+ WT_RET(__random_leaf_skip(cbt, ins_head, entries, validp));
if (*validp)
return (0);
}
@@ -199,7 +193,7 @@ __random_leaf_insert(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *validp)
ins_head = WT_ROW_INSERT(page, &page->pg_row[slot]);
entries = __random_skip_entries(cbt, ins_head);
if (entries >= WT_RANDOM_SKIP_INSERT_ENOUGH) {
- WT_RET(__random_leaf_skip(cbt, ins_head, entries, updp, validp));
+ WT_RET(__random_leaf_skip(cbt, ins_head, entries, validp));
if (*validp)
return (0);
}
@@ -208,7 +202,7 @@ __random_leaf_insert(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *validp)
ins_head = WT_ROW_INSERT(page, &page->pg_row[slot]);
entries = __random_skip_entries(cbt, ins_head);
if (entries >= WT_RANDOM_SKIP_INSERT_ENOUGH) {
- WT_RET(__random_leaf_skip(cbt, ins_head, entries, updp, validp));
+ WT_RET(__random_leaf_skip(cbt, ins_head, entries, validp));
if (*validp)
return (0);
}
@@ -219,7 +213,7 @@ __random_leaf_insert(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *validp)
ins_head = WT_ROW_INSERT_SMALLEST(page);
entries = __random_skip_entries(cbt, ins_head);
if (entries >= WT_RANDOM_SKIP_INSERT_ENOUGH) {
- WT_RET(__random_leaf_skip(cbt, ins_head, entries, updp, validp));
+ WT_RET(__random_leaf_skip(cbt, ins_head, entries, validp));
if (*validp)
return (0);
}
@@ -234,25 +228,24 @@ __random_leaf_insert(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *validp)
* Return a random key/value from a page's on-disk entries.
*/
static int
-__random_leaf_disk(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *validp)
+__random_leaf_disk(WT_CURSOR_BTREE *cbt, bool *validp)
{
WT_PAGE *page;
WT_SESSION_IMPL *session;
uint32_t entries, slot;
int retry;
- *updp = NULL;
*validp = false;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
entries = cbt->ref->page->entries;
/* This is a relatively cheap test, so try several times. */
for (retry = 0; retry < WT_RANDOM_DISK_RETRY; ++retry) {
slot = __wt_random(&session->rnd) % entries;
WT_RET(__wt_row_leaf_key(session, page, page->pg_row + slot, cbt->tmp, false));
- WT_RET(__random_slot_valid(cbt, slot, updp, validp));
+ WT_RET(__random_slot_valid(cbt, slot, validp));
if (*validp)
break;
}
@@ -274,12 +267,11 @@ __random_leaf(WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
uint32_t i;
bool next, valid;
- cursor = (WT_CURSOR *)cbt;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ cursor = &cbt->iface;
+ session = CUR2S(cbt);
/*
* If the page has a sufficiently large number of disk-based entries, randomly select from them.
@@ -287,24 +279,24 @@ __random_leaf(WT_CURSOR_BTREE *cbt)
* a reasonable chunk of the name space.
*/
if (cbt->ref->page->entries > WT_RANDOM_DISK_ENOUGH) {
- WT_RET(__random_leaf_disk(cbt, &upd, &valid));
+ WT_RET(__random_leaf_disk(cbt, &valid));
if (valid)
- return (__cursor_kv_return(cbt, upd));
+ return (__cursor_kv_return(cbt, cbt->upd_value));
}
/* Look for any large insert list and select from it. */
- WT_RET(__random_leaf_insert(cbt, &upd, &valid));
+ WT_RET(__random_leaf_insert(cbt, &valid));
if (valid)
- return (__cursor_kv_return(cbt, upd));
+ return (__cursor_kv_return(cbt, cbt->upd_value));
/*
* Try again if there are at least a few hundred disk-based entries: this may be a normal leaf
* page with big items.
*/
if (cbt->ref->page->entries > WT_RANDOM_DISK_ENOUGH / 2) {
- WT_RET(__random_leaf_disk(cbt, &upd, &valid));
+ WT_RET(__random_leaf_disk(cbt, &valid));
if (valid)
- return (__cursor_kv_return(cbt, upd));
+ return (__cursor_kv_return(cbt, cbt->upd_value));
}
/*
@@ -484,7 +476,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
btree = cbt->btree;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
read_flags = WT_READ_RESTART_OK;
if (F_ISSET(cbt, WT_CBT_READ_ONCE))
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 5c8c0ea871a..4d83914e1a3 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -157,6 +157,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
* evicting that page and deciding that is a sign that eviction is unstuck.
*/
page_flags = WT_DATA_IN_ITEM(&tmp) ? WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED;
+ FLD_SET(page_flags, WT_PAGE_INSTANTIATE_PREPARE_UPDATE);
if (LF_ISSET(WT_READ_IGNORE_CACHE_SIZE))
FLD_SET(page_flags, WT_PAGE_EVICT_NO_PROGRESS);
WT_ERR(__wt_page_inmem(session, ref, tmp.data, page_flags, &notused));
diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
index 353f159f6bb..5f29cf08691 100644
--- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c
+++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
@@ -76,13 +76,7 @@ __rebalance_leaf_append(WT_SESSION_IMPL *session, const uint8_t *key, size_t key
WT_RET(__wt_calloc_one(session, &copy_addr));
copy->addr = copy_addr;
- copy_addr->newest_start_durable_ts = unpack->newest_start_durable_ts;
- copy_addr->oldest_start_ts = unpack->oldest_start_ts;
- copy_addr->oldest_start_txn = unpack->oldest_start_txn;
- copy_addr->newest_stop_durable_ts = unpack->newest_stop_durable_ts;
- copy_addr->newest_stop_ts = unpack->newest_stop_ts;
- copy_addr->newest_stop_txn = unpack->newest_stop_txn;
- copy_addr->prepare = F_ISSET(unpack, WT_CELL_UNPACK_PREPARE);
+ __wt_time_aggregate_copy(&copy_addr->ta, &unpack->ta);
WT_RET(__wt_memdup(session, unpack->data, unpack->size, &copy_addr->addr));
copy_addr->size = (uint8_t)unpack->size;
copy_addr->type = unpack->type == WT_CELL_ADDR_LEAF ? WT_ADDR_LEAF : WT_ADDR_LEAF_NO;
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index 2061d561a7a..1a2360f6d09 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -23,7 +23,7 @@ __key_return(WT_CURSOR_BTREE *cbt)
page = cbt->ref->page;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
if (page->type == WT_PAGE_ROW_LEAF) {
rip = &page->pg_row[cbt->slot];
@@ -70,102 +70,71 @@ __key_return(WT_CURSOR_BTREE *cbt)
}
/*
- * __time_pairs_init --
- * Initialize the time pairs to globally visible.
+ * __read_col_time_window --
+ * Retrieve the time window from a column store cell.
*/
-static inline void
-__time_pairs_init(WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
+static void
+__read_col_time_window(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_TIME_WINDOW *tw)
{
- start->txnid = WT_TXN_NONE;
- start->timestamp = WT_TS_NONE;
- stop->txnid = WT_TXN_MAX;
- stop->timestamp = WT_TS_MAX;
+ WT_CELL_UNPACK unpack;
+
+ __wt_cell_unpack(session, page, cell, &unpack);
+ __wt_time_window_copy(tw, &unpack.tw);
}
/*
- * __time_pairs_set --
- * Set the time pairs.
+ * __wt_read_row_time_window --
+ * Retrieve the time window from a row.
*/
-static inline void
-__time_pairs_set(WT_TIME_PAIR *start, WT_TIME_PAIR *stop, WT_CELL_UNPACK *unpack)
+void
+__wt_read_row_time_window(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_TIME_WINDOW *tw)
{
- start->timestamp = unpack->start_ts;
- start->txnid = unpack->start_txn;
- stop->timestamp = unpack->stop_ts;
- stop->txnid = unpack->stop_txn;
+ WT_CELL_UNPACK unpack;
+
+ __wt_time_window_init(tw);
+ /*
+ * If a value is simple and is globally visible at the time of reading a page into cache, we set
+ * the time pairs as globally visible.
+ */
+ if (__wt_row_leaf_value_exists(rip))
+ return;
+
+ __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
+ __wt_time_window_copy(tw, &unpack.tw);
}
/*
- * __wt_read_cell_time_pairs --
+ * __wt_read_cell_time_window --
* Read the time pairs from the cell.
*/
void
-__wt_read_cell_time_pairs(
- WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
+__wt_read_cell_time_window(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_TIME_WINDOW *tw)
{
WT_PAGE *page;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = ref->page;
- WT_ASSERT(session, start != NULL && stop != NULL);
+ WT_ASSERT(session, tw != NULL);
/* Take the value from the original page cell. */
if (page->type == WT_PAGE_ROW_LEAF) {
- __wt_read_row_time_pairs(session, page, &page->pg_row[cbt->slot], start, stop);
+ __wt_read_row_time_window(session, page, &page->pg_row[cbt->slot], tw);
} else if (page->type == WT_PAGE_COL_VAR) {
- __wt_read_col_time_pairs(
- session, page, WT_COL_PTR(page, &page->pg_var[cbt->slot]), start, stop);
+ __read_col_time_window(session, page, WT_COL_PTR(page, &page->pg_var[cbt->slot]), tw);
} else {
/* WT_PAGE_COL_FIX: return the default time pairs. */
- __time_pairs_init(start, stop);
+ __wt_time_window_init(tw);
}
}
/*
- * __wt_read_col_time_pairs --
- * Retrieve the time pairs from a column store cell.
- */
-void
-__wt_read_col_time_pairs(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
-{
- WT_CELL_UNPACK unpack;
-
- __wt_cell_unpack(session, page, cell, &unpack);
- __time_pairs_set(start, stop, &unpack);
-}
-
-/*
- * __wt_read_row_time_pairs --
- * Retrieve the time pairs from a row.
- */
-void
-__wt_read_row_time_pairs(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
-{
- WT_CELL_UNPACK unpack;
-
- __time_pairs_init(start, stop);
- /*
- * If a value is simple and is globally visible at the time of reading a page into cache, we set
- * the time pairs as globally visible.
- */
- if (__wt_row_leaf_value_exists(rip))
- return;
-
- __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
- __time_pairs_set(start, stop, &unpack);
-}
-
-/*
* __wt_value_return_buf --
* Change a buffer to reference an internal original-page return value.
*/
int
-__wt_value_return_buf(
- WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_ITEM *buf, WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
+__wt_value_return_buf(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_ITEM *buf, WT_TIME_WINDOW *tw)
{
WT_BTREE *btree;
WT_CELL *cell;
@@ -176,18 +145,12 @@ __wt_value_return_buf(
WT_SESSION_IMPL *session;
uint8_t v;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
btree = S2BT(session);
page = ref->page;
cursor = &cbt->iface;
- if (start != NULL && stop != NULL)
- __time_pairs_init(start, stop);
-
- /* Must provide either both start and stop as output parameters or neither. */
- WT_ASSERT(session, (start != NULL && stop != NULL) || (start == NULL && stop == NULL));
-
if (page->type == WT_PAGE_ROW_LEAF) {
rip = &page->pg_row[cbt->slot];
@@ -195,14 +158,16 @@ __wt_value_return_buf(
* If a value is simple and is globally visible at the time of reading a page into cache, we
* encode its location into the WT_ROW.
*/
- if (__wt_row_leaf_value(page, rip, buf))
+ if (__wt_row_leaf_value(page, rip, buf)) {
+ if (tw != NULL)
+ __wt_time_window_init(tw);
return (0);
+ }
/* Take the value from the original page cell. */
__wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
- if (start != NULL && stop != NULL)
- __time_pairs_set(start, stop, &unpack);
-
+ if (tw != NULL)
+ __wt_time_window_copy(tw, &unpack.tw);
return (__wt_page_cell_data_ref(session, page, &unpack, buf));
}
@@ -210,17 +175,18 @@ __wt_value_return_buf(
/* Take the value from the original page cell. */
cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]);
__wt_cell_unpack(session, page, cell, &unpack);
- if (start != NULL && stop != NULL)
- __time_pairs_set(start, stop, &unpack);
-
+ if (tw != NULL)
+ __wt_time_window_copy(tw, &unpack.tw);
return (__wt_page_cell_data_ref(session, page, &unpack, buf));
}
/*
* WT_PAGE_COL_FIX: Take the value from the original page.
*
- * FIXME-PM-1523: Should also check visibility here
+ * FIXME-WT-6126: Should also check visibility here
*/
+ if (tw != NULL)
+ __wt_time_window_init(tw);
v = __bit_getv_recno(ref, cursor->recno, btree->bitcnt);
return (__wt_buf_set(session, buf, &v, 1));
}
@@ -232,95 +198,7 @@ __wt_value_return_buf(
static inline int
__value_return(WT_CURSOR_BTREE *cbt)
{
- return (__wt_value_return_buf(cbt, cbt->ref, &cbt->iface.value, NULL, NULL));
-}
-
-/*
- * __wt_value_return_upd --
- * Change the cursor to reference an internal update structure return value.
- */
-int
-__wt_value_return_upd(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
-{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_MODIFY_VECTOR modifies;
- WT_SESSION_IMPL *session;
- WT_TIME_PAIR start, stop;
-
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- __wt_modify_vector_init(session, &modifies);
-
- /*
- * We're passed a "standard" or "modified" update that's visible to us. Our caller should have
- * already checked for deleted items (we're too far down the call stack to return not-found).
- *
- * Fast path if it's a standard item, assert our caller's behavior.
- */
- if (upd->type == WT_UPDATE_STANDARD) {
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK)) {
- /* Copy an external update, and delete after using it */
- WT_RET(__wt_buf_set(session, &cursor->value, upd->data, upd->size));
- __wt_free_update_list(session, &upd);
- } else {
- cursor->value.data = upd->data;
- cursor->value.size = upd->size;
- }
- return (0);
- }
- WT_ASSERT(session, upd->type == WT_UPDATE_MODIFY);
-
- /*
- * Find a complete update.
- */
- for (; upd != NULL; upd = upd->next) {
- if (upd->txnid == WT_TXN_ABORTED)
- continue;
-
- if (WT_UPDATE_DATA_VALUE(upd))
- break;
-
- if (upd->type == WT_UPDATE_MODIFY)
- WT_ERR(__wt_modify_vector_push(&modifies, upd));
- }
-
- /*
- * If there's no full update, the base item is the on-page item. If the update is a tombstone,
- * the base item is an empty item.
- */
- if (upd == NULL) {
- /*
- * Callers of this function set the cursor slot to an impossible value to check we don't try
- * and return on-page values when the update list should have been sufficient (which
- * happens, for example, if an update list was truncated, deleting some standard update
- * required by a previous modify update). Assert the case.
- */
- WT_ASSERT(session, cbt->slot != UINT32_MAX);
-
- WT_ERR(__wt_value_return_buf(cbt, cbt->ref, &cbt->iface.value, &start, &stop));
- /*
- * Applying modifies on top of a tombstone is invalid. So if we're using the onpage value,
- * the stop time pair should be unset.
- */
- WT_ASSERT(session, stop.txnid == WT_TXN_MAX && stop.timestamp == WT_TS_MAX);
- } else {
- /* The base update must not be a tombstone. */
- WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD);
- WT_ERR(__wt_buf_set(session, &cursor->value, upd->data, upd->size));
- }
-
- /*
- * Once we have a base item, roll forward through any visible modify updates.
- */
- while (modifies.size > 0) {
- __wt_modify_vector_pop(&modifies, &upd);
- WT_ERR(__wt_modify_apply(cursor, upd->data));
- }
-
-err:
- __wt_modify_vector_free(&modifies);
- return (ret);
+ return (__wt_value_return_buf(cbt, cbt->ref, &cbt->iface.value, NULL));
}
/*
@@ -352,20 +230,37 @@ __wt_key_return(WT_CURSOR_BTREE *cbt)
/*
* __wt_value_return --
- * Change the cursor to reference an internal return value.
+ * Change the cursor to reference an update return value.
*/
int
-__wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+__wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE_VALUE *upd_value)
{
WT_CURSOR *cursor;
+ WT_SESSION_IMPL *session;
cursor = &cbt->iface;
+ session = CUR2S(cbt);
F_CLR(cursor, WT_CURSTD_VALUE_EXT);
- if (upd == NULL)
+ if (upd_value->type == WT_UPDATE_INVALID) {
+ /*
+ * FIXME-WT-6127: This is a holdover from the pre-durable history read logic where we used
+ * to fallback to the on-page value if we didn't find a visible update elsewhere. This is
+ * still required for fixed length column store as we have issues with this table type in
+ * durable history which we're planning to address in PM-1814.
+ */
+ WT_ASSERT(session, cbt->btree->type == BTREE_COL_FIX);
WT_RET(__value_return(cbt));
- else
- WT_RET(__wt_value_return_upd(cbt, upd));
+ } else {
+ /*
+ * We're passed a "standard" update that's visible to us. Our caller should have already
+ * checked for deleted items (we're too far down the call stack to return not-found) and any
+ * modify updates should be have been reconstructed into a full standard update.
+ */
+ WT_ASSERT(session, upd_value->type == WT_UPDATE_STANDARD);
+ cursor->value.data = upd_value->buf.data;
+ cursor->value.size = upd_value->buf.size;
+ }
F_SET(cursor, WT_CURSTD_VALUE_INT);
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index a42e11e1d8f..344c6a573d7 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -186,12 +186,7 @@ __slvg_checkpoint(WT_SESSION_IMPL *session, WT_REF *root)
__wt_seconds(session, &ckptbase->sec);
WT_ERR(__wt_metadata_search(session, dhandle->name, &config));
WT_ERR(__wt_meta_block_metadata(session, config, ckptbase));
- ckptbase->start_durable_ts = WT_TS_NONE;
- ckptbase->oldest_start_ts = WT_TS_NONE;
- ckptbase->oldest_start_txn = WT_TXN_NONE;
- ckptbase->stop_durable_ts = WT_TS_NONE;
- ckptbase->newest_stop_ts = WT_TS_MAX;
- ckptbase->newest_stop_txn = WT_TXN_MAX;
+ __wt_time_aggregate_init(&ckptbase->ta);
ckptbase->write_gen = btree->write_gen;
F_SET(ckptbase, WT_CKPT_ADD);
@@ -917,7 +912,7 @@ __slvg_col_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s
*/
/* Case #2/8, #10, #11 */
if (a_trk->col_start > b_trk->col_start)
- WT_PANIC_RET(session, EINVAL, "unexpected merge array sort order");
+ WT_RET_PANIC(session, EINVAL, "unexpected merge array sort order");
if (a_trk->col_start == b_trk->col_start) { /* Case #1, #4 and #9 */
/*
@@ -1174,12 +1169,7 @@ __slvg_col_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF
* regardless of a value's timestamps or transaction IDs.
*/
WT_ERR(__wt_calloc_one(session, &addr));
- addr->newest_start_durable_ts = addr->newest_stop_durable_ts = addr->oldest_start_ts =
- WT_TS_NONE;
- addr->oldest_start_txn = WT_TXN_NONE;
- addr->newest_stop_ts = WT_TS_MAX;
- addr->newest_stop_txn = WT_TXN_MAX;
- addr->prepare = false;
+ __wt_time_aggregate_init(&addr->ta);
WT_ERR(__wt_memdup(session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
addr->size = trk->trk_addr_size;
addr->type = trk->trk_ovfl_cnt == 0 ? WT_ADDR_LEAF_NO : WT_ADDR_LEAF;
@@ -1323,7 +1313,7 @@ __slvg_col_ovfl_single(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK *
return (__slvg_ovfl_ref(session, ovfl, false));
}
- WT_PANIC_RET(session, EINVAL, "overflow record at column-store page merge not found");
+ WT_RET_PANIC(session, EINVAL, "overflow record at column-store page merge not found");
}
/*
@@ -1512,7 +1502,7 @@ __slvg_row_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s
WT_RET(__wt_compare(session, btree->collator, A_TRK_STOP, B_TRK_STOP, &stop_cmp));
if (start_cmp > 0) /* Case #2/8, #10, #11 */
- WT_PANIC_RET(session, EINVAL, "unexpected merge array sort order");
+ WT_RET_PANIC(session, EINVAL, "unexpected merge array sort order");
if (start_cmp == 0) { /* Case #1, #4, #9 */
/*
@@ -1782,12 +1772,7 @@ __slvg_row_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF
* regardless of a value's timestamps or transaction IDs.
*/
WT_ERR(__wt_calloc_one(session, &addr));
- addr->newest_start_durable_ts = addr->newest_stop_durable_ts = addr->oldest_start_ts =
- WT_TS_NONE;
- addr->oldest_start_txn = WT_TXN_NONE;
- addr->newest_stop_ts = WT_TS_MAX;
- addr->newest_stop_txn = WT_TXN_MAX;
- addr->prepare = false;
+ __wt_time_aggregate_init(&addr->ta);
WT_ERR(__wt_memdup(session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
addr->size = trk->trk_addr_size;
addr->type = trk->trk_ovfl_cnt == 0 ? WT_ADDR_LEAF_NO : WT_ADDR_LEAF;
@@ -1992,7 +1977,7 @@ __slvg_row_ovfl_single(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK *
return (__slvg_ovfl_ref(session, ovfl, true));
}
- WT_PANIC_RET(session, EINVAL, "overflow record at row-store page merge not found");
+ WT_RET_PANIC(session, EINVAL, "overflow record at row-store page merge not found");
}
/*
@@ -2270,7 +2255,7 @@ __slvg_ovfl_ref(WT_SESSION_IMPL *session, WT_TRACK *trk, bool multi_panic)
if (F_ISSET(trk, WT_TRACK_OVFL_REFD)) {
if (!multi_panic)
return (__wt_set_return(session, EBUSY));
- WT_PANIC_RET(session, EINVAL,
+ WT_RET_PANIC(session, EINVAL,
"overflow record unexpectedly referenced multiple times "
"during leaf page merge");
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index a2d85f79db8..2a016d6d725 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -147,7 +147,7 @@ __split_verify_root(WT_SESSION_IMPL *session, WT_PAGE *page)
err:
/* Something really bad just happened. */
- WT_PANIC_RET(session, ret, "fatal error during page split");
+ WT_RET_PANIC(session, ret, "fatal error during page split");
}
#endif
@@ -249,13 +249,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, WT_REF **from_ref
if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) {
__wt_cell_unpack(session, from_home, (WT_CELL *)ref_addr, &unpack);
WT_RET(__wt_calloc_one(session, &addr));
- addr->oldest_start_ts = unpack.oldest_start_ts;
- addr->oldest_start_txn = unpack.oldest_start_txn;
- addr->newest_start_durable_ts = unpack.newest_start_durable_ts;
- addr->newest_stop_ts = unpack.newest_stop_ts;
- addr->newest_stop_txn = unpack.newest_stop_txn;
- addr->newest_stop_durable_ts = unpack.newest_stop_durable_ts;
- addr->prepare = F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE);
+ __wt_time_aggregate_copy(&addr->ta, &unpack.ta);
WT_ERR(__wt_memdup(session, unpack.data, unpack.size, &addr->addr));
addr->size = (uint8_t)unpack.size;
switch (unpack.raw) {
@@ -574,17 +568,17 @@ err:
case WT_ERR_RETURN:
__wt_free_ref_index(session, root, alloc_index, true);
break;
- case WT_ERR_PANIC:
- __wt_err(session, ret, "fatal error during root page split to deepen the tree");
- ret = WT_PANIC;
- break;
case WT_ERR_IGNORE:
- if (ret != 0 && ret != WT_PANIC) {
- __wt_err(session, ret,
- "ignoring not-fatal error during root page split "
- "to deepen the tree");
+ if (ret != WT_PANIC) {
+ if (ret != 0)
+ __wt_err(session, ret,
+ "ignoring not-fatal error during root page split to deepen the tree");
ret = 0;
+ break;
}
+ /* FALLTHROUGH */
+ case WT_ERR_PANIC:
+ ret = __wt_panic(session, ret, "fatal error during root page split to deepen the tree");
break;
}
return (ret);
@@ -877,17 +871,16 @@ err:
if (empty_parent)
ret = __wt_set_return(session, EBUSY);
break;
- case WT_ERR_PANIC:
- __wt_err(session, ret, "fatal error during parent page split");
- ret = WT_PANIC;
- break;
case WT_ERR_IGNORE:
- if (ret != 0 && ret != WT_PANIC) {
- __wt_err(session, ret,
- "ignoring not-fatal error during parent page "
- "split");
+ if (ret != WT_PANIC) {
+ if (ret != 0)
+ __wt_err(session, ret, "ignoring not-fatal error during parent page split");
ret = 0;
+ break;
}
+ /* FALLTHROUGH */
+ case WT_ERR_PANIC:
+ ret = __wt_panic(session, ret, "fatal error during parent page split");
break;
}
__wt_scr_free(session, &scr);
@@ -1154,17 +1147,16 @@ err:
}
__wt_free_ref_index(session, page, alloc_index, true);
break;
- case WT_ERR_PANIC:
- __wt_err(session, ret, "fatal error during internal page split");
- ret = WT_PANIC;
- break;
case WT_ERR_IGNORE:
- if (ret != 0 && ret != WT_PANIC) {
- __wt_err(session, ret,
- "ignoring not-fatal error during internal page "
- "split");
+ if (ret != WT_PANIC) {
+ if (ret != 0)
+ __wt_err(session, ret, "ignoring not-fatal error during internal page split");
ret = 0;
+ break;
}
+ /* FALLTHROUGH */
+ case WT_ERR_PANIC:
+ ret = __wt_panic(session, ret, "fatal error during internal page split");
break;
}
return (ret);
@@ -1391,7 +1383,7 @@ __split_multi_inmem(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT
WT_SAVE_UPD *supd;
WT_UPDATE *prev_onpage, *upd;
uint64_t recno;
- uint32_t i, slot;
+ uint32_t i, page_flags, slot;
/*
* In 04/2016, we removed column-store record numbers from the WT_PAGE structure, leading to
@@ -1413,7 +1405,8 @@ __split_multi_inmem(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT
* our caller will not discard the disk image when discarding the original page, and our caller
* will discard the allocated page on error, when discarding the allocated WT_REF.
*/
- WT_RET(__wt_page_inmem(session, ref, multi->disk_image, WT_PAGE_DISK_ALLOC, &page));
+ page_flags = WT_PAGE_DISK_ALLOC | WT_PAGE_INSTANTIATE_PREPARE_UPDATE;
+ WT_RET(__wt_page_inmem(session, ref, multi->disk_image, page_flags, &page));
multi->disk_image = NULL;
/*
@@ -1704,13 +1697,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_R
if (multi->addr.addr != NULL) {
WT_RET(__wt_calloc_one(session, &addr));
ref->addr = addr;
- addr->oldest_start_ts = multi->addr.oldest_start_ts;
- addr->oldest_start_txn = multi->addr.oldest_start_txn;
- addr->newest_start_durable_ts = multi->addr.newest_start_durable_ts;
- addr->newest_stop_ts = multi->addr.newest_stop_ts;
- addr->newest_stop_txn = multi->addr.newest_stop_txn;
- addr->newest_stop_durable_ts = multi->addr.newest_stop_durable_ts;
- addr->prepare = multi->addr.prepare;
+ __wt_time_aggregate_copy(&addr->ta, &multi->addr.ta);
WT_RET(__wt_memdup(session, multi->addr.addr, multi->addr.size, &addr->addr));
addr->size = multi->addr.size;
addr->type = multi->addr.type;
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 851a407f165..fd36f6b24f9 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -220,8 +220,8 @@ __sync_ref_obsolete_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF_LIST *rl
if (previous_state == WT_REF_DISK) {
/* There should be an address, but simply skip any page where we don't find one. */
if (__wt_ref_addr_copy(session, ref, &addr)) {
- newest_stop_ts = addr.newest_stop_ts;
- newest_stop_txn = addr.newest_stop_txn;
+ newest_stop_ts = addr.ta.newest_stop_ts;
+ newest_stop_txn = addr.ta.newest_stop_txn;
obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_ts);
}
@@ -274,21 +274,21 @@ __sync_ref_obsolete_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF_LIST *rl
/* Calculate the max stop time pair by traversing all multi addresses. */
for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
- newest_stop_txn = WT_MAX(newest_stop_txn, multi->addr.newest_stop_txn);
- newest_stop_ts = WT_MAX(newest_stop_ts, multi->addr.newest_stop_ts);
+ newest_stop_txn = WT_MAX(newest_stop_txn, multi->addr.ta.newest_stop_txn);
+ newest_stop_ts = WT_MAX(newest_stop_ts, multi->addr.ta.newest_stop_ts);
}
obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_ts);
} else if (mod != NULL && mod->rec_result == WT_PM_REC_REPLACE) {
tag = "reconciled replacement block";
- newest_stop_txn = mod->mod_replace.newest_stop_txn;
- newest_stop_ts = mod->mod_replace.newest_stop_ts;
+ newest_stop_txn = mod->mod_replace.ta.newest_stop_txn;
+ newest_stop_ts = mod->mod_replace.ta.newest_stop_ts;
obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_ts);
} else if (__wt_ref_addr_copy(session, ref, &addr)) {
tag = "WT_REF address";
- newest_stop_txn = addr.newest_stop_txn;
- newest_stop_ts = addr.newest_stop_ts;
+ newest_stop_txn = addr.ta.newest_stop_txn;
+ newest_stop_ts = addr.ta.newest_stop_ts;
obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_ts);
} else
tag = "unexpected page state";
@@ -469,12 +469,7 @@ __wt_sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
btree->syncing = WT_BTREE_SYNC_RUNNING;
is_hs = WT_IS_HS(btree);
- /*
- * Add in history store reconciliation for standard files.
- *
- * FIXME-PM-1521: Remove the history store check, and assert that no updates from the
- * history store are copied to the history store recursively.
- */
+ /* Add in history store reconciliation for standard files. */
rec_flags = WT_REC_CHECKPOINT;
if (!is_hs && !WT_IS_METADATA(btree->dhandle))
rec_flags |= WT_REC_HS;
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index c9708e9511b..0b3d4da2459 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -255,17 +255,13 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
* Create a fake, unpacked parent cell for the tree based on the checkpoint information.
*/
memset(&addr_unpack, 0, sizeof(addr_unpack));
- addr_unpack.newest_start_durable_ts = ckpt->start_durable_ts;
- addr_unpack.newest_stop_durable_ts = ckpt->stop_durable_ts;
- addr_unpack.oldest_start_ts = ckpt->oldest_start_ts;
- addr_unpack.newest_stop_ts = ckpt->newest_stop_ts;
- if (ckpt->write_gen > S2C(session)->base_write_gen) {
- addr_unpack.oldest_start_txn = ckpt->oldest_start_txn;
- addr_unpack.newest_stop_txn = ckpt->newest_stop_txn;
- } else {
- addr_unpack.oldest_start_txn = WT_TXN_NONE;
- addr_unpack.newest_stop_txn = WT_TXN_MAX;
+ __wt_time_aggregate_copy(&addr_unpack.ta, &ckpt->ta);
+ if (ckpt->write_gen <= S2C(session)->base_write_gen) {
+ addr_unpack.ta.oldest_start_txn = WT_TXN_NONE;
+ addr_unpack.ta.newest_stop_txn = WT_TXN_MAX;
}
+ if (ckpt->ta.prepare)
+ F_SET(&addr_unpack, WT_CELL_UNPACK_PREPARE);
addr_unpack.raw = WT_CELL_ADDR_INT;
/* Verify the tree. */
@@ -367,15 +363,14 @@ __verify_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf)
WT_ADDR_COPY addr;
WT_DECL_ITEM(tmp);
WT_DECL_RET;
- char tp_string[2][WT_TP_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
WT_ERR(__wt_scr_alloc(session, 0, &tmp));
if (__wt_ref_addr_copy(session, ref, &addr)) {
- WT_ERR(__wt_buf_fmt(session, buf, "%s %s,%s",
- __wt_addr_string(session, addr.addr, addr.size, tmp),
- __wt_time_pair_to_string(addr.oldest_start_ts, addr.oldest_start_txn, tp_string[0]),
- __wt_time_pair_to_string(addr.newest_stop_ts, addr.newest_stop_txn, tp_string[1])));
+ WT_ERR(
+ __wt_buf_fmt(session, buf, "%s %s", __wt_addr_string(session, addr.addr, addr.size, tmp),
+ __wt_time_aggregate_to_string(&addr.ta, time_string)));
} else
WT_ERR(__wt_buf_fmt(session, buf, "%s -/-,-/-", __wt_addr_string(session, NULL, 0, tmp)));
@@ -391,28 +386,41 @@ err:
static int
__verify_addr_ts(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *unpack, WT_VSTUFF *vs)
{
- char ts_string[2][WT_TS_INT_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
- if (unpack->oldest_start_ts != WT_TS_NONE && unpack->newest_stop_ts == WT_TS_NONE)
+ if (unpack->ta.oldest_start_ts != WT_TS_NONE && unpack->ta.newest_stop_ts == WT_TS_NONE)
WT_RET_MSG(session, WT_ERROR,
"internal page reference at %s has a newest stop "
"timestamp of 0",
__verify_addr_string(session, ref, vs->tmp1));
- if (unpack->oldest_start_ts > unpack->newest_stop_ts)
+ if (unpack->ta.oldest_start_ts > unpack->ta.newest_stop_ts)
WT_RET_MSG(session, WT_ERROR,
"internal page reference at %s has an oldest start "
- "timestamp %s newer than its newest stop timestamp %s",
+ "timestamp newer than its newest stop timestamp; time window %s",
__verify_addr_string(session, ref, vs->tmp1),
- __wt_timestamp_to_string(unpack->oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(unpack->newest_stop_ts, ts_string[1]));
- if (unpack->oldest_start_txn > unpack->newest_stop_txn)
+ __wt_time_window_to_string(&unpack->tw, time_string));
+ if (unpack->ta.oldest_start_txn > unpack->ta.newest_stop_txn)
WT_RET_MSG(session, WT_ERROR,
"internal page reference at %s has an oldest start "
- "transaction (%" PRIu64
- ") newer than its newest stop "
- "transaction (%" PRIu64 ")",
- __verify_addr_string(session, ref, vs->tmp1), unpack->oldest_start_txn,
- unpack->newest_stop_txn);
+ "transaction newer than its newest stop "
+ "transaction; time aggregate %s",
+ __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.oldest_start_ts > unpack->ta.newest_start_durable_ts)
+ WT_RET_MSG(session, WT_ERROR,
+ "internal page reference at %s has an oldest start "
+ "timestamp newer than its newest start durable "
+ "timestamp; time aggregate %s",
+ __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.newest_stop_ts != WT_TS_MAX &&
+ unpack->ta.newest_stop_ts > unpack->ta.newest_stop_durable_ts)
+ WT_RET_MSG(session, WT_ERROR,
+ "internal page reference at %s has a newest stop "
+ "timestamp newer than its newest stop durable "
+ "timestamp; time aggregate %s",
+ __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(&unpack->ta, time_string));
return (0);
}
@@ -781,7 +789,7 @@ __verify_ts_stable_cmp(WT_SESSION_IMPL *session, WT_ITEM *key, WT_REF *ref, uint
{
WT_BTREE *btree;
WT_DECL_RET;
- char tp_string[2][WT_TP_STRING_SIZE];
+ char tp_string[2][WT_TS_INT_STRING_SIZE];
bool start;
btree = S2BT(session);
@@ -949,7 +957,7 @@ __verify_page_content(
uint64_t recno, rle;
uint32_t cell_num;
uint8_t *p;
- char ts_string[2][WT_TS_INT_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
bool found_ovfl;
btree = S2BT(session);
@@ -992,108 +1000,126 @@ __verify_page_content(
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
- if (unpack.oldest_start_ts != WT_TS_NONE && unpack.newest_stop_ts == WT_TS_NONE)
+ if (unpack.ta.oldest_start_ts != WT_TS_NONE && unpack.ta.newest_stop_ts == WT_TS_NONE)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a "
- "newest stop timestamp of 0",
- cell_num - 1, __verify_addr_string(session, ref, vs->tmp1));
- if (unpack.oldest_start_ts > unpack.newest_stop_ts)
+ "newest stop timestamp of 0; time window %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(&unpack.ta, time_string));
+ if (unpack.ta.oldest_start_ts > unpack.ta.newest_stop_ts)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has an "
- "oldest start timestamp %s newer than "
- "its newest stop timestamp %s",
+ "oldest start timestamp newer than "
+ "its newest stop timestamp; time window %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_timestamp_to_string(unpack.oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(unpack.newest_stop_ts, ts_string[1]));
- if (unpack.oldest_start_txn > unpack.newest_stop_txn) {
+ __wt_time_aggregate_to_string(&unpack.ta, time_string));
+ if (unpack.ta.oldest_start_txn > unpack.ta.newest_stop_txn) {
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s has an "
- "oldest start transaction (%" PRIu64
- ") "
- "newer than its newest stop transaction "
- "(%" PRIu64 ")",
+ " on page "
+ "at %s has an oldest start transaction newer than "
+ "its newest stop transaction; time aggregate %s ",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- unpack.oldest_start_txn, unpack.newest_stop_txn);
+ __wt_time_aggregate_to_string(&unpack.ta, time_string));
}
+ if (unpack.ta.oldest_start_ts > unpack.ta.newest_start_durable_ts)
+ WT_RET_MSG(session, WT_ERROR,
+ "cell %" PRIu32
+ " on page at %s has an "
+ "oldest start timestamp newer than "
+ "its newest start durable timestamp; time aggregate %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(&unpack.ta, time_string));
+ if (unpack.ta.newest_stop_ts != WT_TS_MAX &&
+ unpack.ta.newest_stop_ts > unpack.ta.newest_stop_durable_ts)
+ WT_RET_MSG(session, WT_ERROR,
+ "cell %" PRIu32
+ " on page at %s has a "
+ "newest stop timestamp newer than "
+ "its newest stop durable timestamp; time aggregate %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(&unpack.ta, time_string));
- /*
- * FIXME-prepare-support: Enable verification once all durable is finished.
- *
- * WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start durable",
- * unpack.newest_start_durable_ts, "start durable",
- * addr_unpack->newest_start_durable_ts, false, vs));
- */
+ if (addr_unpack->ta.newest_start_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start durable",
+ unpack.ta.newest_start_durable_ts, "start durable",
+ addr_unpack->ta.newest_start_durable_ts, false, vs));
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "oldest start",
- unpack.oldest_start_ts, "oldest start", addr_unpack->oldest_start_ts, true, vs));
- WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "oldest start",
- unpack.oldest_start_txn, "oldest start", addr_unpack->oldest_start_txn, true, dsk,
+ unpack.ta.oldest_start_ts, "oldest start", addr_unpack->ta.oldest_start_ts, true,
vs));
+ WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "oldest start",
+ unpack.ta.oldest_start_txn, "oldest start", addr_unpack->ta.oldest_start_txn, true,
+ dsk, vs));
- /*
- * FIXME-prepare-support: Enable verification once all durable is finished.
- *
- * WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop durable",
- * unpack.newest_stop_durable_ts, "stop durable", addr_unpack->newest_stop_durable_ts,
- * false, vs));
- */
+ if (addr_unpack->ta.newest_stop_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop durable",
+ unpack.ta.newest_stop_durable_ts, "stop durable",
+ addr_unpack->ta.newest_stop_durable_ts, false, vs));
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "newest stop",
- unpack.newest_stop_ts, "newest stop", addr_unpack->newest_stop_ts, false, vs));
+ unpack.ta.newest_stop_ts, "newest stop", addr_unpack->ta.newest_stop_ts, false, vs));
WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "newest stop",
- unpack.newest_stop_txn, "newest stop", addr_unpack->newest_stop_txn, false, dsk, vs));
- WT_RET(__verify_ts_stable_cmp(
- session, NULL, ref, cell_num - 1, addr_unpack->start_ts, addr_unpack->stop_ts, vs));
+ unpack.ta.newest_stop_txn, "newest stop", addr_unpack->ta.newest_stop_txn, false, dsk,
+ vs));
+ WT_RET(__verify_ts_stable_cmp(session, NULL, ref, cell_num - 1,
+ addr_unpack->ta.oldest_start_ts, addr_unpack->ta.newest_stop_ts, vs));
break;
case WT_CELL_DEL:
case WT_CELL_VALUE:
case WT_CELL_VALUE_COPY:
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_SHORT:
- if (unpack.start_ts != WT_TS_NONE && unpack.stop_ts == WT_TS_NONE)
+ if (unpack.tw.start_ts != WT_TS_NONE && unpack.tw.stop_ts == WT_TS_NONE)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a stop "
- "timestamp of 0",
- cell_num - 1, __verify_addr_string(session, ref, vs->tmp1));
- if (unpack.start_ts > unpack.stop_ts)
+ "timestamp of 0; time window %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_window_to_string(&unpack.tw, time_string));
+ if (unpack.tw.start_ts > unpack.tw.stop_ts)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a "
- "start timestamp %s newer than its stop "
- "timestamp %s",
+ "start timestamp newer than its stop "
+ "timestamp; time window %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_timestamp_to_string(unpack.start_ts, ts_string[0]),
- __wt_timestamp_to_string(unpack.stop_ts, ts_string[1]));
- if (unpack.start_txn > unpack.stop_txn)
+ __wt_time_window_to_string(&unpack.tw, time_string));
+ if (unpack.tw.start_txn > unpack.tw.stop_txn)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a "
- "start transaction %" PRIu64
- "newer than "
- "its stop transaction %" PRIu64,
- cell_num - 1, __verify_addr_string(session, ref, vs->tmp1), unpack.start_txn,
- unpack.stop_txn);
-
- /*
- * FIXME-prepare-support: Enable verification once all durable is finished.
- *
- * WT_RET(
- * __verify_ts_addr_cmp(session, ref, cell_num - 1, "start", unpack.durable_start_ts,
- * "durable start", addr_unpack->newest_start_durable_ts, true, vs));
- */
- WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start", unpack.start_ts,
- "oldest start", addr_unpack->oldest_start_ts, true, vs));
- WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "start", unpack.start_txn,
- "oldest start", addr_unpack->oldest_start_txn, true, dsk, vs));
- /*
- * FIXME-prepare-support: Enable verification once all durable is finished.
- *
- * WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start",
- * unpack.durable_stop_ts,
- * "durable stop", addr_unpack->newest_stop_durable_ts, true, vs));
- */
- WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop", unpack.stop_ts,
- "newest stop", addr_unpack->newest_stop_ts, false, vs));
- WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "stop", unpack.stop_txn,
- "newest stop", addr_unpack->newest_stop_txn, false, dsk, vs));
+ "start transaction newer than "
+ "its stop transaction; time window %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_window_to_string(&unpack.tw, time_string));
+ if (unpack.tw.start_ts > unpack.tw.durable_start_ts)
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s has a "
+ "start timestamp newer than its start durable "
+ "timestamp; time window %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_window_to_string(&unpack.tw, time_string));
+ if (unpack.tw.stop_ts != WT_TS_MAX && unpack.tw.stop_ts > unpack.tw.durable_stop_ts)
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s has a "
+ "stop timestamp newer than its stop durable "
+ "timestamp; time window %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_window_to_string(&unpack.tw, time_string));
+
+ if (addr_unpack->ta.newest_start_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start durable",
+ unpack.tw.durable_start_ts, "newest durable start",
+ addr_unpack->ta.newest_start_durable_ts, false, vs));
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start", unpack.tw.start_ts,
+ "oldest start", addr_unpack->ta.oldest_start_ts, true, vs));
+ WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "start", unpack.tw.start_txn,
+ "oldest start", addr_unpack->ta.oldest_start_txn, true, dsk, vs));
+ if (addr_unpack->ta.newest_stop_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop durable",
+ unpack.tw.durable_stop_ts, "newest durable stop",
+ addr_unpack->ta.newest_stop_durable_ts, false, vs));
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop", unpack.tw.stop_ts,
+ "newest stop", addr_unpack->ta.newest_stop_ts, false, vs));
+ WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "stop", unpack.tw.stop_txn,
+ "newest stop", addr_unpack->ta.newest_stop_txn, false, dsk, vs));
WT_RET(__verify_ts_stable_cmp(
- session, NULL, ref, cell_num - 1, unpack.start_ts, unpack.stop_ts, vs));
+ session, NULL, ref, cell_num - 1, unpack.tw.start_ts, unpack.tw.stop_ts, vs));
break;
}
@@ -1106,7 +1132,7 @@ __verify_page_content(
continue;
WT_RET(__wt_row_leaf_key(session, page, rip++, vs->tmp1, false));
- WT_RET(__verify_key_hs(session, vs->tmp1, unpack.start_ts, vs));
+ WT_RET(__verify_key_hs(session, vs->tmp1, unpack.tw.start_ts, vs));
#ifdef HAVE_DIAGNOSTIC
if (vs->dump_history)
@@ -1117,7 +1143,7 @@ __verify_page_content(
p = vs->tmp1->mem;
WT_RET(__wt_vpack_uint(&p, 0, recno));
vs->tmp1->size = WT_PTRDIFF(p, vs->tmp1->mem);
- WT_RET(__verify_key_hs(session, vs->tmp1, unpack.start_ts, vs));
+ WT_RET(__verify_key_hs(session, vs->tmp1, unpack.tw.start_ts, vs));
#ifdef HAVE_DIAGNOSTIC
if (vs->dump_history)
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
index 7b80327a22c..a1e96d41dc9 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
@@ -268,7 +268,7 @@ static int
__verify_dsk_validity(WT_SESSION_IMPL *session, WT_CELL_UNPACK *unpack, uint32_t cell_num,
WT_ADDR *addr, const char *tag, const WT_PAGE_HEADER *dsk)
{
- char ts_string[2][WT_TS_INT_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
/*
* Check timestamp and transaction order, and optionally against parent values. Timestamps and
@@ -284,43 +284,57 @@ __verify_dsk_validity(WT_SESSION_IMPL *session, WT_CELL_UNPACK *unpack, uint32_t
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
- if (unpack->oldest_start_ts != WT_TS_NONE && unpack->newest_stop_ts == WT_TS_NONE)
+ if (unpack->ta.oldest_start_ts != WT_TS_NONE && unpack->ta.newest_stop_ts == WT_TS_NONE)
WT_RET_VRFY(session, "cell %" PRIu32
" on page at %s has a newest stop "
- "timestamp of 0",
- cell_num - 1, tag);
- if (unpack->oldest_start_ts > unpack->newest_stop_ts)
+ "timestamp of 0; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.oldest_start_ts > unpack->ta.newest_stop_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has an oldest "
+ "start timestamp newer than its newest stop "
+ "timestamp; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.oldest_start_txn > unpack->ta.newest_stop_txn)
WT_RET_VRFY(session, "cell %" PRIu32
" on page at %s has an oldest "
- "start timestamp %s newer than its newest stop "
- "timestamp %s",
- cell_num - 1, tag, __wt_timestamp_to_string(unpack->oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(unpack->newest_stop_ts, ts_string[1]));
- if (unpack->oldest_start_txn > unpack->newest_stop_txn)
+ "start transaction newer than its "
+ "newest stop transaction; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.oldest_start_ts > unpack->ta.newest_start_durable_ts)
WT_RET_VRFY(session, "cell %" PRIu32
" on page at %s has an oldest "
- "start transaction %" PRIu64
- " newer than its "
- "newest stop transaction %" PRIu64,
- cell_num - 1, tag, unpack->oldest_start_txn, unpack->newest_stop_txn);
+ "start timestamp newer than its newest start durable "
+ "timestamp; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.newest_stop_ts != WT_TS_MAX &&
+ unpack->ta.newest_stop_ts > unpack->ta.newest_stop_durable_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a newest "
+ "stop timestamp newer than its newest stop durable "
+ "timestamp; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
if (addr == NULL)
break;
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start durable",
- unpack->newest_start_durable_ts, "start durable", addr->newest_start_durable_ts, false,
- tag));
+ if (addr->ta.newest_start_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start durable",
+ unpack->ta.newest_start_durable_ts, "start durable", addr->ta.newest_start_durable_ts,
+ false, tag));
WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "oldest start",
- unpack->oldest_start_ts, "oldest start", addr->oldest_start_ts, true, tag));
+ unpack->ta.oldest_start_ts, "oldest start", addr->ta.oldest_start_ts, true, tag));
WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "oldest start",
- unpack->oldest_start_txn, "oldest start", addr->oldest_start_txn, true, tag, dsk));
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop durable",
- unpack->newest_stop_durable_ts, "stop durable", addr->newest_stop_durable_ts, false,
- tag));
+ unpack->ta.oldest_start_txn, "oldest start", addr->ta.oldest_start_txn, true, tag, dsk));
+
+ if (addr->ta.newest_stop_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop durable",
+ unpack->ta.newest_stop_durable_ts, "stop durable", addr->ta.newest_stop_durable_ts,
+ false, tag));
WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "newest stop",
- unpack->newest_stop_ts, "newest stop", addr->newest_stop_ts, false, tag));
+ unpack->ta.newest_stop_ts, "newest stop", addr->ta.newest_stop_ts, false, tag));
WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "newest stop",
- unpack->newest_stop_txn, "newest stop", addr->newest_stop_txn, false, tag, dsk));
+ unpack->ta.newest_stop_txn, "newest stop", addr->ta.newest_stop_txn, false, tag, dsk));
break;
case WT_CELL_DEL:
case WT_CELL_VALUE:
@@ -328,36 +342,52 @@ __verify_dsk_validity(WT_SESSION_IMPL *session, WT_CELL_UNPACK *unpack, uint32_t
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_OVFL_RM:
case WT_CELL_VALUE_SHORT:
- if (unpack->start_ts != WT_TS_NONE && unpack->stop_ts == WT_TS_NONE)
+ if (unpack->tw.start_ts != WT_TS_NONE && unpack->tw.stop_ts == WT_TS_NONE)
WT_RET_VRFY(session, "cell %" PRIu32
" on page at %s has a stop "
- "timestamp of 0",
- cell_num - 1, tag);
- if (unpack->start_ts > unpack->stop_ts)
+ "timestamp of 0; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
+ if (unpack->tw.start_ts > unpack->tw.stop_ts)
WT_RET_VRFY(session, "cell %" PRIu32
" on page at %s has a start "
- "timestamp %s newer than its stop timestamp %s",
- cell_num - 1, tag, __wt_timestamp_to_string(unpack->start_ts, ts_string[0]),
- __wt_timestamp_to_string(unpack->stop_ts, ts_string[1]));
- if (unpack->start_txn > unpack->stop_txn)
+ "timestamp newer than its stop timestamp; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
+ if (unpack->tw.start_txn > unpack->tw.stop_txn)
WT_RET_VRFY(session, "cell %" PRIu32
" on page at %s has a start "
- "transaction %" PRIu64
- " newer than its stop "
- "transaction %" PRIu64,
- cell_num - 1, tag, unpack->start_txn, unpack->stop_txn);
+ "transaction newer than its stop "
+ "transaction; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
+ if (unpack->tw.start_ts > unpack->tw.durable_start_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a start "
+ "timestamp newer than its durable start timestamp; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
+ if (unpack->tw.stop_ts != WT_TS_MAX && unpack->tw.stop_ts > unpack->tw.durable_stop_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a stop "
+ "timestamp newer than its durable stop timestamp; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
if (addr == NULL)
break;
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start", unpack->start_ts,
- "oldest start", addr->oldest_start_ts, true, tag));
- WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "start", unpack->start_txn,
- "oldest start", addr->oldest_start_txn, true, tag, dsk));
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop", unpack->stop_ts,
- "newest stop", addr->newest_stop_ts, false, tag));
- WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "stop", unpack->stop_txn,
- "newest stop", addr->newest_stop_txn, false, tag, dsk));
+ if (addr->ta.newest_start_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start durable",
+ unpack->tw.durable_start_ts, "newest start durable", addr->ta.newest_start_durable_ts,
+ false, tag));
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start", unpack->tw.start_ts,
+ "oldest start", addr->ta.oldest_start_ts, true, tag));
+ WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "start", unpack->tw.start_txn,
+ "oldest start", addr->ta.oldest_start_txn, true, tag, dsk));
+ if (addr->ta.newest_stop_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop durable",
+ unpack->tw.durable_stop_ts, "newest stop durable", addr->ta.newest_stop_durable_ts,
+ false, tag));
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop", unpack->tw.stop_ts,
+ "newest stop", addr->ta.newest_stop_ts, false, tag));
+ WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "stop", unpack->tw.stop_txn,
+ "newest stop", addr->ta.newest_stop_txn, false, tag, dsk));
break;
}
@@ -707,10 +737,7 @@ __verify_dsk_col_var(
struct {
const void *data;
size_t size;
- wt_timestamp_t start_ts;
- uint64_t start_txn;
- wt_timestamp_t stop_ts;
- uint64_t stop_txn;
+ WT_TIME_WINDOW tw;
bool deleted;
} last;
WT_BM *bm;
@@ -728,10 +755,7 @@ __verify_dsk_col_var(
last.data = NULL;
last.size = 0;
- last.start_ts = WT_TS_NONE;
- last.start_txn = WT_TXN_NONE;
- last.stop_ts = WT_TS_NONE;
- last.stop_txn = WT_TXN_NONE;
+ __wt_time_window_init(&last.tw);
last.deleted = false;
cell_num = 0;
@@ -760,11 +784,11 @@ __verify_dsk_col_var(
}
/*
- * Compare the last two items and see if reconciliation missed a chance for RLE encoding. We
- * don't have to care about data encoding or anything else, a byte comparison is enough.
+ * Compare the last two items and see if reconciliation missed a chance for RLE encoding.
+ * The time windows must match and we otherwise don't have to care about data encoding, a
+ * byte comparison is enough.
*/
- if (unpack->start_ts != last.start_ts || unpack->start_txn != last.start_txn ||
- unpack->stop_ts != last.stop_ts || unpack->stop_txn != last.stop_txn)
+ if (!__wt_time_windows_equal(&unpack->tw, &last.tw))
;
else if (last.deleted) {
if (cell_type == WT_CELL_DEL)
@@ -777,10 +801,7 @@ match_err:
"have been run-length encoded",
cell_num - 1, cell_num, tag);
- last.start_ts = unpack->start_ts;
- last.start_txn = unpack->start_txn;
- last.stop_ts = unpack->stop_ts;
- last.stop_txn = unpack->stop_txn;
+ __wt_time_window_copy(&last.tw, &unpack->tw);
switch (cell_type) {
case WT_CELL_DEL:
last.data = NULL;
diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c
index bfd3ecb9f5c..a4a4f8b662d 100644
--- a/src/third_party/wiredtiger/src/btree/col_modify.c
+++ b/src/third_party/wiredtiger/src/btree/col_modify.c
@@ -34,7 +34,7 @@ __wt_col_modify(WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_U
btree = cbt->btree;
ins = NULL;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
upd = upd_arg;
append = logged = false;
@@ -137,7 +137,7 @@ __wt_col_modify(WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_U
}
/* Avoid a data copy in WT_CURSOR.update. */
- cbt->modify_update = upd;
+ __wt_upd_value_assign(cbt->modify_update, upd);
/*
* Point the new WT_UPDATE item to the next element in the list. If we get it right, the
@@ -188,7 +188,7 @@ __wt_col_modify(WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_U
logged = true;
/* Avoid a data copy in WT_CURSOR.update. */
- cbt->modify_update = upd;
+ __wt_upd_value_assign(cbt->modify_update, upd);
} else
upd_size = __wt_update_list_memsize(upd);
ins->upd = upd;
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index a6d56c9499d..e98cf094421 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -74,7 +74,7 @@ __wt_col_search(
uint32_t base, indx, limit, read_flags;
int depth;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
btree = S2BT(session);
current = NULL;
diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c
index b7b1c5edff8..6aa44046cb8 100644
--- a/src/third_party/wiredtiger/src/btree/row_modify.c
+++ b/src/third_party/wiredtiger/src/btree/row_modify.c
@@ -58,7 +58,7 @@ __wt_row_modify(WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value,
ins = NULL;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
upd = upd_arg;
logged = false;
@@ -109,7 +109,7 @@ __wt_row_modify(WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value,
logged = true;
/* Avoid WT_CURSOR.update data copy. */
- cbt->modify_update = upd;
+ __wt_upd_value_assign(cbt->modify_update, upd);
} else {
upd_size = __wt_update_list_memsize(upd);
@@ -169,7 +169,7 @@ __wt_row_modify(WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value,
logged = true;
/* Avoid WT_CURSOR.update data copy. */
- cbt->modify_update = upd;
+ __wt_upd_value_assign(cbt->modify_update, upd);
} else
upd_size = __wt_update_list_memsize(upd);
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 98ae6f66daf..917705f6f9c 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -224,7 +224,7 @@ __wt_row_search(WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key, bool insert, WT_REF *le
int cmp, depth;
bool append_check, descend_right, done;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
btree = S2BT(session);
collator = btree->collator;
item = cbt->tmp;
diff --git a/src/third_party/wiredtiger/src/conn/conn_capacity.c b/src/third_party/wiredtiger/src/conn/conn_capacity.c
index 31e5ab78478..8b5f7299fd4 100644
--- a/src/third_party/wiredtiger/src/conn/conn_capacity.c
+++ b/src/third_party/wiredtiger/src/conn/conn_capacity.c
@@ -115,7 +115,7 @@ __capacity_server(void *arg)
if (0) {
err:
- WT_PANIC_MSG(session, ret, "capacity server error");
+ WT_IGNORE_RET(__wt_panic(session, ret, "capacity server error"));
}
return (WT_THREAD_RET_VALUE);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_ckpt.c b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
index 122d310934d..5c8ad02b01e 100644
--- a/src/third_party/wiredtiger/src/conn/conn_ckpt.c
+++ b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
@@ -116,7 +116,7 @@ __ckpt_server(void *arg)
if (0) {
err:
- WT_PANIC_MSG(session, ret, "checkpoint server error");
+ WT_IGNORE_RET(__wt_panic(session, ret, "checkpoint server error"));
}
return (WT_THREAD_RET_VALUE);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
index 3d4edc58167..beb222d08bb 100644
--- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
@@ -166,7 +166,7 @@ __wt_conn_dhandle_alloc(WT_SESSION_IMPL *session, const char *uri, const char *c
dhandle = (WT_DATA_HANDLE *)table;
dhandle->type = WT_DHANDLE_TYPE_TABLE;
} else
- WT_PANIC_RET(session, EINVAL, "illegal handle allocation URI %s", uri);
+ WT_RET_PANIC(session, EINVAL, "illegal handle allocation URI %s", uri);
/* Btree handles keep their data separate from the interface. */
if (dhandle->type == WT_DHANDLE_TYPE_BTREE) {
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index 2bc89996afd..1770505d566 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -589,7 +589,7 @@ __log_file_server(void *arg)
* file system may not support truncate: both are OK, it's just more work during
* cursor traversal.
*/
- if (!conn->hot_backup && conn->log_cursors == 0) {
+ if (conn->hot_backup_start == 0 && conn->log_cursors == 0) {
WT_WITH_HOTBACKUP_READ_LOCK(session,
WT_ERR_ERROR_OK(__wt_ftruncate(session, close_fh, close_end_lsn.l.offset),
ENOTSUP, false),
@@ -661,7 +661,7 @@ __log_file_server(void *arg)
if (0) {
err:
- WT_PANIC_MSG(session, ret, "log close server error");
+ WT_IGNORE_RET(__wt_panic(session, ret, "log close server error"));
}
WT_STAT_CONN_INCRV(session, log_server_sync_blocked, yield_count);
if (locked)
@@ -856,7 +856,7 @@ __log_wrlsn_server(void *arg)
__wt_log_wrlsn(session, NULL);
if (0) {
err:
- WT_PANIC_MSG(session, ret, "log wrlsn server error");
+ WT_IGNORE_RET(__wt_panic(session, ret, "log wrlsn server error"));
}
return (WT_THREAD_RET_VALUE);
}
@@ -947,7 +947,7 @@ __log_server(void *arg)
if (0) {
err:
- WT_PANIC_MSG(session, ret, "log server error");
+ WT_IGNORE_RET(__wt_panic(session, ret, "log server error"));
}
return (WT_THREAD_RET_VALUE);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index 3c28ac121ad..076e64c73ce 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -39,6 +39,8 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[])
*/
conn->default_session = session;
+ __wt_seconds(session, &conn->ckpt_finish_secs);
+
/*
* Publish: there must be a barrier to ensure the connection structure fields are set before
* other threads read from the pointer.
@@ -208,9 +210,18 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[])
/*
* Run recovery. NOTE: This call will start (and stop) eviction if recovery is required.
* Recovery must run before the history store table is created (because recovery will update the
- * metadata), and before eviction is started for real.
+ * metadata, and set the maximum file id seen), and before eviction is started for real.
*/
- WT_RET(__wt_txn_recover(session));
+ WT_RET(__wt_txn_recover(session, cfg));
+
+ /* Initialize metadata tracking, required before creating tables. */
+ WT_RET(__wt_meta_track_init(session));
+
+ /*
+ * Create the history store file. This will only actually create it on upgrade or when creating
+ * a new database.
+ */
+ WT_RET(__wt_hs_create(session, cfg));
/*
* Start the optional logging/archive threads. NOTE: The log manager must be started before
@@ -219,12 +230,6 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[])
*/
WT_RET(__wt_logmgr_open(session));
- /* Initialize metadata tracking, required before creating tables. */
- WT_RET(__wt_meta_track_init(session));
-
- /* Create the history store table. */
- WT_RET(__wt_hs_create(session, cfg));
-
/*
* Start eviction threads. NOTE: Eviction must be started after the history store table is
* created.
diff --git a/src/third_party/wiredtiger/src/conn/conn_stat.c b/src/third_party/wiredtiger/src/conn/conn_stat.c
index 455f10ea905..b3a1b3e979a 100644
--- a/src/third_party/wiredtiger/src/conn/conn_stat.c
+++ b/src/third_party/wiredtiger/src/conn/conn_stat.c
@@ -596,7 +596,7 @@ __statlog_server(void *arg)
if (0) {
err:
- WT_PANIC_MSG(session, ret, "statistics log server error");
+ WT_IGNORE_RET(__wt_panic(session, ret, "statistics log server error"));
}
__wt_buf_free(session, &path);
__wt_buf_free(session, &tmp);
diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c
index eeb7ffa514c..934acd228e8 100644
--- a/src/third_party/wiredtiger/src/conn/conn_sweep.c
+++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c
@@ -325,7 +325,7 @@ __sweep_server(void *arg)
if (0) {
err:
- WT_PANIC_MSG(session, ret, "handle sweep server error");
+ WT_IGNORE_RET(__wt_panic(session, ret, "handle sweep server error"));
}
return (WT_THREAD_RET_VALUE);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c
index 05dc7e2ff9b..4011a62b0d5 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c
@@ -319,7 +319,7 @@ __backup_add_id(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval)
* We didn't find an entry. This should not happen.
*/
if (i == WT_BLKINCR_MAX)
- WT_PANIC_RET(session, WT_NOTFOUND, "Could not find an incremental backup slot to use");
+ WT_RET_PANIC(session, WT_NOTFOUND, "Could not find an incremental backup slot to use");
/* Use the slot. */
if (blk->id_str != NULL)
@@ -610,7 +610,7 @@ __backup_start(
* Single thread hot backups: we're holding the schema lock, so we know we'll serialize with
* other attempts to start a hot backup.
*/
- if (conn->hot_backup && !is_dup)
+ if (conn->hot_backup_start != 0 && !is_dup)
WT_RET_MSG(session, EINVAL, "there is already a backup cursor open");
if (F_ISSET(session, WT_SESSION_BACKUP_DUP) && is_dup)
@@ -766,7 +766,7 @@ __backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
WT_TRET(__wt_backup_file_remove(session));
/* Checkpoint deletion and next hot backup can proceed. */
- WT_WITH_HOTBACKUP_WRITE_LOCK(session, conn->hot_backup = false);
+ WT_WITH_HOTBACKUP_WRITE_LOCK(session, conn->hot_backup_start = 0);
F_CLR(session, WT_SESSION_BACKUP_CURSOR);
return (ret);
diff --git a/src/third_party/wiredtiger/src/cursor/cur_bulk.c b/src/third_party/wiredtiger/src/cursor/cur_bulk.c
index 09a9057355d..6eb4351276b 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_bulk.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_bulk.c
@@ -18,8 +18,8 @@ __bulk_col_keycmp_err(WT_CURSOR_BULK *cbulk)
WT_CURSOR *cursor;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session;
cursor = &cbulk->cbt.iface;
+ session = CUR2S(cbulk);
WT_RET_MSG(session, EINVAL, "bulk-load presented with out-of-order keys: %" PRIu64
" is less "
@@ -196,8 +196,8 @@ __bulk_row_keycmp_err(WT_CURSOR_BULK *cbulk)
WT_DECL_RET;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session;
cursor = &cbulk->cbt.iface;
+ session = CUR2S(cbulk);
WT_ERR(__wt_scr_alloc(session, 512, &a));
WT_ERR(__wt_scr_alloc(session, 512, &b));
diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c
index 7dfb3bca218..b2d75494110 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_file.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_file.c
@@ -542,7 +542,7 @@ __curfile_cache(WT_CURSOR *cursor)
WT_SESSION_IMPL *session;
cbt = (WT_CURSOR_BTREE *)cursor;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
cbt->dhandle = cbt->btree->dhandle;
WT_TRET(__wt_cursor_cache(cursor, cbt->dhandle));
@@ -565,7 +565,7 @@ __curfile_reopen(WT_CURSOR *cursor, bool check_only)
cbt = (WT_CURSOR_BTREE *)cursor;
dhandle = cbt->dhandle;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
if (check_only)
return (WT_DHANDLE_CAN_REOPEN(dhandle) ? 0 : WT_NOTFOUND);
@@ -655,7 +655,7 @@ __curfile_create(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[],
cacheable = F_ISSET(session, WT_SESSION_CACHE_CURSORS) && !bulk;
WT_RET(__wt_calloc(session, 1, csize, &cbt));
- cursor = (WT_CURSOR *)cbt;
+ cursor = &cbt->iface;
*cursor = iface;
cursor->session = (WT_SESSION *)session;
cursor->internal_uri = btree->dhandle->name;
diff --git a/src/third_party/wiredtiger/src/cursor/cur_index.c b/src/third_party/wiredtiger/src/cursor/cur_index.c
index 0f1fab36bf8..9cc1ba83a4f 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_index.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_index.c
@@ -103,7 +103,7 @@ __curindex_move(WT_CURSOR_INDEX *cindex)
WT_SESSION_IMPL *session;
u_int i;
- session = (WT_SESSION_IMPL *)cindex->iface.session;
+ session = CUR2S(cindex);
first = NULL;
/* Point the public cursor to the key in the child. */
diff --git a/src/third_party/wiredtiger/src/cursor/cur_join.c b/src/third_party/wiredtiger/src/cursor/cur_join.c
index bb2497f3d19..06159cb54bd 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_join.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_join.c
@@ -35,7 +35,7 @@ __wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_ATTRIBUTE((cold))
{
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
WT_RET_MSG(session, ENOTSUP, "cursor is being used in a join");
}
@@ -770,7 +770,7 @@ __curjoin_init_bloom(
goto done;
WT_ERR(ret);
} else
- WT_PANIC_ERR(session, EINVAL, "fatal error in join cursor position state");
+ WT_ERR_PANIC(session, EINVAL, "fatal error in join cursor position state");
}
collator = (entry->index == NULL) ? NULL : entry->index->collator;
while (ret == 0) {
diff --git a/src/third_party/wiredtiger/src/cursor/cur_json.c b/src/third_party/wiredtiger/src/cursor/cur_json.c
index 692ab34d210..7e9ac93eea6 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_json.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_json.c
@@ -373,7 +373,7 @@ __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat,
const char *beginkey, *end, *lparen, *p;
json = (WT_CURSOR_JSON *)cursor->json_private;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
beginkey = colconf->str;
end = beginkey + colconf->len;
diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c
index 6743845cdba..f3e2b2930dc 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_stat.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c
@@ -487,7 +487,7 @@ __curstat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **resultp)
const char *static_desc;
sgrp = &cst->u.join_stats_group;
- session = (WT_SESSION_IMPL *)sgrp->join_cursor->iface.session;
+ session = CUR2S(sgrp->join_cursor);
WT_RET(__wt_stat_join_desc(cst, slot, &static_desc));
len = strlen("join: ") + strlen(sgrp->desc_prefix) + strlen(static_desc) + 1;
WT_RET(__wt_realloc(session, NULL, len, &cst->desc_buf));
diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c
index a65bb55a8ba..fd81465eb76 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_std.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_std.c
@@ -29,7 +29,7 @@ __wt_cursor_cached(WT_CURSOR *cursor)
{
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
WT_RET_MSG(session, ENOTSUP, "Cursor has been closed");
}
@@ -42,7 +42,7 @@ __wt_cursor_notsup(WT_CURSOR *cursor)
{
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
WT_RET_MSG(session, ENOTSUP, "Unsupported cursor operation");
}
@@ -142,7 +142,7 @@ __wt_cursor_modify_value_format_notsup(WT_CURSOR *cursor, WT_MODIFY *entries, in
WT_UNUSED(nentries);
if (cursor->value_format != NULL && strlen(cursor->value_format) != 0) {
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
WT_RET_MSG(session, ENOTSUP,
"WT_CURSOR.modify only supported for 'S' and 'u' value "
"formats");
@@ -221,7 +221,7 @@ __wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key) WT_GCC_FUNC_ATTRIBUTE((cold)
{
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
WT_RET_MSG(session, cursor->saved_err == 0 ? EINVAL : cursor->saved_err, "requires %s be set",
key ? "key" : "value");
@@ -238,7 +238,7 @@ __wt_cursor_copy_release_item(WT_CURSOR *cursor, WT_ITEM *item) WT_GCC_FUNC_ATTR
WT_DECL_RET;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
/* Bail out if the item has been cleared. */
if (item->data == NULL)
@@ -646,7 +646,7 @@ __wt_cursor_cache(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle)
WT_SESSION_IMPL *session;
uint64_t bucket;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
WT_ASSERT(session, !F_ISSET(cursor, WT_CURSTD_CACHED) && dhandle != NULL);
WT_TRET(cursor->reset(cursor));
@@ -687,7 +687,7 @@ __wt_cursor_reopen(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle)
WT_SESSION_IMPL *session;
uint64_t bucket;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
WT_ASSERT(session, F_ISSET(cursor, WT_CURSTD_CACHED));
if (dhandle != NULL) {
@@ -892,7 +892,7 @@ __wt_cursor_close(WT_CURSOR *cursor)
{
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
if (F_ISSET(cursor, WT_CURSTD_OPEN)) {
TAILQ_REMOVE(&session->cursors, cursor, q);
@@ -1066,7 +1066,7 @@ __wt_cursor_init(
WT_SESSION_IMPL *session;
bool readonly;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
if (cursor->internal_uri == NULL)
WT_RET(__wt_strdup(session, uri, &cursor->internal_uri));
diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c
index 4fd78188c39..b5edf359059 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_table.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_table.c
@@ -150,7 +150,7 @@ __apply_idx(WT_CURSOR_TABLE *ctable, size_t func_off, bool skip_immutable)
int (*f)(WT_CURSOR *);
cp = ctable->idx_cursors;
- session = (WT_SESSION_IMPL *)ctable->iface.session;
+ session = CUR2S(ctable);
for (i = 0; i < ctable->table->nindices; i++, cp++) {
idx = ctable->table->indices[i];
@@ -729,7 +729,7 @@ __wt_table_range_truncate(WT_CURSOR_TABLE *start, WT_CURSOR_TABLE *stop)
int cmp;
ctable = (start != NULL) ? start : stop;
- session = (WT_SESSION_IMPL *)ctable->iface.session;
+ session = CUR2S(ctable);
wt_start = &start->iface;
wt_stop = &stop->iface;
@@ -877,7 +877,7 @@ __curtable_open_colgroups(WT_CURSOR_TABLE *ctable, const char *cfg_arg[])
const char *cfg[] = {cfg_arg[0], cfg_arg[1], "dump=\"\",readonly=0", NULL, NULL};
u_int i;
- session = (WT_SESSION_IMPL *)ctable->iface.session;
+ session = CUR2S(ctable);
table = ctable->table;
WT_RET(__curtable_complete(session, table)); /* completeness check */
@@ -904,7 +904,7 @@ __curtable_open_indices(WT_CURSOR_TABLE *ctable)
WT_TABLE *table;
u_int i;
- session = (WT_SESSION_IMPL *)ctable->iface.session;
+ session = CUR2S(ctable);
table = ctable->table;
WT_RET(__wt_schema_open_indices(session, table));
diff --git a/src/third_party/wiredtiger/src/docs/backup.dox b/src/third_party/wiredtiger/src/docs/backup.dox
index 610033d05cf..ac18263eff0 100644
--- a/src/third_party/wiredtiger/src/docs/backup.dox
+++ b/src/third_party/wiredtiger/src/docs/backup.dox
@@ -56,10 +56,9 @@ aggregate the file names from the cursor and then list the file names as
arguments to a file archiver such as the system tar utility.
During the period the backup cursor is open, database checkpoints can
-be created, but no checkpoints can be deleted. This may result in
-significant file growth. Additionally while the backup cursor is open
-automatic log file archiving, even if enabled, will not reclaim any
-log files.
+be created, but checkpoints created prior to the backup cursor cannot
+be deleted. Additionally while the backup cursor is open automatic log
+file archiving, even if enabled, will not reclaim any log files.
Additionally, if a crash occurs during the period the backup cursor is
open and logging is disabled (in other words, when depending on
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 455e8c15bef..bbc6f3af565 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -325,7 +325,7 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread)
if (0) {
err:
- WT_PANIC_RET(session, ret, "cache eviction thread error");
+ WT_RET_PANIC(session, ret, "cache eviction thread error");
}
return (ret);
}
@@ -362,7 +362,7 @@ __wt_evict_thread_stop(WT_SESSION_IMPL *session, WT_THREAD *thread)
if (0) {
err:
- WT_PANIC_RET(session, ret, "cache eviction thread error");
+ WT_RET_PANIC(session, ret, "cache eviction thread error");
}
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index ec93cf88a75..6d0d3be7fc8 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -122,11 +122,12 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint8_t previous_state, uint32
* affect those already-running history store operations by changing the cursor state. When
* doing history store operations, we set the no-reconciliation flag, use it as short-hand to
* avoid that problem. This doesn't open up the window for the deadlock because setting the
- * no-reconciliation flag limits eviction to in-memory splits. FIXME: This isn't reasonable and
- * needs a better fix.
+ * no-reconciliation flag limits eviction to in-memory splits.
*
* The test for the connection's default session is because there are known problems with using
- * cached cursors from the default session. FIXME: This isn't reasonable and needs a better fix.
+ * cached cursors from the default session.
+ *
+ * FIXME-WT-6037: This isn't reasonable and needs a better fix.
*/
if (!WT_IS_METADATA(S2BT(session)->dhandle) && !F_ISSET(conn, WT_CONN_IN_MEMORY) &&
session->hs_cursor == NULL && !F_ISSET(session, WT_SESSION_NO_RECONCILE) &&
diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c
index 2770c48ad53..ed1db846793 100644
--- a/src/third_party/wiredtiger/src/history/hs.c
+++ b/src/third_party/wiredtiger/src/history/hs.c
@@ -9,10 +9,19 @@
#include "wt_internal.h"
/*
+ * WT_HS_TIME_PAIR --
+ * A pair containing a timestamp and transaction id.
+ */
+typedef struct {
+ wt_timestamp_t timestamp;
+ uint64_t txnid;
+} WT_HS_TIME_PAIR;
+
+/*
* When an operation is accessing the history store table, it should ignore the cache size (since
- * the cache is already full), and the operation can't reenter reconciliation.
+ * the cache is already full).
*/
-#define WT_HS_SESSION_FLAGS (WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_RECONCILE)
+#define WT_HS_SESSION_FLAGS WT_SESSION_IGNORE_CACHE_SIZE
static int __hs_delete_key_from_pos(
WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id, const WT_ITEM *key);
@@ -282,7 +291,7 @@ __wt_hs_modify(WT_CURSOR_BTREE *hs_cbt, WT_UPDATE *hs_upd)
WT_SESSION_IMPL *session;
WT_UPDATE *last_upd;
- session = (WT_SESSION_IMPL *)hs_cbt->iface.session;
+ session = CUR2S(hs_cbt);
/* If there are existing updates, append them after the new updates. */
if (hs_cbt->compare == 0) {
@@ -355,15 +364,14 @@ __hs_insert_updates_verbose(WT_SESSION_IMPL *session, WT_BTREE *btree)
static int
__hs_insert_record_with_btree_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
const WT_ITEM *key, const WT_UPDATE *upd, const uint8_t type, const WT_ITEM *hs_value,
- WT_TIME_PAIR stop_ts_pair)
+ WT_HS_TIME_PAIR stop_ts_pair)
{
WT_CURSOR_BTREE *cbt;
WT_DECL_RET;
- WT_UPDATE *hs_upd;
- uint32_t session_flags;
+ WT_UPDATE *hs_upd, *upd_local;
cbt = (WT_CURSOR_BTREE *)cursor;
- hs_upd = NULL;
+ hs_upd = upd_local = NULL;
/*
* Use WT_CURSOR.set_key and WT_CURSOR.set_value to create key and value items, then use them to
@@ -373,23 +381,32 @@ __hs_insert_record_with_btree_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, W
cursor, btree->id, key, upd->start_ts, __wt_atomic_add64(&btree->hs_counter, 1));
cursor->set_value(cursor, stop_ts_pair.timestamp, upd->durable_ts, (uint64_t)type, hs_value);
- /*
- * Insert a delete record to represent stop time pair for the actual record to be inserted. Set
- * the stop time pair as the commit time pair of the history store delete record.
- */
- WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL));
- hs_upd->start_ts = stop_ts_pair.timestamp;
- hs_upd->durable_ts = stop_ts_pair.timestamp;
- hs_upd->txnid = stop_ts_pair.txnid;
+ /* Allocate a tombstone only when there is a valid stop time pair. */
+ if (stop_ts_pair.timestamp != WT_TS_MAX || stop_ts_pair.txnid != WT_TXN_MAX) {
+ /*
+ * Insert a delete record to represent stop time pair for the actual record to be inserted.
+ * Set the stop time pair as the commit time pair of the history store delete record.
+ */
+ WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL));
+ hs_upd->start_ts = stop_ts_pair.timestamp;
+ hs_upd->durable_ts = stop_ts_pair.timestamp;
+ hs_upd->txnid = stop_ts_pair.txnid;
+ }
/*
* Append to the delete record, the actual record to be inserted into the history store. Set the
* current update start time pair as the commit time pair to the history store record.
*/
- WT_ERR(__wt_upd_alloc(session, &cursor->value, WT_UPDATE_STANDARD, &hs_upd->next, NULL));
- hs_upd->next->start_ts = upd->start_ts;
- hs_upd->next->durable_ts = upd->durable_ts;
- hs_upd->next->txnid = upd->txnid;
+ WT_ERR(__wt_upd_alloc(session, &cursor->value, WT_UPDATE_STANDARD, &upd_local, NULL));
+ upd_local->start_ts = upd->start_ts;
+ upd_local->durable_ts = upd->durable_ts;
+ upd_local->txnid = upd->txnid;
+
+ /* Insert the standard update as next update if there is a tombstone. */
+ if (hs_upd != NULL)
+ hs_upd->next = upd_local;
+ else
+ hs_upd = upd_local;
/*
* Search the page and insert the updates. We expect there will be no existing data: assert that
@@ -425,8 +442,7 @@ err:
*/
WT_TRET(__wt_cursor_key_order_init(cbt));
#endif
- session_flags = session->flags;
- F_SET(session, WT_SESSION_IGNORE_HS_TOMBSTONE);
+ F_SET(cursor, WT_CURSTD_IGNORE_TOMBSTONE);
/* We're pointing at the newly inserted update. Iterate once more to avoid deleting it. */
ret = cursor->next(cursor);
if (ret == WT_NOTFOUND)
@@ -435,8 +451,7 @@ err:
WT_TRET(__hs_delete_key_from_pos(session, cursor, btree->id, key));
WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
}
- if (!FLD_ISSET(session_flags, WT_SESSION_IGNORE_HS_TOMBSTONE))
- F_CLR(session, WT_SESSION_IGNORE_HS_TOMBSTONE);
+ F_CLR(cursor, WT_CURSTD_IGNORE_TOMBSTONE);
}
/* We did a row search, release the cursor so that the page doesn't continue being held. */
cursor->reset(cursor);
@@ -452,7 +467,7 @@ err:
static int
__hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
const WT_ITEM *key, const WT_UPDATE *upd, const uint8_t type, const WT_ITEM *hs_value,
- WT_TIME_PAIR stop_ts_pair)
+ WT_HS_TIME_PAIR stop_ts_pair)
{
WT_DECL_RET;
@@ -505,7 +520,7 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
*/
static int
__hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, const WT_ITEM *key,
- const WT_UPDATE *upd, const uint8_t type, const WT_ITEM *hs_value, WT_TIME_PAIR stop_ts_pair)
+ const WT_UPDATE *upd, const uint8_t type, const WT_ITEM *hs_value, WT_HS_TIME_PAIR stop_ts_pair)
{
WT_CURSOR_BTREE *cbt;
WT_DECL_RET;
@@ -526,7 +541,7 @@ __hs_calculate_full_value(WT_SESSION_IMPL *session, WT_ITEM *full_value, WT_UPDA
{
if (upd->type == WT_UPDATE_MODIFY) {
WT_RET(__wt_buf_set(session, full_value, base_full_value, size));
- WT_RET(__wt_modify_apply_item(session, full_value, upd->data, false));
+ WT_RET(__wt_modify_apply_item(session, S2BT(session)->value_format, full_value, upd->data));
} else {
WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD);
full_value->data = upd->data;
@@ -541,8 +556,10 @@ __hs_calculate_full_value(WT_SESSION_IMPL *session, WT_ITEM *full_value, WT_UPDA
* Copy one set of saved updates into the database's history store table.
*/
int
-__wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MULTI *multi)
+__wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
{
+ WT_BTREE *btree;
+ WT_CURSOR *cursor;
WT_DECL_ITEM(full_value);
WT_DECL_ITEM(key);
WT_DECL_ITEM(modify_value);
@@ -554,9 +571,8 @@ __wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MUL
WT_MODIFY entries[MAX_REVERSE_MODIFY_NUM];
WT_MODIFY_VECTOR modifies;
WT_SAVE_UPD *list;
- WT_SESSION_IMPL *session;
WT_UPDATE *prev_upd, *upd;
- WT_TIME_PAIR stop_ts_pair;
+ WT_HS_TIME_PAIR stop_ts_pair;
wt_off_t hs_size;
uint64_t insert_cnt, max_hs_size;
uint32_t i;
@@ -564,8 +580,9 @@ __wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MUL
int nentries;
bool squashed;
+ btree = S2BT(session);
+ cursor = session->hs_cursor;
prev_upd = NULL;
- session = (WT_SESSION_IMPL *)cursor->session;
insert_cnt = 0;
__wt_modify_vector_init(session, &modifies);
@@ -585,9 +602,6 @@ __wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MUL
if (list->onpage_upd == NULL)
continue;
- /* onpage_upd now is always from the update chain */
- WT_ASSERT(session, !F_ISSET(list->onpage_upd, WT_UPDATE_RESTORED_FROM_DISK));
-
/* History store table key component: source key. */
switch (page->type) {
case WT_PAGE_COL_FIX:
@@ -696,13 +710,25 @@ __wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MUL
__wt_modify_vector_pop(&modifies, &prev_upd);
/*
- * Set the stop timestamp from durable timestamp instead of commit timestamp. The
- * Garbage collection of history store removes the history values once the stop
- * timestamp is globally visible. i.e. durable timestamp of data store version.
+ * For any uncommitted prepared updates written to disk, the stop timestamp of the last
+ * update moved into the history store should be with max visibility to protect its
+ * removal by checkpoint garbage collection until the data store update is committed.
*/
- WT_ASSERT(session, prev_upd->start_ts <= prev_upd->durable_ts);
- stop_ts_pair.timestamp = prev_upd->durable_ts;
- stop_ts_pair.txnid = prev_upd->txnid;
+ if (prev_upd->prepare_state == WT_PREPARE_INPROGRESS) {
+ WT_ASSERT(session,
+ list->onpage_upd == prev_upd || list->onpage_upd->txnid == prev_upd->txnid);
+ stop_ts_pair.timestamp = WT_TS_MAX;
+ stop_ts_pair.txnid = WT_TXN_MAX;
+ } else {
+ /*
+ * Set the stop timestamp from durable timestamp instead of commit timestamp. The
+ * garbage collection of history store removes the history values once the stop
+ * timestamp is globally visible. i.e. durable timestamp of data store version.
+ */
+ WT_ASSERT(session, prev_upd->start_ts <= prev_upd->durable_ts);
+ stop_ts_pair.timestamp = prev_upd->durable_ts;
+ stop_ts_pair.txnid = prev_upd->txnid;
+ }
if (prev_upd->type == WT_UPDATE_TOMBSTONE) {
WT_ASSERT(session, modifies.size > 0);
@@ -764,10 +790,9 @@ __wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MUL
WT_STAT_CONN_SET(session, cache_hs_ondisk, hs_size);
max_hs_size = ((WT_CURSOR_BTREE *)cursor)->btree->file_max;
if (max_hs_size != 0 && (uint64_t)hs_size > max_hs_size)
- WT_PANIC_ERR(session, WT_PANIC, "WiredTigerHS: file size of %" PRIu64
- " exceeds maximum "
- "size %" PRIu64,
- (uint64_t)hs_size, max_hs_size);
+ WT_ERR_PANIC(session, WT_PANIC,
+ "WiredTigerHS: file size of %" PRIu64 " exceeds maximum size %" PRIu64, (uint64_t)hs_size,
+ max_hs_size);
err:
if (ret == 0 && insert_cnt > 0)
@@ -810,9 +835,6 @@ __wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t bt
* Note that we need to compare the raw key off the cursor to determine where we are in the
* history store as opposed to comparing the embedded data store key since the ordering is not
* guaranteed to be the same.
- *
- * FIXME: We should be repeatedly moving the cursor backwards within the loop instead of doing a
- * search near operation each time as it is cheaper.
*/
cursor->set_key(
cursor, btree_id, key, timestamp != WT_TS_NONE ? timestamp : WT_TS_MAX, UINT64_MAX);
@@ -863,15 +885,14 @@ __hs_restore_read_timestamp(WT_SESSION_IMPL *session)
* prepare conflict will be returned upon reading a prepared update.
*/
int
-__wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDATE **updp,
- bool allow_prepare, WT_ITEM *on_disk_buf)
+__wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, uint64_t recno,
+ WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf)
{
WT_CURSOR *hs_cursor;
- WT_DECL_ITEM(hs_key);
WT_DECL_ITEM(hs_value);
WT_DECL_ITEM(orig_hs_value_buf);
WT_DECL_RET;
- WT_ITEM recno_key;
+ WT_ITEM hs_key, recno_key;
WT_MODIFY_VECTOR modifies;
WT_TXN *txn;
WT_UPDATE *mod_upd, *upd;
@@ -883,11 +904,10 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA
int cmp;
bool is_owner, modify;
- *updp = NULL;
-
hs_cursor = NULL;
mod_upd = upd = NULL;
orig_hs_value_buf = NULL;
+ WT_CLEAR(hs_key);
__wt_modify_vector_init(session, &modifies);
txn = session->txn;
hs_btree_id = S2BT(session)->id;
@@ -914,8 +934,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA
key->size = WT_PTRDIFF(p, recno_key_buf);
}
- /* Allocate buffers for the history store key/value. */
- WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
+ /* Allocate buffer for the history store value. */
WT_ERR(__wt_scr_alloc(session, 0, &hs_value));
/* Open a history store table cursor. */
@@ -934,7 +953,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA
ret = 0;
goto done;
}
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
/* Stop before crossing over to the next btree */
if (hs_btree_id != S2BT(session)->id)
@@ -944,7 +963,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA
* Keys are sorted in an order, skip the ones before the desired key, and bail out if we have
* crossed over the desired key and not found the record we are looking for.
*/
- WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp));
+ WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
if (cmp != 0)
goto done;
@@ -956,6 +975,13 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA
WT_ASSERT(session, upd_type != WT_UPDATE_TOMBSTONE);
/*
+ * If the caller has signalled they don't need the value buffer, don't bother reconstructing a
+ * modify update or copying the contents into the value buffer.
+ */
+ if (upd_value->skip_buf)
+ goto skip_buf;
+
+ /*
* Keep walking until we get a non-modify update. Once we get to that point, squash the updates
* together.
*/
@@ -1008,9 +1034,9 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA
* reverse deltas on top of.
*/
WT_ERR(hs_cursor->get_key(
- hs_cursor, &hs_btree_id, hs_key, &hs_start_ts_tmp, &hs_counter_tmp));
+ hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts_tmp, &hs_counter_tmp));
- WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp));
+ WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
if (cmp != 0) {
/* Fallback to the onpage value as the base value. */
@@ -1028,7 +1054,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA
WT_ASSERT(session, upd_type == WT_UPDATE_STANDARD);
while (modifies.size > 0) {
__wt_modify_vector_pop(&modifies, &mod_upd);
- WT_ERR(__wt_modify_apply_item(session, hs_value, mod_upd->data, false));
+ WT_ERR(__wt_modify_apply_item(session, value_format, hs_value, mod_upd->data));
__wt_free_update_list(session, &mod_upd);
mod_upd = NULL;
}
@@ -1037,19 +1063,18 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA
WT_STAT_CONN_INCR(session, cache_hs_read_squash);
}
- /* Allocate an update structure for the record found. */
- WT_ERR(__wt_upd_alloc(session, hs_value, upd_type, &upd, NULL));
- upd->txnid = WT_TXN_NONE;
- upd->durable_ts = durable_timestamp;
- upd->start_ts = hs_start_ts;
- upd->prepare_state = upd->start_ts == upd->durable_ts ? WT_PREPARE_INIT : WT_PREPARE_RESOLVED;
-
/*
- * We're not keeping this in our update list as we want to get rid of it after the read has been
- * dealt with. Mark this update as external and to be discarded when not needed.
+ * Potential optimization: We can likely get rid of this copy and the update allocation above.
+ * We already have buffers containing the modify values so there's no good reason to allocate an
+ * update other than to work with our modify vector implementation.
*/
- F_SET(upd, WT_UPDATE_RESTORED_FROM_DISK);
- *updp = upd;
+ WT_ERR(__wt_buf_set(session, &upd_value->buf, hs_value->data, hs_value->size));
+skip_buf:
+ upd_value->start_ts = hs_start_ts;
+ upd_value->txnid = WT_TXN_NONE;
+ upd_value->type = upd_type;
+ upd_value->prepare_state =
+ (hs_start_ts == durable_timestamp) ? WT_PREPARE_INIT : WT_PREPARE_RESOLVED;
done:
err:
@@ -1059,7 +1084,7 @@ err:
__wt_scr_free(session, &orig_hs_value_buf);
else
__wt_scr_free(session, &hs_value);
- __wt_scr_free(session, &hs_key);
+ WT_ASSERT(session, hs_key.mem == NULL && hs_key.memsize == 0);
/*
* Restore the read timestamp if we encountered an error while processing a modify. There's no
@@ -1172,17 +1197,19 @@ __wt_hs_delete_key(WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *k
return (0);
WT_RET(__wt_hs_cursor(session, &session_flags, &is_owner));
+
/*
* In order to delete a key range, we need to be able to inspect all history store records
* regardless of their stop time pairs.
*/
- F_SET(session, WT_SESSION_IGNORE_HS_TOMBSTONE);
+ F_SET(session->hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE);
+
/* The tree structure can change while we try to insert the mod list, retry if that happens. */
while ((ret = __hs_delete_key_int(session, btree_id, key)) == WT_RESTART)
;
- if (!FLD_ISSET(session_flags, WT_SESSION_IGNORE_HS_TOMBSTONE))
- F_CLR(session, WT_SESSION_IGNORE_HS_TOMBSTONE);
+ F_CLR(session->hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE);
+
WT_TRET(__wt_hs_cursor_close(session, session_flags, is_owner));
return (ret);
}
@@ -1252,29 +1279,38 @@ err:
static int
__verify_history_store_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint32_t this_btree_id)
{
- WT_CURSOR *cursor;
- WT_DECL_ITEM(hs_key);
+ WT_CURSOR *hs_cursor;
WT_DECL_ITEM(prev_hs_key);
- WT_DECL_ITEM(tmp);
WT_DECL_RET;
+ WT_ITEM hs_key;
wt_timestamp_t hs_start_ts;
uint64_t hs_counter;
uint32_t btree_id;
int cmp;
bool found;
- cursor = session->hs_cursor;
+ hs_cursor = session->hs_cursor;
+ WT_CLEAR(hs_key);
- WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
WT_ERR(__wt_scr_alloc(session, 0, &prev_hs_key));
/*
+ * We need to be able to iterate over the history store content for another table. In order to
+ * do this, we must ignore non-globally visible tombstones in the history store since every
+ * history store record is succeeded with a tombstone. We also need to skip the non-globally
+ * visible tombstones in the data table to verify the corresponding entries in the history store
+ * are too present in the data store.
+ */
+ F_SET(hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE);
+ F_SET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE);
+
+ /*
* The caller is responsible for positioning the history store cursor at the first record to
* verify. When we return after moving to a new key the caller is responsible for keeping the
* cursor there or deciding they're done.
*/
- for (; ret == 0; ret = cursor->next(cursor)) {
- WT_ERR(cursor->get_key(cursor, &btree_id, hs_key, &hs_start_ts, &hs_counter));
+ for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
+ WT_ERR(hs_cursor->get_key(hs_cursor, &btree_id, &hs_key, &hs_start_ts, &hs_counter));
/*
* If the btree id does not match the preview one, we're done. It is up to the caller to set
@@ -1290,34 +1326,34 @@ __verify_history_store_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint32
* If we have already checked against this key, keep going to the next key. We only need to
* check the key once.
*/
- WT_ERR(__wt_compare(session, NULL, hs_key, prev_hs_key, &cmp));
+ WT_ERR(__wt_compare(session, NULL, &hs_key, prev_hs_key, &cmp));
if (cmp == 0)
continue;
- WT_WITH_PAGE_INDEX(session, ret = __wt_row_search(cbt, hs_key, false, NULL, false, NULL));
+ WT_WITH_PAGE_INDEX(session, ret = __wt_row_search(cbt, &hs_key, false, NULL, false, NULL));
WT_ERR(ret);
-/* FIXME: temporarily disable hs verification. */
-#if 0
found = cbt->compare == 0;
-#else
- found = true;
-#endif
WT_ERR(__cursor_reset(cbt));
- if (!found)
- WT_ERR_MSG(session, WT_PANIC,
+ if (!found) {
+ F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
+ WT_ERR_PANIC(session, WT_PANIC,
"the associated history store key %s was not found in the data store %s",
- __wt_buf_set_printable(session, hs_key->data, hs_key->size, prev_hs_key),
+ __wt_buf_set_printable(session, hs_key.data, hs_key.size, prev_hs_key),
session->dhandle->name);
+ }
- /* Swap current/previous buffers. */
- tmp = hs_key;
- hs_key = prev_hs_key;
- prev_hs_key = tmp;
+ /*
+ * Copy the key memory into our scratch buffer. The key will get invalidated on our next
+ * cursor iteration.
+ */
+ WT_ERR(__wt_buf_set(session, prev_hs_key, hs_key.data, hs_key.size));
}
WT_ERR_NOTFOUND_OK(ret, true);
err:
- __wt_scr_free(session, &hs_key);
+ F_CLR(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE);
+ F_CLR(hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE);
+ WT_ASSERT(session, hs_key.mem == NULL && hs_key.memsize == 0);
__wt_scr_free(session, &prev_hs_key);
return (ret);
}
@@ -1370,8 +1406,8 @@ __wt_history_store_verify(WT_SESSION_IMPL *session)
{
WT_CURSOR *cursor, *data_cursor;
WT_DECL_ITEM(buf);
- WT_DECL_ITEM(hs_key);
WT_DECL_RET;
+ WT_ITEM hs_key;
wt_timestamp_t hs_start_ts;
uint64_t hs_counter;
uint32_t btree_id, session_flags;
@@ -1382,13 +1418,13 @@ __wt_history_store_verify(WT_SESSION_IMPL *session)
WT_ASSERT(session, S2C(session)->default_session != session);
cursor = data_cursor = NULL;
+ WT_CLEAR(hs_key);
btree_id = WT_BTREE_ID_INVALID;
session_flags = 0; /* [-Wconditional-uninitialized] */
uri_data = NULL;
is_owner = false; /* [-Wconditional-uninitialized] */
WT_ERR(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
WT_ERR(__wt_hs_cursor(session, &session_flags, &is_owner));
cursor = session->hs_cursor;
ret = cursor->next(cursor);
@@ -1405,12 +1441,14 @@ __wt_history_store_verify(WT_SESSION_IMPL *session)
* The cursor is positioned either from above or left over from the internal call on the
* first key of a new btree id.
*/
- WT_ERR(cursor->get_key(cursor, &btree_id, hs_key, &hs_start_ts, &hs_counter));
- if ((ret = __wt_metadata_btree_id_to_uri(session, btree_id, &uri_data)) != 0)
- WT_ERR_MSG(session, WT_PANIC,
+ WT_ERR(cursor->get_key(cursor, &btree_id, &hs_key, &hs_start_ts, &hs_counter));
+ if ((ret = __wt_metadata_btree_id_to_uri(session, btree_id, &uri_data)) != 0) {
+ F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
+ WT_ERR_PANIC(session, WT_PANIC,
"Unable to find btree id %" PRIu32
" in the metadata file for the associated history store key %s",
- btree_id, __wt_buf_set_printable(session, hs_key->data, hs_key->size, buf));
+ btree_id, __wt_buf_set_printable(session, hs_key.data, hs_key.size, buf));
+ }
WT_ERR(__wt_open_cursor(session, uri_data, NULL, NULL, &data_cursor));
F_SET(data_cursor, WT_CURSOR_RAW_OK);
ret = __verify_history_store_id(session, (WT_CURSOR_BTREE *)data_cursor, btree_id);
@@ -1423,7 +1461,7 @@ err:
WT_TRET(__wt_hs_cursor_close(session, session_flags, is_owner));
__wt_scr_free(session, &buf);
- __wt_scr_free(session, &hs_key);
+ WT_ASSERT(session, hs_key.mem == NULL && hs_key.memsize == 0);
__wt_free(session, uri_data);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 513e0106e53..1bcca8dc686 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -126,14 +126,7 @@ __wt_page_header_byteswap(WT_PAGE_HEADER *dsk)
* An in-memory structure to hold a block's location.
*/
struct __wt_addr {
- /* Validity window */
- wt_timestamp_t newest_start_durable_ts;
- wt_timestamp_t oldest_start_ts;
- uint64_t oldest_start_txn;
- wt_timestamp_t newest_stop_durable_ts;
- wt_timestamp_t newest_stop_ts;
- uint64_t newest_stop_txn;
- bool prepare;
+ WT_TIME_AGGREGATE ta;
uint8_t *addr; /* Block-manager's cookie */
uint8_t size; /* Block-manager's cookie length */
@@ -159,14 +152,7 @@ struct __wt_addr {
* copy of the WT_REF address information.
*/
struct __wt_addr_copy {
- /* Validity window */
- wt_timestamp_t newest_start_durable_ts;
- wt_timestamp_t oldest_start_ts;
- uint64_t oldest_start_txn;
- wt_timestamp_t newest_stop_durable_ts;
- wt_timestamp_t newest_stop_ts;
- uint64_t newest_stop_txn;
- bool prepare;
+ WT_TIME_AGGREGATE ta;
uint8_t type;
@@ -640,16 +626,17 @@ struct __wt_page {
uint8_t type; /* Page type */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_PAGE_BUILD_KEYS 0x01u /* Keys have been built in memory */
-#define WT_PAGE_DISK_ALLOC 0x02u /* Disk image in allocated memory */
-#define WT_PAGE_DISK_MAPPED 0x04u /* Disk image in mapped memory */
-#define WT_PAGE_EVICT_LRU 0x08u /* Page is on the LRU queue */
-#define WT_PAGE_EVICT_NO_PROGRESS 0x10u /* Eviction doesn't count as progress */
-#define WT_PAGE_OVERFLOW_KEYS 0x20u /* Page has overflow keys */
-#define WT_PAGE_SPLIT_INSERT 0x40u /* A leaf page was split for append */
-#define WT_PAGE_UPDATE_IGNORE 0x80u /* Ignore updates on page discard */
- /* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */
+#define WT_PAGE_BUILD_KEYS 0x001u /* Keys have been built in memory */
+#define WT_PAGE_DISK_ALLOC 0x002u /* Disk image in allocated memory */
+#define WT_PAGE_DISK_MAPPED 0x004u /* Disk image in mapped memory */
+#define WT_PAGE_EVICT_LRU 0x008u /* Page is on the LRU queue */
+#define WT_PAGE_EVICT_NO_PROGRESS 0x010u /* Eviction doesn't count as progress */
+#define WT_PAGE_INSTANTIATE_PREPARE_UPDATE 0x020u /* Instantiate prepared updates */
+#define WT_PAGE_OVERFLOW_KEYS 0x040u /* Page has overflow keys */
+#define WT_PAGE_SPLIT_INSERT 0x080u /* A leaf page was split for append */
+#define WT_PAGE_UPDATE_IGNORE 0x100u /* Ignore updates on page discard */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */
uint8_t unused[2]; /* Unused padding */
@@ -817,15 +804,6 @@ struct __wt_page_deleted {
};
/*
- * WT_TIME_PAIR --
- * A pair containing a timestamp and transaction id.
- */
-struct __wt_time_pair {
- wt_timestamp_t timestamp;
- uint64_t txnid;
-};
-
-/*
* WT_REF --
* A single in-memory page and state information.
*/
@@ -1098,7 +1076,6 @@ struct __wt_update {
/* AUTOMATIC FLAG VALUE GENERATION START */
#define WT_UPDATE_HS 0x1u /* Update has been written to history store. */
#define WT_UPDATE_RESTORED_FOR_ROLLBACK 0x2u /* Update restored for rollback to stable. */
-#define WT_UPDATE_RESTORED_FROM_DISK 0x4u /* Update is temporary retrieved from disk. */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint8_t flags;
@@ -1122,6 +1099,39 @@ struct __wt_update {
#define WT_UPDATE_MEMSIZE(upd) WT_ALIGN(WT_UPDATE_SIZE + (upd)->size, 32)
/*
+ * WT_UPDATE_VALUE --
+ *
+ * A generic representation of an update's value regardless of where it exists. This structure is
+ * used to represent both in-memory updates and updates that don't exist in an update list such as
+ * reconstructed modify updates, updates in the history store and onpage values.
+ *
+ * The skip buffer flag is an optimization for callers of various read functions to communicate that
+ * they just want to check that an update exists and not read its underlying value. This means that
+ * the read functions can avoid the performance penalty of reconstructing modifies.
+ */
+struct __wt_update_value {
+ WT_ITEM buf;
+ wt_timestamp_t start_ts;
+ uint64_t txnid;
+ uint8_t type;
+ uint8_t prepare_state;
+ bool skip_buf;
+};
+
+/*
+ * WT_WITH_UPDATE_VALUE_SKIP_BUF --
+ *
+ * A helper macro to use for calling read functions when we're checking for the existence of a given
+ * key. This means that read functions can avoid the performance penalty of reconstructing modifies.
+ */
+#define WT_WITH_UPDATE_VALUE_SKIP_BUF(op) \
+ do { \
+ cbt->upd_value->skip_buf = true; \
+ op; \
+ cbt->upd_value->skip_buf = false; \
+ } while (0)
+
+/*
* WT_MAX_MODIFY_UPDATE, WT_MODIFY_VECTOR_STACK_SIZE
* Limit update chains value to avoid penalizing reads and permit truncation. Having a smaller
* value will penalize the cases when history has to be maintained, resulting in multiplying cache
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index cd1952b00d1..7b3ff5b8f3d 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1115,13 +1115,7 @@ __wt_ref_addr_copy(WT_SESSION_IMPL *session, WT_REF *ref, WT_ADDR_COPY *copy)
/* If off-page, the pointer references a WT_ADDR structure. */
if (__wt_off_page(page, addr)) {
- copy->oldest_start_ts = addr->oldest_start_ts;
- copy->oldest_start_txn = addr->oldest_start_txn;
- copy->newest_start_durable_ts = addr->newest_start_durable_ts;
- copy->newest_stop_ts = addr->newest_stop_ts;
- copy->newest_stop_txn = addr->newest_stop_txn;
- copy->newest_stop_durable_ts = addr->newest_stop_durable_ts;
- copy->prepare = addr->prepare;
+ __wt_time_aggregate_copy(&copy->ta, &addr->ta);
copy->type = addr->type;
memcpy(copy->addr, addr->addr, copy->size = addr->size);
return (true);
@@ -1129,13 +1123,7 @@ __wt_ref_addr_copy(WT_SESSION_IMPL *session, WT_REF *ref, WT_ADDR_COPY *copy)
/* If on-page, the pointer references a cell. */
__wt_cell_unpack(session, page, (WT_CELL *)addr, unpack);
- copy->oldest_start_ts = unpack->oldest_start_ts;
- copy->oldest_start_txn = unpack->oldest_start_txn;
- copy->newest_start_durable_ts = unpack->newest_start_durable_ts;
- copy->newest_stop_ts = unpack->newest_stop_ts;
- copy->newest_stop_txn = unpack->newest_stop_txn;
- copy->newest_stop_durable_ts = unpack->newest_stop_durable_ts;
- copy->prepare = F_ISSET(unpack, WT_CELL_UNPACK_PREPARE);
+ __wt_time_aggregate_copy(&copy->ta, &unpack->ta);
copy->type = 0; /* Avoid static analyzer uninitialized value complaints. */
switch (unpack->raw) {
case WT_CELL_ADDR_INT:
@@ -1708,25 +1696,14 @@ __wt_page_swap_func(WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32
*/
static inline int
__wt_bt_col_var_cursor_walk_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_PAGE *page,
- WT_CELL_UNPACK *unpack, WT_COL *cip, WT_UPDATE **updp)
+ WT_CELL_UNPACK *unpack, WT_COL *cip)
{
- WT_UPDATE *upd;
-
- *updp = NULL;
-
cbt->slot = WT_COL_SLOT(page, cip);
- WT_RET(__wt_txn_read(session, cbt, NULL, cbt->recno, NULL, unpack, &upd));
- if (upd == NULL)
+ WT_RET(__wt_txn_read(session, cbt, NULL, cbt->recno, NULL, unpack));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID || cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
return (0);
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
- return (0);
- }
-
- WT_RET(__wt_value_return(cbt, upd));
- *updp = upd;
+ WT_RET(__wt_value_return(cbt, cbt->upd_value));
cbt->tmp->data = cbt->iface.value.data;
cbt->tmp->size = cbt->iface.value.size;
diff --git a/src/third_party/wiredtiger/src/include/cell.h b/src/third_party/wiredtiger/src/include/cell.h
index b80449a8c18..760811e5acf 100644
--- a/src/third_party/wiredtiger/src/include/cell.h
+++ b/src/third_party/wiredtiger/src/include/cell.h
@@ -151,23 +151,10 @@ struct __wt_cell {
struct __wt_cell_unpack {
WT_CELL *cell; /* Cell's disk image address */
- uint64_t v; /* RLE count or recno */
+ WT_TIME_AGGREGATE ta; /* Address validity window */
+ WT_TIME_WINDOW tw; /* Value validity window */
- /* Value validity window */
- wt_timestamp_t start_ts; /* default value: WT_TS_NONE */
- uint64_t start_txn; /* default value: WT_TXN_NONE */
- wt_timestamp_t durable_start_ts; /* default value: WT_TS_NONE */
- wt_timestamp_t stop_ts; /* default value: WT_TS_MAX */
- uint64_t stop_txn; /* default value: WT_TXN_MAX */
- wt_timestamp_t durable_stop_ts; /* default value: WT_TS_NONE */
-
- /* Address validity window */
- wt_timestamp_t oldest_start_ts; /* default value: WT_TS_NONE */
- uint64_t oldest_start_txn; /* default value: WT_TXN_NONE */
- wt_timestamp_t newest_start_durable_ts; /* default value: WT_TS_NONE */
- wt_timestamp_t newest_stop_ts; /* default value: WT_TS_MAX */
- uint64_t newest_stop_txn; /* default value: WT_TXN_MAX */
- wt_timestamp_t newest_stop_durable_ts; /* default value: WT_TS_NONE */
+ uint64_t v; /* RLE count or recno */
/*
* !!!
@@ -185,9 +172,9 @@ struct __wt_cell_unpack {
uint8_t type; /* Cell type */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CELL_UNPACK_OVERFLOW 0x1u /* cell is an overflow */
-#define WT_CELL_UNPACK_PREPARE 0x2u /* cell is part of a prepared transaction */
-#define WT_CELL_UNPACK_TIME_PAIRS_CLEARED 0x4u /* time pairs are cleared because of restart */
- /* AUTOMATIC FLAG VALUE GENERATION STOP */
+#define WT_CELL_UNPACK_OVERFLOW 0x1u /* cell is an overflow */
+#define WT_CELL_UNPACK_PREPARE 0x2u /* cell is part of a prepared transaction */
+#define WT_CELL_UNPACK_TIME_WINDOW_CLEARED 0x4u /* time window cleared because of restart */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
uint8_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i
index bcd23946883..f14eb7f8d15 100644
--- a/src/third_party/wiredtiger/src/include/cell.i
+++ b/src/third_party/wiredtiger/src/include/cell.i
@@ -11,47 +11,47 @@
* Check the value's validity window for sanity.
*/
static inline void
-__cell_check_value_validity(WT_SESSION_IMPL *session, wt_timestamp_t durable_start_ts,
- wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t durable_stop_ts,
- wt_timestamp_t stop_ts, uint64_t stop_txn)
+__cell_check_value_validity(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
{
#ifdef HAVE_DIAGNOSTIC
+ /*
+ * We're using WT_ERR_ASSERT rather than WT_ASSERT because we want to push out a message string.
+ * This usage of WT_ERR_ASSERT isn't "correct", because it jumps to a non-existent error label
+ * in non-diagnostic builds and returns WT_PANIC without calling the underlying panic routine.
+ * That's OK, we have to be in a diagnostic build to get here, and fixing it would require new
+ * macros that aren't needed anywhere else, so we're leaving it alone.
+ */
char ts_string[2][WT_TS_INT_STRING_SIZE];
- if (start_ts > durable_start_ts)
- WT_ERR_ASSERT(session, start_ts <= durable_start_ts, WT_PANIC,
+ if (tw->start_ts > tw->durable_start_ts)
+ WT_ERR_ASSERT(session, tw->start_ts <= tw->durable_start_ts, WT_PANIC,
"a start timestamp %s newer than its durable start timestamp %s",
- __wt_timestamp_to_string(start_ts, ts_string[0]),
- __wt_timestamp_to_string(durable_start_ts, ts_string[1]));
+ __wt_timestamp_to_string(tw->start_ts, ts_string[0]),
+ __wt_timestamp_to_string(tw->durable_start_ts, ts_string[1]));
- if (start_ts != WT_TS_NONE && stop_ts == WT_TS_NONE)
- WT_ERR_ASSERT(session, stop_ts != WT_TS_NONE, WT_PANIC, "stop timestamp of 0");
+ if (tw->start_ts != WT_TS_NONE && tw->stop_ts == WT_TS_NONE)
+ WT_ERR_ASSERT(session, tw->stop_ts != WT_TS_NONE, WT_PANIC, "stop timestamp of 0");
- if (start_ts > stop_ts)
- WT_ERR_ASSERT(session, start_ts <= stop_ts, WT_PANIC,
+ if (tw->start_ts > tw->stop_ts)
+ WT_ERR_ASSERT(session, tw->start_ts <= tw->stop_ts, WT_PANIC,
"a start timestamp %s newer than its stop timestamp %s",
- __wt_timestamp_to_string(start_ts, ts_string[0]),
- __wt_timestamp_to_string(stop_ts, ts_string[1]));
+ __wt_timestamp_to_string(tw->start_ts, ts_string[0]),
+ __wt_timestamp_to_string(tw->stop_ts, ts_string[1]));
- if (start_txn > stop_txn)
- WT_ERR_ASSERT(session, start_txn <= stop_txn, WT_PANIC,
+ if (tw->start_txn > tw->stop_txn)
+ WT_ERR_ASSERT(session, tw->start_txn <= tw->stop_txn, WT_PANIC,
"a start transaction ID %" PRIu64 " newer than its stop transaction ID %" PRIu64,
- start_txn, stop_txn);
+ tw->start_txn, tw->stop_txn);
- if (stop_ts != WT_TS_MAX && stop_ts > durable_stop_ts)
- WT_ERR_ASSERT(session, stop_ts <= durable_stop_ts, WT_PANIC,
+ if (tw->stop_ts != WT_TS_MAX && tw->stop_ts > tw->durable_stop_ts)
+ WT_ERR_ASSERT(session, tw->stop_ts <= tw->durable_stop_ts, WT_PANIC,
"a stop timestamp %s newer than its durable stop timestamp %s",
- __wt_timestamp_to_string(stop_ts, ts_string[0]),
- __wt_timestamp_to_string(durable_stop_ts, ts_string[1]));
+ __wt_timestamp_to_string(tw->stop_ts, ts_string[0]),
+ __wt_timestamp_to_string(tw->durable_stop_ts, ts_string[1]));
#else
WT_UNUSED(session);
- WT_UNUSED(durable_start_ts);
- WT_UNUSED(durable_stop_ts);
- WT_UNUSED(start_ts);
- WT_UNUSED(start_txn);
- WT_UNUSED(stop_ts);
- WT_UNUSED(stop_txn);
+ WT_UNUSED(tw);
#endif
}
@@ -60,21 +60,17 @@ __cell_check_value_validity(WT_SESSION_IMPL *session, wt_timestamp_t durable_sta
* Pack the validity window for a value.
*/
static inline void
-__cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t durable_start_ts,
- wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t durable_stop_ts,
- wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare)
+__cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, WT_TIME_WINDOW *tw)
{
uint8_t flags, *flagsp;
/* Globally visible values have no associated validity window. */
- if (durable_start_ts == WT_TS_NONE && start_ts == WT_TS_NONE && start_txn == WT_TXN_NONE &&
- durable_stop_ts == WT_TS_NONE && stop_ts == WT_TS_MAX && stop_txn == WT_TXN_MAX) {
+ if (__wt_time_window_is_empty(tw)) {
++*pp;
return;
}
- __cell_check_value_validity(
- session, durable_start_ts, start_ts, start_txn, durable_stop_ts, stop_ts, stop_txn);
+ __cell_check_value_validity(session, tw);
**pp |= WT_CELL_SECOND_DESC;
++*pp;
@@ -82,46 +78,41 @@ __cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_
++*pp;
flags = 0;
- if (start_ts != WT_TS_NONE) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_ts));
+ if (tw->start_ts != WT_TS_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->start_ts));
LF_SET(WT_CELL_TS_START);
}
- if (start_txn != WT_TXN_NONE) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_txn));
+ if (tw->start_txn != WT_TXN_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->start_txn));
LF_SET(WT_CELL_TXN_START);
}
- if (durable_start_ts != WT_TS_NONE) {
- WT_ASSERT(session, start_ts != WT_TS_NONE && start_ts <= durable_start_ts);
+ if (tw->durable_start_ts != WT_TS_NONE) {
+ WT_ASSERT(session, tw->start_ts <= tw->durable_start_ts);
/* Store differences if any, not absolutes. */
- if (durable_start_ts - start_ts > 0) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, durable_start_ts - start_ts));
+ if (tw->durable_start_ts - tw->start_ts > 0) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->durable_start_ts - tw->start_ts));
LF_SET(WT_CELL_TS_DURABLE_START);
}
}
- if (stop_ts != WT_TS_MAX) {
+ if (tw->stop_ts != WT_TS_MAX) {
/* Store differences, not absolutes. */
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_ts - start_ts));
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->stop_ts - tw->start_ts));
LF_SET(WT_CELL_TS_STOP);
}
- if (stop_txn != WT_TXN_MAX) {
+ if (tw->stop_txn != WT_TXN_MAX) {
/* Store differences, not absolutes. */
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_txn - start_txn));
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->stop_txn - tw->start_txn));
LF_SET(WT_CELL_TXN_STOP);
}
- if (durable_stop_ts != WT_TS_NONE) {
- WT_ASSERT(session, stop_ts != WT_TS_MAX && stop_ts <= durable_stop_ts);
+ if (tw->durable_stop_ts != WT_TS_NONE) {
+ WT_ASSERT(session, tw->stop_ts <= tw->durable_stop_ts);
/* Store differences if any, not absolutes. */
- if (durable_stop_ts - stop_ts > 0) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, durable_stop_ts - stop_ts));
+ if (tw->durable_stop_ts - tw->stop_ts > 0) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->durable_stop_ts - tw->stop_ts));
LF_SET(WT_CELL_TS_DURABLE_STOP);
}
}
- /*
- * Currently, no uncommitted prepared updates are written to the data store, so this flag must
- * be false until we allow writing them in WT-5984. In that ticket this assert must be removed.
- */
- WT_ASSERT(session, prepare == false);
- if (prepare)
+ if (tw->prepare)
LF_SET(WT_CELL_PREPARE);
*flagsp = flags;
}
@@ -131,47 +122,47 @@ __cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_
* Check the address' validity window for sanity.
*/
static inline void
-__wt_check_addr_validity(WT_SESSION_IMPL *session, wt_timestamp_t start_durable_ts,
- wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t stop_durable_ts,
- wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn)
+__wt_check_addr_validity(WT_SESSION_IMPL *session, WT_TIME_AGGREGATE *ta)
{
#ifdef HAVE_DIAGNOSTIC
+ /*
+ * We're using WT_ERR_ASSERT rather than WT_ASSERT because we want to push out a message string.
+ * This usage of WT_ERR_ASSERT isn't "correct", because it jumps to a non-existent error label
+ * in non-diagnostic builds and returns WT_PANIC without calling the underlying panic routine.
+ * That's OK, we have to be in a diagnostic build to get here, and fixing it would require new
+ * macros that aren't needed anywhere else, so we're leaving it alone.
+ */
char ts_string[2][WT_TS_INT_STRING_SIZE];
- if (oldest_start_ts != WT_TS_NONE && newest_stop_ts == WT_TS_NONE)
+ if (ta->oldest_start_ts != WT_TS_NONE && ta->newest_stop_ts == WT_TS_NONE)
WT_ERR_ASSERT(
- session, newest_stop_ts != WT_TS_NONE, WT_PANIC, "newest stop timestamp of 0");
+ session, ta->newest_stop_ts != WT_TS_NONE, WT_PANIC, "newest stop timestamp of 0");
- if (oldest_start_ts > newest_stop_ts)
- WT_ERR_ASSERT(session, oldest_start_ts <= newest_stop_ts, WT_PANIC,
+ if (ta->oldest_start_ts > ta->newest_stop_ts)
+ WT_ERR_ASSERT(session, ta->oldest_start_ts <= ta->newest_stop_ts, WT_PANIC,
"an oldest start timestamp %s newer than its newest stop timestamp %s",
- __wt_timestamp_to_string(oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(newest_stop_ts, ts_string[1]));
+ __wt_timestamp_to_string(ta->oldest_start_ts, ts_string[0]),
+ __wt_timestamp_to_string(ta->newest_stop_ts, ts_string[1]));
- if (oldest_start_txn > newest_stop_txn)
- WT_ERR_ASSERT(session, oldest_start_txn <= newest_stop_txn, WT_PANIC,
+ if (ta->oldest_start_txn > ta->newest_stop_txn)
+ WT_ERR_ASSERT(session, ta->oldest_start_txn <= ta->newest_stop_txn, WT_PANIC,
"an oldest start transaction %" PRIu64 " newer than its newest stop transaction %" PRIu64,
- oldest_start_txn, newest_stop_txn);
+ ta->oldest_start_txn, ta->newest_stop_txn);
- if (oldest_start_ts > start_durable_ts)
- WT_ERR_ASSERT(session, oldest_start_ts <= start_durable_ts, WT_PANIC,
+ if (ta->oldest_start_ts > ta->newest_start_durable_ts)
+ WT_ERR_ASSERT(session, ta->oldest_start_ts <= ta->newest_start_durable_ts, WT_PANIC,
"an oldest start timestamp %s newer than its durable start timestamp %s",
- __wt_timestamp_to_string(oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(start_durable_ts, ts_string[1]));
+ __wt_timestamp_to_string(ta->oldest_start_ts, ts_string[0]),
+ __wt_timestamp_to_string(ta->newest_start_durable_ts, ts_string[1]));
- if (newest_stop_ts != WT_TS_MAX && newest_stop_ts > stop_durable_ts)
- WT_ERR_ASSERT(session, newest_stop_ts <= stop_durable_ts, WT_PANIC,
+ if (ta->newest_stop_ts != WT_TS_MAX && ta->newest_stop_ts > ta->newest_stop_durable_ts)
+ WT_ERR_ASSERT(session, ta->newest_stop_ts <= ta->newest_stop_durable_ts, WT_PANIC,
"a newest stop timestamp %s newer than its durable stop timestamp %s",
- __wt_timestamp_to_string(newest_stop_ts, ts_string[0]),
- __wt_timestamp_to_string(stop_durable_ts, ts_string[1]));
+ __wt_timestamp_to_string(ta->newest_stop_ts, ts_string[0]),
+ __wt_timestamp_to_string(ta->newest_stop_durable_ts, ts_string[1]));
#else
WT_UNUSED(session);
- WT_UNUSED(start_durable_ts);
- WT_UNUSED(oldest_start_ts);
- WT_UNUSED(oldest_start_txn);
- WT_UNUSED(stop_durable_ts);
- WT_UNUSED(newest_stop_ts);
- WT_UNUSED(newest_stop_txn);
+ WT_UNUSED(ta);
#endif
}
@@ -180,22 +171,17 @@ __wt_check_addr_validity(WT_SESSION_IMPL *session, wt_timestamp_t start_durable_
* Pack the validity window for an address.
*/
static inline void
-__cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t start_durable_ts,
- wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t stop_durable_ts,
- wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, bool prepare)
+__cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, WT_TIME_AGGREGATE *ta)
{
uint8_t flags, *flagsp;
/* Globally visible values have no associated validity window. */
- if (start_durable_ts == WT_TS_NONE && stop_durable_ts == WT_TS_NONE &&
- oldest_start_ts == WT_TS_NONE && oldest_start_txn == WT_TXN_NONE &&
- newest_stop_ts == WT_TS_MAX && newest_stop_txn == WT_TXN_MAX) {
+ if (__wt_time_aggregate_is_empty(ta)) {
++*pp;
return;
}
- __wt_check_addr_validity(session, start_durable_ts, oldest_start_ts, oldest_start_txn,
- stop_durable_ts, newest_stop_ts, newest_stop_txn);
+ __wt_check_addr_validity(session, ta);
**pp |= WT_CELL_SECOND_DESC;
++*pp;
@@ -203,21 +189,18 @@ __cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t
++*pp;
flags = 0;
- if (oldest_start_ts != WT_TS_NONE) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_ts));
+ if (ta->oldest_start_ts != WT_TS_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->oldest_start_ts));
LF_SET(WT_CELL_TS_START);
}
- if (oldest_start_txn != WT_TXN_NONE) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_txn));
+ if (ta->oldest_start_txn != WT_TXN_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->oldest_start_txn));
LF_SET(WT_CELL_TXN_START);
}
- if (start_durable_ts != WT_TS_NONE) {
+ if (ta->newest_start_durable_ts != WT_TS_NONE) {
/* Store differences, not absolutes. */
- /*
- * FIXME-prepare-support:
- * WT_ASSERT(
- * session, oldest_start_ts != WT_TS_NONE && oldest_start_ts <= start_durable_ts);
- */
+ WT_ASSERT(session, ta->oldest_start_ts <= ta->newest_start_durable_ts);
+
/*
* Unlike value cell, we store the durable start timestamp even the difference is zero
* compared to oldest commit timestamp. The difference can only be zero when the page
@@ -225,43 +208,38 @@ __cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t
* having that check to find out whether it is zero or not will unnecessarily add overhead
* than benefit.
*/
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_durable_ts - oldest_start_ts));
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->newest_start_durable_ts - ta->oldest_start_ts));
LF_SET(WT_CELL_TS_DURABLE_START);
}
- if (newest_stop_ts != WT_TS_MAX) {
+ if (ta->newest_stop_ts != WT_TS_MAX) {
/* Store differences, not absolutes. */
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, newest_stop_ts - oldest_start_ts));
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->newest_stop_ts - ta->oldest_start_ts));
LF_SET(WT_CELL_TS_STOP);
}
- if (newest_stop_txn != WT_TXN_MAX) {
+ if (ta->newest_stop_txn != WT_TXN_MAX) {
/* Store differences, not absolutes. */
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, newest_stop_txn - oldest_start_txn));
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->newest_stop_txn - ta->oldest_start_txn));
LF_SET(WT_CELL_TXN_STOP);
}
- if (stop_durable_ts != WT_TS_NONE) {
- /* Store differences, not absolutes. */
- /*
- * FIXME-prepare-support:
- * WT_ASSERT(session,
- * newest_stop_ts != WT_TS_MAX && newest_stop_ts <= stop_durable__ts);
- */
+ if (ta->newest_stop_durable_ts != WT_TS_NONE) {
+ WT_ASSERT(session,
+ ta->newest_stop_ts == WT_TS_MAX || ta->newest_stop_ts <= ta->newest_stop_durable_ts);
+
/*
+ * Store differences, not absolutes.
+ *
* Unlike value cell, we store the durable stop timestamp even the difference is zero
* compared to newest commit timestamp. The difference can only be zero when the page
* contains all the key/value pairs with the same timestamp. But this scenario is rare and
* having that check to find out whether it is zero or not will unnecessarily add overhead
* than benefit.
*/
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_durable_ts - newest_stop_ts));
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->newest_stop_durable_ts - ta->newest_stop_ts));
LF_SET(WT_CELL_TS_DURABLE_STOP);
}
- /*
- * Currently, no uncommitted prepared updates are written to the data store, so this flag must
- * be false until we allow writing them in WT-5984. In that ticket this assert must be removed.
- */
- WT_ASSERT(session, prepare == false);
- if (prepare)
+ if (ta->prepare)
LF_SET(WT_CELL_PREPARE);
+
*flagsp = flags;
}
@@ -271,9 +249,7 @@ __cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t
*/
static inline size_t
__wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, uint64_t recno,
- wt_timestamp_t start_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn,
- wt_timestamp_t stop_durable_ts, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn,
- bool prepare, size_t size)
+ WT_TIME_AGGREGATE *ta, size_t size)
{
uint8_t *p;
@@ -281,8 +257,7 @@ __wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, ui
p = cell->__chunk;
*p = '\0';
- __cell_pack_addr_validity(session, &p, start_durable_ts, oldest_start_ts, oldest_start_txn,
- stop_durable_ts, newest_stop_ts, newest_stop_txn, prepare);
+ __cell_pack_addr_validity(session, &p, ta);
if (recno == WT_RECNO_OOB)
cell->__chunk[0] |= (uint8_t)cell_type; /* Type */
@@ -301,9 +276,8 @@ __wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, ui
* Set a value item's WT_CELL contents.
*/
static inline size_t
-__wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t durable_start_ts,
- wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t durable_stop_ts,
- wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare, uint64_t rle, size_t size)
+__wt_cell_pack_value(
+ WT_SESSION_IMPL *session, WT_CELL *cell, WT_TIME_WINDOW *tw, uint64_t rle, size_t size)
{
uint8_t byte, *p;
bool validity;
@@ -312,8 +286,7 @@ __wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t dur
p = cell->__chunk;
*p = '\0';
- __cell_pack_value_validity(session, &p, durable_start_ts, start_ts, start_txn, durable_stop_ts,
- stop_ts, stop_txn, prepare);
+ __cell_pack_value_validity(session, &p, tw);
/*
* Short data cells without a validity window or run-length encoding have 6 bits of data length
@@ -435,9 +408,8 @@ __wt_cell_pack_value_match(
* Write a copy value cell.
*/
static inline size_t
-__wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_durable_ts,
- wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_durable_ts,
- wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare, uint64_t rle, uint64_t v)
+__wt_cell_pack_copy(
+ WT_SESSION_IMPL *session, WT_CELL *cell, WT_TIME_WINDOW *tw, uint64_t rle, uint64_t v)
{
uint8_t *p;
@@ -445,8 +417,7 @@ __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t star
p = cell->__chunk;
*p = '\0';
- __cell_pack_value_validity(session, &p, start_durable_ts, start_ts, start_txn, stop_durable_ts,
- stop_ts, stop_txn, prepare);
+ __cell_pack_value_validity(session, &p, tw);
if (rle < 2)
cell->__chunk[0] |= WT_CELL_VALUE_COPY; /* Type */
@@ -466,9 +437,7 @@ __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t star
* Write a deleted value cell.
*/
static inline size_t
-__wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_durable_ts,
- wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_durable_ts,
- wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle)
+__wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, WT_TIME_WINDOW *tw, uint64_t rle)
{
uint8_t *p;
@@ -476,9 +445,8 @@ __wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start
p = cell->__chunk;
*p = '\0';
- /* FIXME-prepare-support: we should pass prepare value. */
- __cell_pack_value_validity(session, &p, start_durable_ts, start_ts, start_txn, stop_durable_ts,
- stop_ts, stop_txn, false);
+ /* FIXME-WT-6124: we should set the time window prepare value. */
+ __cell_pack_value_validity(session, &p, tw);
if (rle < 2)
cell->__chunk[0] |= WT_CELL_DEL; /* Type */
@@ -564,9 +532,7 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size)
* Pack an overflow cell.
*/
static inline size_t
-__wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type,
- wt_timestamp_t durable_start_ts, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t durable_stop_ts, wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare,
+__wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, WT_TIME_WINDOW *tw,
uint64_t rle, size_t size)
{
uint8_t *p;
@@ -578,12 +544,12 @@ __wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type,
switch (type) {
case WT_CELL_KEY_OVFL:
case WT_CELL_KEY_OVFL_RM:
+ WT_ASSERT(session, tw == NULL);
++p;
break;
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_OVFL_RM:
- __cell_pack_value_validity(session, &p, durable_start_ts, start_ts, start_txn,
- durable_stop_ts, stop_ts, stop_txn, prepare);
+ __cell_pack_value_validity(session, &p, tw);
break;
}
@@ -739,26 +705,22 @@ __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CE
{
struct {
uint64_t v;
- wt_timestamp_t start_ts;
- wt_timestamp_t durable_start_ts;
- uint64_t start_txn;
- wt_timestamp_t stop_ts;
- wt_timestamp_t durable_stop_ts;
- uint64_t stop_txn;
+ WT_TIME_WINDOW tw;
uint32_t len;
} copy;
+ WT_TIME_AGGREGATE *ta;
+ WT_TIME_WINDOW *tw;
uint64_t v;
const uint8_t *p;
uint8_t flags;
+ bool copy_cell;
+
+ copy_cell = false;
+ copy.len = 0; /* [-Wconditional-uninitialized] */
+ copy.v = 0; /* [-Wconditional-uninitialized] */
- copy.v = 0; /* -Werror=maybe-uninitialized */
- copy.start_ts = WT_TS_NONE;
- copy.durable_start_ts = WT_TS_NONE;
- copy.start_txn = WT_TXN_NONE;
- copy.stop_ts = WT_TS_MAX;
- copy.durable_stop_ts = WT_TS_NONE;
- copy.stop_txn = WT_TXN_MAX;
- copy.len = 0;
+ tw = &unpack->tw;
+ ta = &unpack->ta;
/*
* The verification code specifies an end argument, a pointer to 1B past the end-of-page. In which
@@ -789,18 +751,8 @@ restart:
* following switch. All validity windows default to durability.
*/
unpack->v = 0;
- unpack->durable_start_ts = WT_TS_NONE;
- unpack->durable_stop_ts = WT_TS_NONE;
- unpack->start_ts = WT_TS_NONE;
- unpack->start_txn = WT_TXN_NONE;
- unpack->stop_ts = WT_TS_MAX;
- unpack->stop_txn = WT_TXN_MAX;
- unpack->newest_start_durable_ts = WT_TS_NONE;
- unpack->newest_stop_durable_ts = WT_TS_NONE;
- unpack->oldest_start_ts = WT_TS_NONE;
- unpack->oldest_start_txn = WT_TXN_NONE;
- unpack->newest_stop_ts = WT_TS_MAX;
- unpack->newest_stop_txn = WT_TXN_MAX;
+ __wt_time_window_init(&unpack->tw);
+ __wt_time_aggregate_init(&unpack->ta);
unpack->raw = (uint8_t)__wt_cell_type_raw(cell);
unpack->type = (uint8_t)__wt_cell_type(cell);
unpack->flags = 0;
@@ -852,39 +804,38 @@ restart:
break;
flags = *p++; /* skip second descriptor byte */
- if (LF_ISSET(WT_CELL_PREPARE))
+ if (LF_ISSET(WT_CELL_PREPARE)) {
F_SET(unpack, WT_CELL_UNPACK_PREPARE);
+ ta->prepare = 1;
+ }
if (LF_ISSET(WT_CELL_TS_START))
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->oldest_start_ts));
+ WT_RET(
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->oldest_start_ts));
if (LF_ISSET(WT_CELL_TXN_START))
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->oldest_start_txn));
+ WT_RET(
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->oldest_start_txn));
if (LF_ISSET(WT_CELL_TS_DURABLE_START)) {
WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_start_durable_ts));
- unpack->newest_start_durable_ts += unpack->oldest_start_ts;
+ &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_start_durable_ts));
+ ta->newest_start_durable_ts += ta->oldest_start_ts;
}
if (LF_ISSET(WT_CELL_TS_STOP)) {
WT_RET(
- __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_stop_ts));
- unpack->newest_stop_ts += unpack->oldest_start_ts;
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_ts));
+ ta->newest_stop_ts += ta->oldest_start_ts;
}
if (LF_ISSET(WT_CELL_TXN_STOP)) {
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_stop_txn));
- unpack->newest_stop_txn += unpack->oldest_start_txn;
+ WT_RET(
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_txn));
+ ta->newest_stop_txn += ta->oldest_start_txn;
}
if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) {
WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_stop_durable_ts));
- unpack->newest_stop_durable_ts += unpack->newest_stop_ts;
+ &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_durable_ts));
+ ta->newest_stop_durable_ts += ta->newest_stop_ts;
}
-
- __wt_check_addr_validity(session, unpack->newest_start_durable_ts, unpack->oldest_start_ts,
- unpack->oldest_start_txn, unpack->newest_stop_durable_ts, unpack->newest_stop_ts,
- unpack->newest_stop_txn);
+ __wt_check_addr_validity(session, ta);
break;
case WT_CELL_DEL:
case WT_CELL_VALUE:
@@ -895,38 +846,39 @@ restart:
break;
flags = *p++; /* skip second descriptor byte */
- if (LF_ISSET(WT_CELL_PREPARE))
+ if (LF_ISSET(WT_CELL_PREPARE)) {
F_SET(unpack, WT_CELL_UNPACK_PREPARE);
+ tw->prepare = 1;
+ }
if (LF_ISSET(WT_CELL_TS_START))
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->start_ts));
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->start_ts));
if (LF_ISSET(WT_CELL_TXN_START))
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->start_txn));
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->start_txn));
if (LF_ISSET(WT_CELL_TS_DURABLE_START)) {
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->durable_start_ts));
- unpack->durable_start_ts += unpack->start_ts;
+ WT_RET(
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->durable_start_ts));
+ tw->durable_start_ts += tw->start_ts;
} else
- unpack->durable_start_ts = unpack->start_ts;
+ tw->durable_start_ts = tw->start_ts;
if (LF_ISSET(WT_CELL_TS_STOP)) {
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->stop_ts));
- unpack->stop_ts += unpack->start_ts;
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->stop_ts));
+ tw->stop_ts += tw->start_ts;
}
if (LF_ISSET(WT_CELL_TXN_STOP)) {
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->stop_txn));
- unpack->stop_txn += unpack->start_txn;
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->stop_txn));
+ tw->stop_txn += tw->start_txn;
}
if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) {
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->durable_stop_ts));
- unpack->durable_stop_ts += unpack->stop_ts;
- } else if (unpack->stop_ts != WT_TS_MAX)
- unpack->durable_stop_ts = unpack->stop_ts;
+ WT_RET(
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->durable_stop_ts));
+ tw->durable_stop_ts += tw->stop_ts;
+ } else if (tw->stop_ts != WT_TS_MAX)
+ tw->durable_stop_ts = tw->stop_ts;
else
- unpack->durable_stop_ts = WT_TS_NONE;
+ tw->durable_stop_ts = WT_TS_NONE;
- __cell_check_value_validity(session, unpack->durable_start_ts, unpack->start_ts,
- unpack->start_txn, unpack->durable_stop_ts, unpack->stop_ts, unpack->stop_txn);
+ __cell_check_value_validity(session, tw);
break;
}
@@ -943,19 +895,16 @@ restart:
*/
switch (unpack->raw) {
case WT_CELL_VALUE_COPY:
+ copy_cell = true;
+
/*
* The cell is followed by an offset to a cell written earlier in the page. Save/restore the
- * length and RLE of this cell, we need the length to step through the set of cells on the
- * page and this RLE is probably different from the RLE of the earlier cell.
+ * visibility window, length and RLE of this cell, we need the length to step through the
+ * set of cells on the page and the RLE and timestamp information are specific to this cell.
*/
+ __wt_time_window_copy(&copy.tw, tw);
WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v));
copy.v = unpack->v;
- copy.start_ts = unpack->start_ts;
- copy.durable_start_ts = unpack->durable_start_ts;
- copy.start_txn = unpack->start_txn;
- copy.stop_ts = unpack->stop_ts;
- copy.durable_stop_ts = unpack->durable_stop_ts;
- copy.stop_txn = unpack->stop_txn;
copy.len = WT_PTRDIFF32(p, cell);
cell = (WT_CELL *)((uint8_t *)cell - v);
goto restart;
@@ -1003,22 +952,17 @@ restart:
return (WT_ERROR); /* Unknown cell type. */
}
-/*
- * Check the original cell against the full cell length (this is a diagnostic as well, we may be
- * copying the cell from the page and we need the right length).
- */
done:
+ /*
+ * Check the original cell against the full cell length (this is a diagnostic as well, we may be
+ * copying the cell from the page and we need the right length).
+ */
WT_CELL_LEN_CHK(cell, unpack->__len);
- if (copy.len != 0) {
- unpack->raw = WT_CELL_VALUE_COPY;
+ if (copy_cell) {
+ __wt_time_window_copy(tw, &copy.tw);
unpack->v = copy.v;
- unpack->start_ts = copy.start_ts;
- unpack->durable_start_ts = copy.durable_start_ts;
- unpack->start_txn = copy.start_txn;
- unpack->stop_ts = copy.stop_ts;
- unpack->durable_stop_ts = copy.durable_stop_ts;
- unpack->stop_txn = copy.stop_txn;
unpack->__len = copy.len;
+ unpack->raw = WT_CELL_VALUE_COPY;
}
return (0);
@@ -1032,6 +976,12 @@ static inline void
__wt_cell_unpack_dsk(
WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack)
{
+ WT_TIME_AGGREGATE *ta;
+ WT_TIME_WINDOW *tw;
+
+ ta = &unpack->ta;
+ tw = &unpack->tw;
+
/*
* Row-store doesn't store zero-length values on pages, but this allows us to pretend.
*/
@@ -1042,18 +992,8 @@ __wt_cell_unpack_dsk(
* If there isn't any value validity window (which is what it will take to get to a
* zero-length item), the value must be stable.
*/
- unpack->durable_start_ts = WT_TS_NONE;
- unpack->durable_stop_ts = WT_TS_NONE;
- unpack->start_ts = WT_TS_NONE;
- unpack->start_txn = WT_TXN_NONE;
- unpack->stop_ts = WT_TS_MAX;
- unpack->stop_txn = WT_TXN_MAX;
- unpack->newest_start_durable_ts = WT_TS_NONE;
- unpack->newest_stop_durable_ts = WT_TS_NONE;
- unpack->oldest_start_ts = WT_TS_NONE;
- unpack->oldest_start_txn = WT_TXN_NONE;
- unpack->newest_stop_ts = WT_TS_MAX;
- unpack->newest_stop_txn = WT_TXN_MAX;
+ __wt_time_window_init(tw);
+ __wt_time_aggregate_init(ta);
unpack->data = "";
unpack->size = 0;
unpack->__len = 0;
@@ -1081,30 +1021,30 @@ __wt_cell_unpack_dsk(
* Previous startup txnid=0, ts=y txnid=0, ts=WT_TS_NONE txnid=MAX, ts=MAX
*/
if (dsk->write_gen > 0 && dsk->write_gen <= S2C(session)->base_write_gen) {
- /* FIXME-prepare-support: deal with durable timestamps. */
+ /* FIXME-WT-6124: deal with durable timestamps. */
/* Tell reconciliation we cleared the transaction ids and the cell needs to be rebuilt. */
- if (unpack->start_txn != WT_TXN_NONE) {
- unpack->start_txn = WT_TXN_NONE;
- F_SET(unpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED);
+ if (tw->start_txn != WT_TXN_NONE) {
+ tw->start_txn = WT_TXN_NONE;
+ F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
}
- if (unpack->stop_txn != WT_TXN_MAX) {
- unpack->stop_txn = WT_TXN_NONE;
- F_SET(unpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED);
- if (unpack->stop_ts == WT_TS_MAX)
- unpack->stop_ts = WT_TS_NONE;
+ if (tw->stop_txn != WT_TXN_MAX) {
+ tw->stop_txn = WT_TXN_NONE;
+ F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
+ if (tw->stop_ts == WT_TS_MAX)
+ tw->stop_ts = WT_TS_NONE;
} else
- WT_ASSERT(session, unpack->stop_ts == WT_TS_MAX);
- if (unpack->oldest_start_txn != WT_TXN_NONE) {
- unpack->oldest_start_txn = WT_TXN_NONE;
- F_SET(unpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED);
+ WT_ASSERT(session, tw->stop_ts == WT_TS_MAX);
+ if (ta->oldest_start_txn != WT_TXN_NONE) {
+ ta->oldest_start_txn = WT_TXN_NONE;
+ F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
}
- if (unpack->newest_stop_txn != WT_TXN_MAX) {
- unpack->newest_stop_txn = WT_TXN_NONE;
- F_SET(unpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED);
- if (unpack->newest_stop_ts == WT_TS_MAX)
- unpack->newest_stop_ts = WT_TS_NONE;
+ if (ta->newest_stop_txn != WT_TXN_MAX) {
+ ta->newest_stop_txn = WT_TXN_NONE;
+ F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
+ if (ta->newest_stop_ts == WT_TS_MAX)
+ ta->newest_stop_ts = WT_TS_NONE;
} else
- WT_ASSERT(session, unpack->newest_stop_ts == WT_TS_MAX);
+ WT_ASSERT(session, ta->newest_stop_ts == WT_TS_MAX);
}
}
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index f6bd37c5124..547feaa54a3 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -150,10 +150,10 @@ struct __wt_named_extractor {
* WT_CONN_HOTBACKUP_START --
* Macro to set connection data appropriately for when we commence hot backup.
*/
-#define WT_CONN_HOTBACKUP_START(conn) \
- do { \
- (conn)->hot_backup = true; \
- (conn)->hot_backup_list = NULL; \
+#define WT_CONN_HOTBACKUP_START(conn) \
+ do { \
+ (conn)->hot_backup_start = (conn)->ckpt_finish_secs; \
+ (conn)->hot_backup_list = NULL; \
} while (0)
/*
@@ -269,13 +269,14 @@ struct __wt_connection_impl {
WT_TXN_GLOBAL txn_global; /* Global transaction state */
WT_RWLOCK hot_backup_lock; /* Hot backup serialization */
- bool hot_backup; /* Hot backup in progress */
+ uint64_t hot_backup_start; /* Clock value of most recent checkpoint needed by hot backup */
char **hot_backup_list; /* Hot backup file list */
WT_SESSION_IMPL *ckpt_session; /* Checkpoint thread session */
wt_thread_t ckpt_tid; /* Checkpoint thread */
bool ckpt_tid_set; /* Checkpoint thread set */
WT_CONDVAR *ckpt_cond; /* Checkpoint wait mutex */
+ uint64_t ckpt_finish_secs; /* Clock value of last completed checkpoint */
#define WT_CKPT_LOGSIZE(conn) ((conn)->ckpt_logsize != 0)
wt_off_t ckpt_logsize; /* Checkpoint log size period */
bool ckpt_signalled; /* Checkpoint signalled */
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index dfe6d209357..522a031630e 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -6,6 +6,9 @@
* See the file LICENSE for redistribution information.
*/
+/* Get the session from any cursor. */
+#define CUR2S(c) ((WT_SESSION_IMPL *)((WT_CURSOR *)c)->session)
+
/*
* Initialize a static WT_CURSOR structure.
*/
@@ -178,7 +181,10 @@ struct __wt_cursor_btree {
* The update structure allocated by the row- and column-store modify functions, used to avoid a
* data copy in the WT_CURSOR.update call.
*/
- WT_UPDATE *modify_update;
+ WT_UPDATE_VALUE *modify_update, _modify_update;
+
+ /* An intermediate structure to hold the update value to be assigned to the cursor buffer. */
+ WT_UPDATE_VALUE *upd_value, _upd_value;
/*
* Fixed-length column-store items are a single byte, and it's simpler and cheaper to allocate
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index 14de00f80c3..81cc28feb08 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -24,7 +24,7 @@ __cursor_set_recno(WT_CURSOR_BTREE *cbt, uint64_t v)
static inline int
__cursor_copy_release(WT_CURSOR *cursor)
{
- if (F_ISSET(S2C((WT_SESSION_IMPL *)cursor->session), WT_CONN_DEBUG_CURSOR_COPY)) {
+ if (F_ISSET(S2C(CUR2S(cursor)), WT_CONN_DEBUG_CURSOR_COPY)) {
if (F_ISSET(cursor, WT_CURSTD_DEBUG_COPY_KEY)) {
WT_RET(__wt_cursor_copy_release_item(cursor, &cursor->key));
F_CLR(cursor, WT_CURSTD_DEBUG_COPY_KEY);
@@ -77,8 +77,7 @@ __cursor_localkey(WT_CURSOR *cursor)
{
if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) {
if (!WT_DATA_IN_ITEM(&cursor->key))
- WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, &cursor->key, cursor->key.data,
- cursor->key.size));
+ WT_RET(__wt_buf_set(CUR2S(cursor), &cursor->key, cursor->key.data, cursor->key.size));
F_CLR(cursor, WT_CURSTD_KEY_INT);
F_SET(cursor, WT_CURSTD_KEY_EXT);
}
@@ -94,8 +93,8 @@ __cursor_localvalue(WT_CURSOR *cursor)
{
if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) {
if (!WT_DATA_IN_ITEM(&cursor->value))
- WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, &cursor->value,
- cursor->value.data, cursor->value.size));
+ WT_RET(
+ __wt_buf_set(CUR2S(cursor), &cursor->value, cursor->value.data, cursor->value.size));
F_CLR(cursor, WT_CURSTD_VALUE_INT);
F_SET(cursor, WT_CURSTD_VALUE_EXT);
}
@@ -193,7 +192,7 @@ __cursor_reset(WT_CURSOR_BTREE *cbt)
WT_DECL_RET;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
__cursor_pos_clear(cbt);
@@ -242,7 +241,7 @@ __wt_curindex_get_valuev(WT_CURSOR *cursor, va_list ap)
WT_SESSION_IMPL *session;
cindex = (WT_CURSOR_INDEX *)cursor;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
WT_RET(__cursor_checkvalue(cursor));
if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) {
@@ -269,7 +268,7 @@ __wt_curtable_get_valuev(WT_CURSOR *cursor, va_list ap)
WT_SESSION_IMPL *session;
ctable = (WT_CURSOR_TABLE *)cursor;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
primary = *ctable->cg_cursors;
WT_RET(__cursor_checkvalue(primary));
@@ -354,10 +353,10 @@ __wt_cursor_disable_bulk(WT_SESSION_IMPL *session)
* Return a page referenced key/value pair to the application.
*/
static inline int
-__cursor_kv_return(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+__cursor_kv_return(WT_CURSOR_BTREE *cbt, WT_UPDATE_VALUE *upd_value)
{
WT_RET(__wt_key_return(cbt));
- WT_RET(__wt_value_return(cbt, upd));
+ WT_RET(__wt_value_return(cbt, upd_value));
return (0);
}
@@ -371,7 +370,7 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter)
{
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
if (reenter) {
#ifdef HAVE_DIAGNOSTIC
@@ -421,7 +420,7 @@ __cursor_row_slot_key_return(
*kpack_used = false;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
btree = S2BT(session);
page = cbt->ref->page;
diff --git a/src/third_party/wiredtiger/src/include/error.h b/src/third_party/wiredtiger/src/include/error.h
index 04a1fd3c5ae..76c644a2850 100644
--- a/src/third_party/wiredtiger/src/include/error.h
+++ b/src/third_party/wiredtiger/src/include/error.h
@@ -22,6 +22,8 @@
#define __wt_err(session, error, ...) __wt_err_func(session, error, __func__, __LINE__, __VA_ARGS__)
#define __wt_errx(session, ...) __wt_errx_func(session, __func__, __LINE__, __VA_ARGS__)
+#define __wt_panic(session, error, ...) \
+ __wt_panic_func(session, error, __func__, __LINE__, __VA_ARGS__)
#define __wt_set_return(session, error) __wt_set_return_func(session, __func__, __LINE__, error)
/* Set "ret" and branch-to-err-label tests. */
@@ -47,6 +49,9 @@
#define WT_ERR_ERROR_OK(a, e, keep) WT_ERR_TEST((ret = (a)) != 0 && ret != (e), ret, keep)
#define WT_ERR_NOTFOUND_OK(a, keep) WT_ERR_ERROR_OK(a, WT_NOTFOUND, keep)
+/* Return WT_PANIC regardless of earlier return codes. */
+#define WT_ERR_PANIC(session, v, ...) WT_ERR(__wt_panic(session, v, __VA_ARGS__))
+
/* Return tests. */
#define WT_RET(a) \
do { \
@@ -99,27 +104,13 @@
#define WT_TRET_BUSY_OK(a) WT_TRET_ERROR_OK(a, EBUSY)
#define WT_TRET_NOTFOUND_OK(a) WT_TRET_ERROR_OK(a, WT_NOTFOUND)
-/* Called on unexpected code path: locate the failure. */
-#define __wt_illegal_value(session, v) \
- __wt_illegal_value_func(session, (uintmax_t)(v), __func__, __LINE__)
+/* Return WT_PANIC regardless of earlier return codes. */
+#define WT_RET_PANIC(session, v, ...) return (__wt_panic(session, v, __VA_ARGS__))
-#define WT_PANIC_MSG(session, v, ...) \
- do { \
- __wt_err(session, v, __VA_ARGS__); \
- WT_IGNORE_RET(__wt_panic(session)); \
- } while (0)
-#define WT_PANIC_ERR(session, v, ...) \
- do { \
- WT_PANIC_MSG(session, v, __VA_ARGS__); \
- /* Return WT_PANIC regardless of earlier return codes. */ \
- WT_ERR(WT_PANIC); \
- } while (0)
-#define WT_PANIC_RET(session, v, ...) \
- do { \
- WT_PANIC_MSG(session, v, __VA_ARGS__); \
- /* Return WT_PANIC regardless of earlier return codes. */ \
- return (WT_PANIC); \
- } while (0)
+/* Called on unexpected code path: locate the failure. */
+#define __wt_illegal_value(session, v) \
+ __wt_panic(session, EINVAL, "%s: 0x%" PRIxMAX, \
+ "encountered an illegal file format or internal value", (uintmax_t)(v))
/*
* WT_ERR_ASSERT, WT_RET_ASSERT, WT_ASSERT
@@ -149,6 +140,13 @@
__wt_abort(session); \
} \
} while (0)
+#define WT_RET_PANIC_ASSERT(session, exp, v, ...) \
+ do { \
+ if (!(exp)) { \
+ __wt_err(session, v, __VA_ARGS__); \
+ __wt_abort(session); \
+ } \
+ } while (0)
#else
#define WT_ASSERT(session, exp) WT_UNUSED(session)
#define WT_ERR_ASSERT(session, exp, v, ...) \
@@ -161,6 +159,11 @@
if (!(exp)) \
WT_RET_MSG(session, v, __VA_ARGS__); \
} while (0)
+#define WT_RET_PANIC_ASSERT(session, exp, v, ...) \
+ do { \
+ if (!(exp)) \
+ WT_RET_PANIC(session, v, __VA_ARGS__); \
+ } while (0)
#endif
/*
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index a3e71435037..a5dfb85bf89 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -34,8 +34,12 @@ extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern char *__wt_time_aggregate_to_string(WT_TIME_AGGREGATE *ta, char *ta_string)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern char *__wt_time_pair_to_string(wt_timestamp_t timestamp, uint64_t txn_id, char *tp_string)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern char *__wt_time_window_to_string(WT_TIME_WINDOW *tw, char *tw_string)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern char *__wt_timestamp_to_string(wt_timestamp_t ts, char *ts_string)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern const WT_CONFIG_ENTRY *__wt_conn_config_match(const char *method)
@@ -184,8 +188,6 @@ extern int __wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *blo
extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[],
bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold))
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf,
wt_off_t offset, uint32_t size, uint32_t checksum)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -280,7 +282,7 @@ extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((wa
extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_btcur_search_uncommitted(WT_CURSOR *cursor, WT_UPDATE **updp)
+extern int __wt_btcur_search_prepared(WT_CURSOR *cursor, WT_UPDATE **updp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btree_close(WT_SESSION_IMPL *session)
@@ -559,8 +561,8 @@ extern int __wt_cursor_set_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_set_valuev(WT_CURSOR *cursor, va_list ap)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, WT_UPDATE **updp,
- bool *valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, bool *valid)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curstat_colgroup_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[],
WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_curstat_index_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[],
@@ -720,8 +722,8 @@ extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path
extern int __wt_filename_construct(WT_SESSION_IMPL *session, const char *path,
const char *file_prefix, uintmax_t id_1, uint32_t id_2, WT_ITEM *buf)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno,
- WT_UPDATE **updp, bool allow_prepare, WT_ITEM *on_disk_buf)
+extern int __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format,
+ uint64_t recno, WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags,
uint32_t flags, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -762,8 +764,8 @@ extern int __wt_hs_delete_key(WT_SESSION_IMPL *session, uint32_t btree_id, const
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page,
- WT_MULTI *multi) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_modify(WT_CURSOR_BTREE *hs_cbt, WT_UPDATE *hs_upd)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg,
@@ -772,9 +774,6 @@ extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, cons
size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_huffman_open(WT_SESSION_IMPL *session, void *symbol_frequency_array, u_int symcnt,
u_int numbytes, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_illegal_value_func(WT_SESSION_IMPL *session, uintmax_t v, const char *func,
- int line) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_import(WT_SESSION_IMPL *session, const char *uri)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_inmem_unsupported_op(WT_SESSION_IMPL *session, const char *tag)
@@ -1087,15 +1086,15 @@ extern int __wt_metadata_update(WT_SESSION_IMPL *session, const char *key, const
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_metadata_update_base_write_gen(WT_SESSION_IMPL *session, const char *config)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_modify_apply(WT_CURSOR *cursor, const void *modify)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_modify_apply_api(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries)
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_modify_apply_item(WT_SESSION_IMPL *session, WT_ITEM *value, const void *modify,
- bool sformat) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_modify_apply_item(WT_SESSION_IMPL *session, const char *value_format,
+ WT_ITEM *value, const void *modify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_modify_pack(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries, WT_ITEM **modifyp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_modify_reconstruct_from_upd_list(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
+ WT_UPDATE *upd, WT_UPDATE_VALUE *upd_value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_modify_vector_push(WT_MODIFY_VECTOR *modifies, WT_UPDATE *upd)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...)
@@ -1157,9 +1156,11 @@ extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold))
- WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_panic_func(
+ WT_SESSION_IMPL *session, int error, const char *func, int line, const char *fmt, ...)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 5, 6)))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")))
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
@@ -1177,9 +1178,7 @@ extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocate
extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret,
size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv,
- uint8_t type, wt_timestamp_t start_durable_ts, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_durable_ts, wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare,
- uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ uint8_t type, WT_TIME_WINDOW *tw, uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_rec_child_modify(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref,
bool *hazardp, WT_CHILD_STATE *statep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
@@ -1484,7 +1483,7 @@ extern int __wt_txn_query_timestamp(WT_SESSION_IMPL *session, char *hex_timestam
const char *cfg[], bool global_txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_reconfigure(WT_SESSION_IMPL *session, const char *config)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_recover(WT_SESSION_IMPL *session)
+extern int __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1513,12 +1512,10 @@ extern int __wt_unexpected_object_type(
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+extern int __wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE_VALUE *upd_value)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_value_return_buf(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_ITEM *buf,
- WT_TIME_PAIR *start, WT_TIME_PAIR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_value_return_upd(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ WT_TIME_WINDOW *tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session)
@@ -1591,6 +1588,7 @@ extern void __wt_block_ckpt_destroy(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci)
extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on);
extern void __wt_block_ext_free(WT_SESSION_IMPL *session, WT_EXT *ext);
extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el);
+extern void __wt_block_set_readonly(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold));
extern void __wt_block_size_free(WT_SESSION_IMPL *session, WT_SIZE *sz);
extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats);
extern void __wt_bloom_hash(WT_BLOOM *bloom, WT_ITEM *key, WT_BLOOM_HASH *bhash);
@@ -1604,9 +1602,7 @@ extern void __wt_cache_stats_update(WT_SESSION_IMPL *session);
extern void __wt_capacity_throttle(WT_SESSION_IMPL *session, uint64_t bytes, WT_THROTTLE_TYPE type);
extern void __wt_checkpoint_progress(WT_SESSION_IMPL *session, bool closing);
extern void __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize);
-extern void __wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session,
- wt_timestamp_t start_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn,
- wt_timestamp_t stop_durable_ts, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn);
+extern void __wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session, WT_TIME_AGGREGATE *ta);
extern void __wt_ckpt_verbose(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *tag,
const char *ckpt_name, const uint8_t *ckpt_string);
extern void __wt_cond_auto_wait(
@@ -1707,12 +1703,9 @@ extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state)
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_random_init_seed(WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state)
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
-extern void __wt_read_cell_time_pairs(
- WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_TIME_PAIR *start, WT_TIME_PAIR *stop);
-extern void __wt_read_col_time_pairs(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_TIME_PAIR *start, WT_TIME_PAIR *stop);
-extern void __wt_read_row_time_pairs(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_TIME_PAIR *start, WT_TIME_PAIR *stop);
+extern void __wt_read_cell_time_window(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_TIME_WINDOW *tw);
+extern void __wt_read_row_time_window(
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_TIME_WINDOW *tw);
extern void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern void __wt_rec_dictionary_free(WT_SESSION_IMPL *session, WT_RECONCILE *r);
@@ -1833,6 +1826,14 @@ static inline bool __wt_session_can_wait(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_split_descent_race(WT_SESSION_IMPL *session, WT_REF *ref,
WT_PAGE_INDEX *saved_pindex) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_time_aggregate_is_empty(WT_TIME_AGGREGATE *ta)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_time_window_is_empty(WT_TIME_WINDOW *tw)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_time_windows_equal(WT_TIME_WINDOW *tw1, WT_TIME_WINDOW *tw2)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline bool __wt_txn_upd_value_visible_all(WT_SESSION_IMPL *session,
+ WT_UPDATE_VALUE *upd_value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline bool __wt_txn_upd_visible_all(WT_SESSION_IMPL *session, WT_UPDATE *upd)
@@ -1844,7 +1845,7 @@ static inline bool __wt_txn_visible_all(WT_SESSION_IMPL *session, uint64_t id,
static inline double __wt_eviction_dirty_target(WT_CACHE *cache)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_bt_col_var_cursor_walk_txn_read(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *cbt, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_COL *cip, WT_UPDATE **updp)
+ WT_CURSOR_BTREE *cbt, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_COL *cip)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_btree_block_free(WT_SESSION_IMPL *session, const uint8_t *addr,
size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1951,13 +1952,11 @@ static inline int __wt_page_swap_func(
static inline int __wt_read(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len,
void *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- const void *data, size_t size, wt_timestamp_t durable_start_ts, wt_timestamp_t start_ts,
- uint64_t start_txn, wt_timestamp_t durable_stop_ts, wt_timestamp_t stop_ts, uint64_t stop_txn,
- bool prepare, uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- wt_timestamp_t start_durable_ts, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_durable_ts, wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare,
- uint64_t rle, WT_REC_KV *val) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ const void *data, size_t size, WT_TIME_WINDOW *tw, uint64_t rle)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_rec_dict_replace(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_TIME_WINDOW *tw, uint64_t rle, WT_REC_KV *val)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_row_leaf_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip,
@@ -2008,10 +2007,10 @@ static inline int __wt_txn_modify_page_delete(WT_SESSION_IMPL *session, WT_REF *
static inline int __wt_txn_op_set_key(WT_SESSION_IMPL *session, const WT_ITEM *key)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key,
- uint64_t recno, WT_UPDATE *upd, WT_CELL_UNPACK *vpack, WT_UPDATE **updp)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_txn_read_upd_list(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp)
+ uint64_t recno, WT_UPDATE *upd, WT_CELL_UNPACK *vpack)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_txn_read_upd_list(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
+ WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_txn_search_check(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_txn_update_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
@@ -2048,30 +2047,20 @@ static inline int __wt_vunpack_uint(const uint8_t **pp, size_t maxlen, uint64_t
static inline int __wt_write(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len,
const void *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type,
- uint64_t recno, wt_timestamp_t start_durable_ts, wt_timestamp_t oldest_start_ts,
- uint64_t oldest_start_txn, wt_timestamp_t stop_durable_ts, wt_timestamp_t newest_stop_ts,
- uint64_t newest_stop_txn, bool prepare, size_t size)
+ uint64_t recno, WT_TIME_AGGREGATE *ta, size_t size)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell,
- wt_timestamp_t start_durable_ts, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_durable_ts, wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare,
- uint64_t rle, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline size_t __wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell,
- wt_timestamp_t start_durable_ts, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_durable_ts, wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ WT_TIME_WINDOW *tw, uint64_t rle, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline size_t __wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, WT_TIME_WINDOW *tw,
+ uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_cell_pack_int_key(WT_CELL *cell, size_t size)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type,
- wt_timestamp_t durable_start_ts, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t durable_stop_ts, wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare,
- uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ WT_TIME_WINDOW *tw, uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell,
- wt_timestamp_t durable_start_ts, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t durable_stop_ts, wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare,
- uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ WT_TIME_WINDOW *tw, uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_cell_total_len(WT_CELL_UNPACK *unpack)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_strnlen(const char *s, size_t maxlen)
@@ -2151,9 +2140,7 @@ static inline void __wt_cell_unpack(
WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack);
static inline void __wt_cell_unpack_dsk(
WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack);
-static inline void __wt_check_addr_validity(WT_SESSION_IMPL *session,
- wt_timestamp_t start_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn,
- wt_timestamp_t stop_durable_ts, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn);
+static inline void __wt_check_addr_validity(WT_SESSION_IMPL *session, WT_TIME_AGGREGATE *ta);
static inline void __wt_cond_wait(
WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *));
static inline void __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session);
@@ -2166,12 +2153,7 @@ static inline void __wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref);
static inline void __wt_page_modify_clear(WT_SESSION_IMPL *session, WT_PAGE *page);
static inline void __wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page);
static inline void __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page);
-static inline void __wt_rec_addr_ts_init(WT_RECONCILE *r, wt_timestamp_t *start_durable_tsp,
- wt_timestamp_t *oldest_start_tsp, uint64_t *oldest_start_txnp, wt_timestamp_t *stop_durable_tsp,
- wt_timestamp_t *newest_stop_tsp, uint64_t *newest_stop_txnp, bool *preparep);
-static inline void __wt_rec_addr_ts_update(WT_RECONCILE *r, wt_timestamp_t start_durable_ts,
- wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t stop_durable_ts,
- wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, bool prepare);
+static inline void __wt_rec_addr_ts_init(WT_RECONCILE *r, WT_TIME_AGGREGATE *ta);
static inline void __wt_rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r,
WT_ADDR *addr, WT_CELL_UNPACK *vpack, bool proxy_cell, uint64_t recno);
static inline void __wt_rec_image_copy(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv);
@@ -2194,6 +2176,16 @@ static inline void __wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t);
static inline void __wt_spin_lock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t);
static inline void __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t);
static inline void __wt_struct_size_adjust(WT_SESSION_IMPL *session, size_t *sizep);
+static inline void __wt_time_aggregate_copy(WT_TIME_AGGREGATE *dest, WT_TIME_AGGREGATE *source);
+static inline void __wt_time_aggregate_init(WT_TIME_AGGREGATE *ta);
+static inline void __wt_time_aggregate_init_max(WT_TIME_AGGREGATE *ta);
+static inline void __wt_time_aggregate_merge(WT_TIME_AGGREGATE *dest, WT_TIME_AGGREGATE *source);
+static inline void __wt_time_aggregate_update(WT_TIME_AGGREGATE *ta, WT_TIME_WINDOW *tw);
+static inline void __wt_time_window_copy(WT_TIME_WINDOW *dest, WT_TIME_WINDOW *source);
+static inline void __wt_time_window_init(WT_TIME_WINDOW *tw);
+static inline void __wt_time_window_init_max(WT_TIME_WINDOW *tw);
+static inline void __wt_time_window_set_start(WT_TIME_WINDOW *tw, WT_UPDATE *upd);
+static inline void __wt_time_window_set_stop(WT_TIME_WINDOW *tw, WT_UPDATE *upd);
static inline void __wt_timing_stress(WT_SESSION_IMPL *session, u_int flag);
static inline void __wt_tree_modify_set(WT_SESSION_IMPL *session);
static inline void __wt_txn_cursor_op(WT_SESSION_IMPL *session);
@@ -2208,3 +2200,5 @@ static inline void __wt_txn_pinned_timestamp(WT_SESSION_IMPL *session, wt_timest
static inline void __wt_txn_read_last(WT_SESSION_IMPL *session);
static inline void __wt_txn_timestamp_flags(WT_SESSION_IMPL *session);
static inline void __wt_txn_unmodify(WT_SESSION_IMPL *session);
+static inline void __wt_upd_value_assign(WT_UPDATE_VALUE *upd_value, WT_UPDATE *upd);
+static inline void __wt_upd_value_clear(WT_UPDATE_VALUE *upd_value);
diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h
index 9b0ae3c4a72..f9160c6b28c 100644
--- a/src/third_party/wiredtiger/src/include/meta.h
+++ b/src/third_party/wiredtiger/src/include/meta.h
@@ -133,13 +133,7 @@ struct __wt_ckpt {
WT_BLOCK_MODS backup_blocks[WT_BLKINCR_MAX];
- /* Validity window */
- wt_timestamp_t start_durable_ts;
- wt_timestamp_t oldest_start_ts;
- uint64_t oldest_start_txn;
- wt_timestamp_t stop_durable_ts;
- wt_timestamp_t newest_stop_ts;
- uint64_t newest_stop_txn;
+ WT_TIME_AGGREGATE ta; /* Validity window */
WT_ITEM addr; /* Checkpoint cookie string */
WT_ITEM raw; /* Checkpoint cookie raw */
diff --git a/src/third_party/wiredtiger/src/include/mutex.i b/src/third_party/wiredtiger/src/include/mutex.i
index e2b08cd088b..a75c421f882 100644
--- a/src/third_party/wiredtiger/src/include/mutex.i
+++ b/src/third_party/wiredtiger/src/include/mutex.i
@@ -168,7 +168,7 @@ __wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
WT_DECL_RET;
if ((ret = pthread_mutex_lock(&t->lock)) != 0)
- WT_PANIC_MSG(session, ret, "pthread_mutex_lock: %s", t->name);
+ WT_IGNORE_RET(__wt_panic(session, ret, "pthread_mutex_lock: %s", t->name));
}
#endif
@@ -182,7 +182,7 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
WT_DECL_RET;
if ((ret = pthread_mutex_unlock(&t->lock)) != 0)
- WT_PANIC_MSG(session, ret, "pthread_mutex_unlock: %s", t->name);
+ WT_IGNORE_RET(__wt_panic(session, ret, "pthread_mutex_unlock: %s", t->name));
}
#elif SPINLOCK_TYPE == SPINLOCK_MSVC
diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h
index 43581c7cc1f..647c015e26e 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.h
+++ b/src/third_party/wiredtiger/src/include/reconcile.h
@@ -111,24 +111,13 @@ struct __wt_reconcile {
uint32_t entries;
uint64_t recno;
WT_ITEM key;
- wt_timestamp_t newest_start_durable_ts;
- wt_timestamp_t oldest_start_ts;
- uint64_t oldest_start_txn;
- wt_timestamp_t newest_stop_durable_ts;
- wt_timestamp_t newest_stop_ts;
- uint64_t newest_stop_txn;
- bool prepare;
+ WT_TIME_AGGREGATE ta;
/* Saved minimum split-size boundary information. */
uint32_t min_entries;
uint64_t min_recno;
WT_ITEM min_key;
- wt_timestamp_t min_newest_start_durable_ts;
- wt_timestamp_t min_oldest_start_ts;
- uint64_t min_oldest_start_txn;
- wt_timestamp_t min_newest_stop_durable_ts;
- wt_timestamp_t min_newest_stop_ts;
- uint64_t min_newest_stop_txn;
+ WT_TIME_AGGREGATE ta_min;
size_t min_offset; /* byte offset */
@@ -241,13 +230,7 @@ struct __wt_reconcile {
typedef struct {
WT_UPDATE *upd; /* Update to write (or NULL) */
- wt_timestamp_t start_durable_ts; /* Transaction IDs, timestamps */
- wt_timestamp_t start_ts;
- uint64_t start_txn;
- wt_timestamp_t stop_durable_ts;
- wt_timestamp_t stop_ts;
- uint64_t stop_txn;
- bool prepare;
+ WT_TIME_WINDOW tw;
} WT_UPDATE_SELECT;
/*
diff --git a/src/third_party/wiredtiger/src/include/reconcile.i b/src/third_party/wiredtiger/src/include/reconcile.i
index a65daaf9715..3f9339a81ab 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.i
+++ b/src/third_party/wiredtiger/src/include/reconcile.i
@@ -43,9 +43,7 @@ __wt_rec_need_split(WT_RECONCILE *r, size_t len)
* Initialize an address timestamp triplet.
*/
static inline void
-__wt_rec_addr_ts_init(WT_RECONCILE *r, wt_timestamp_t *start_durable_tsp,
- wt_timestamp_t *oldest_start_tsp, uint64_t *oldest_start_txnp, wt_timestamp_t *stop_durable_tsp,
- wt_timestamp_t *newest_stop_tsp, uint64_t *newest_stop_txnp, bool *preparep)
+__wt_rec_addr_ts_init(WT_RECONCILE *r, WT_TIME_AGGREGATE *ta)
{
/*
* If the page is not fixed-length column-store, where we don't maintain timestamps at all, set
@@ -53,40 +51,10 @@ __wt_rec_addr_ts_init(WT_RECONCILE *r, wt_timestamp_t *start_durable_tsp,
* corrected as we process key/value items. Otherwise, set the oldest/newest timestamps to
* simple durability.
*/
- *start_durable_tsp = WT_TS_NONE;
- *oldest_start_tsp = WT_TS_MAX;
- *oldest_start_txnp = WT_TXN_MAX;
- *stop_durable_tsp = WT_TS_NONE;
- *newest_stop_tsp = WT_TS_NONE;
- *newest_stop_txnp = WT_TXN_NONE;
- *preparep = false;
- if (r->page->type == WT_PAGE_COL_FIX) {
- *oldest_start_tsp = WT_TS_NONE;
- *oldest_start_txnp = WT_TXN_NONE;
- *newest_stop_tsp = WT_TS_MAX;
- *newest_stop_txnp = WT_TXN_MAX;
- }
-}
-
-/*
- * __wt_rec_addr_ts_update --
- * Update the chunk's timestamp information.
- */
-static inline void
-__wt_rec_addr_ts_update(WT_RECONCILE *r, wt_timestamp_t start_durable_ts,
- wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t stop_durable_ts,
- wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, bool prepare)
-{
- r->cur_ptr->newest_start_durable_ts =
- WT_MAX(start_durable_ts, r->cur_ptr->newest_start_durable_ts);
- r->cur_ptr->oldest_start_ts = WT_MIN(oldest_start_ts, r->cur_ptr->oldest_start_ts);
- r->cur_ptr->oldest_start_txn = WT_MIN(oldest_start_txn, r->cur_ptr->oldest_start_txn);
- r->cur_ptr->newest_stop_durable_ts =
- WT_MAX(stop_durable_ts, r->cur_ptr->newest_stop_durable_ts);
- r->cur_ptr->newest_stop_ts = WT_MAX(newest_stop_ts, r->cur_ptr->newest_stop_ts);
- r->cur_ptr->newest_stop_txn = WT_MAX(newest_stop_txn, r->cur_ptr->newest_stop_txn);
- if (prepare)
- r->cur_ptr->prepare = true;
+ if (r->page->type == WT_PAGE_COL_FIX)
+ __wt_time_aggregate_init(ta);
+ else
+ __wt_time_aggregate_init_max(ta);
}
/*
@@ -201,17 +169,13 @@ __wt_rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ADDR *add
val->buf.data = addr->addr;
val->buf.size = addr->size;
val->cell_len =
- __wt_cell_pack_addr(session, &val->cell, cell_type, recno, addr->newest_start_durable_ts,
- addr->oldest_start_ts, addr->oldest_start_txn, addr->newest_stop_durable_ts,
- addr->newest_stop_ts, addr->newest_stop_txn, addr->prepare, val->buf.size);
+ __wt_cell_pack_addr(session, &val->cell, cell_type, recno, &addr->ta, val->buf.size);
} else {
WT_ASSERT(session, addr == NULL);
val->buf.data = vpack->data;
val->buf.size = vpack->size;
- val->cell_len = __wt_cell_pack_addr(session, &val->cell, cell_type, recno,
- vpack->newest_start_durable_ts, vpack->oldest_start_ts, vpack->oldest_start_txn,
- vpack->newest_stop_durable_ts, vpack->newest_stop_ts, vpack->newest_stop_txn,
- F_ISSET(vpack, WT_CELL_UNPACK_PREPARE), val->buf.size);
+ val->cell_len =
+ __wt_cell_pack_addr(session, &val->cell, cell_type, recno, &vpack->ta, val->buf.size);
}
val->len = val->cell_len + val->buf.size;
@@ -223,9 +187,7 @@ __wt_rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ADDR *add
*/
static inline int
__wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *data, size_t size,
- wt_timestamp_t durable_start_ts, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t durable_stop_ts, wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare,
- uint64_t rle)
+ WT_TIME_WINDOW *tw, uint64_t rle)
{
WT_BTREE *btree;
WT_REC_KV *val;
@@ -251,13 +213,13 @@ __wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *d
if (val->buf.size > btree->maxleafvalue) {
WT_STAT_DATA_INCR(session, rec_overflow_value);
- return (__wt_rec_cell_build_ovfl(session, r, val, WT_CELL_VALUE_OVFL, durable_start_ts,
- start_ts, start_txn, durable_stop_ts, stop_ts, stop_txn, prepare, rle));
+ return (__wt_rec_cell_build_ovfl(session, r, val, WT_CELL_VALUE_OVFL, tw, rle));
}
}
+ if (tw->prepare)
+ WT_STAT_DATA_INCR(session, rec_prepare_value);
- val->cell_len = __wt_cell_pack_value(session, &val->cell, durable_start_ts, start_ts, start_txn,
- durable_stop_ts, stop_ts, stop_txn, prepare, rle, val->buf.size);
+ val->cell_len = __wt_cell_pack_value(session, &val->cell, tw, rle, val->buf.size);
val->len = val->cell_len + val->buf.size;
return (0);
@@ -268,9 +230,8 @@ __wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, const void *d
* Check for a dictionary match.
*/
static inline int
-__wt_rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, wt_timestamp_t start_durable_ts,
- wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_durable_ts,
- wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare, uint64_t rle, WT_REC_KV *val)
+__wt_rec_dict_replace(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_TIME_WINDOW *tw, uint64_t rle, WT_REC_KV *val)
{
WT_REC_DICTIONARY *dp;
uint64_t offset;
@@ -306,8 +267,7 @@ __wt_rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, wt_timestamp_t
* offset from the beginning of the page.
*/
offset = (uint64_t)WT_PTRDIFF(r->first_free, (uint8_t *)r->cur_ptr->image.mem + dp->offset);
- val->len = val->cell_len = __wt_cell_pack_copy(session, &val->cell, start_durable_ts,
- start_ts, start_txn, stop_durable_ts, stop_ts, stop_txn, prepare, rle, offset);
+ val->len = val->cell_len = __wt_cell_pack_copy(session, &val->cell, tw, rle, offset);
val->buf.data = NULL;
val->buf.size = 0;
}
diff --git a/src/third_party/wiredtiger/src/include/schema.h b/src/third_party/wiredtiger/src/include/schema.h
index 69d948e70fd..83827896f43 100644
--- a/src/third_party/wiredtiger/src/include/schema.h
+++ b/src/third_party/wiredtiger/src/include/schema.h
@@ -264,7 +264,7 @@ struct __wt_table {
if ((skipp) != (bool *)NULL) \
*(bool *)(skipp) = true; \
if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \
- if (!__conn->hot_backup) { \
+ if (__conn->hot_backup_start == 0) { \
if ((skipp) != (bool *)NULL) \
*(bool *)(skipp) = false; \
op; \
@@ -272,7 +272,7 @@ struct __wt_table {
} else { \
__wt_readlock(session, &__conn->hot_backup_lock); \
F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- if (!__conn->hot_backup) { \
+ if (__conn->hot_backup_start == 0) { \
if ((skipp) != (bool *)NULL) \
*(bool *)(skipp) = false; \
op; \
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index 25447813d55..e2cd24d0e95 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -167,32 +167,31 @@ struct __wt_session_impl {
#define WT_SESSION_CAN_WAIT 0x00000008u
#define WT_SESSION_HS_CURSOR 0x00000010u
#define WT_SESSION_IGNORE_CACHE_SIZE 0x00000020u
-#define WT_SESSION_IGNORE_HS_TOMBSTONE 0x00000040u
-#define WT_SESSION_INTERNAL 0x00000080u
-#define WT_SESSION_LOCKED_CHECKPOINT 0x00000100u
-#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000200u
-#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000400u
-#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00000800u
-#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00001000u
-#define WT_SESSION_LOCKED_METADATA 0x00002000u
-#define WT_SESSION_LOCKED_PASS 0x00004000u
-#define WT_SESSION_LOCKED_SCHEMA 0x00008000u
-#define WT_SESSION_LOCKED_SLOT 0x00010000u
-#define WT_SESSION_LOCKED_TABLE_READ 0x00020000u
-#define WT_SESSION_LOCKED_TABLE_WRITE 0x00040000u
-#define WT_SESSION_LOCKED_TURTLE 0x00080000u
-#define WT_SESSION_LOGGING_INMEM 0x00100000u
-#define WT_SESSION_NO_DATA_HANDLES 0x00200000u
-#define WT_SESSION_NO_LOGGING 0x00400000u
-#define WT_SESSION_NO_RECONCILE 0x00800000u
-#define WT_SESSION_NO_SCHEMA_LOCK 0x01000000u
-#define WT_SESSION_QUIET_CORRUPT_FILE 0x02000000u
-#define WT_SESSION_READ_WONT_NEED 0x04000000u
-#define WT_SESSION_RESOLVING_MODIFY 0x08000000u
-#define WT_SESSION_RESOLVING_TXN 0x10000000u
-#define WT_SESSION_ROLLBACK_TO_STABLE 0x20000000u
-#define WT_SESSION_SCHEMA_TXN 0x40000000u
-#define WT_SESSION_SERVER_ASYNC 0x80000000u
+#define WT_SESSION_INTERNAL 0x00000040u
+#define WT_SESSION_LOCKED_CHECKPOINT 0x00000080u
+#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000100u
+#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000200u
+#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00000400u
+#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00000800u
+#define WT_SESSION_LOCKED_METADATA 0x00001000u
+#define WT_SESSION_LOCKED_PASS 0x00002000u
+#define WT_SESSION_LOCKED_SCHEMA 0x00004000u
+#define WT_SESSION_LOCKED_SLOT 0x00008000u
+#define WT_SESSION_LOCKED_TABLE_READ 0x00010000u
+#define WT_SESSION_LOCKED_TABLE_WRITE 0x00020000u
+#define WT_SESSION_LOCKED_TURTLE 0x00040000u
+#define WT_SESSION_LOGGING_INMEM 0x00080000u
+#define WT_SESSION_NO_DATA_HANDLES 0x00100000u
+#define WT_SESSION_NO_LOGGING 0x00200000u
+#define WT_SESSION_NO_RECONCILE 0x00400000u
+#define WT_SESSION_NO_SCHEMA_LOCK 0x00800000u
+#define WT_SESSION_QUIET_CORRUPT_FILE 0x01000000u
+#define WT_SESSION_READ_WONT_NEED 0x02000000u
+#define WT_SESSION_RESOLVING_MODIFY 0x04000000u
+#define WT_SESSION_RESOLVING_TXN 0x08000000u
+#define WT_SESSION_ROLLBACK_TO_STABLE 0x10000000u
+#define WT_SESSION_SCHEMA_TXN 0x20000000u
+#define WT_SESSION_SERVER_ASYNC 0x40000000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t flags;
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index d1c7888e54f..59936321cbc 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -868,6 +868,7 @@ struct __wt_dsrc_stats {
int64_t rec_pages;
int64_t rec_pages_eviction;
int64_t rec_page_delete;
+ int64_t rec_prepare_value;
int64_t session_compact;
int64_t txn_update_conflict;
};
diff --git a/src/third_party/wiredtiger/src/include/timestamp.h b/src/third_party/wiredtiger/src/include/timestamp.h
new file mode 100644
index 00000000000..a014cc8f624
--- /dev/null
+++ b/src/third_party/wiredtiger/src/include/timestamp.h
@@ -0,0 +1,54 @@
+/*-
+ * Copyright (c) 2014-2020 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+/*
+ * We format timestamps in a couple of ways, declare appropriate sized buffers. Hexadecimal is 2x
+ * the size of the value. MongoDB format (high/low pairs of 4B unsigned integers, with surrounding
+ * parenthesis and separating comma and space), is 2x the maximum digits from a 4B unsigned integer
+ * plus 4. Both sizes include a trailing null byte as well.
+ */
+#define WT_TS_HEX_STRING_SIZE (2 * sizeof(wt_timestamp_t) + 1)
+#define WT_TS_INT_STRING_SIZE (2 * 10 + 4 + 1)
+
+/*
+ * We need an appropriately sized buffer for formatted time pairs, aggregates and windows. This is
+ * for time windows with 4 timestamps, 2 transaction IDs, prepare state and formatting. The
+ * formatting is currently about 32 characters - enough space that we don't need to think about it.
+ */
+#define WT_TP_STRING_SIZE (WT_TS_INT_STRING_SIZE + 1 + 20 + 1)
+#define WT_TIME_STRING_SIZE (WT_TS_INT_STRING_SIZE * 4 + 20 * 2 + 64)
+
+/* The time pairs that define a value's time window and associated prepare information. */
+struct __wt_time_window {
+ wt_timestamp_t durable_start_ts; /* default value: WT_TS_NONE */
+ wt_timestamp_t start_ts; /* default value: WT_TS_NONE */
+ uint64_t start_txn; /* default value: WT_TXN_NONE */
+
+ wt_timestamp_t durable_stop_ts; /* default value: WT_TS_NONE */
+ wt_timestamp_t stop_ts; /* default value: WT_TS_MAX */
+ uint64_t stop_txn; /* default value: WT_TXN_MAX */
+
+ /*
+ * Prepare information isn't really part of a time window, but we need to aggregate it to the
+ * internal page information in reconciliation, and this is the simplest place to put it.
+ */
+ uint8_t prepare;
+};
+
+/* The time pairs that define an aggregated time window and associated prepare information. */
+struct __wt_time_aggregate {
+ wt_timestamp_t newest_start_durable_ts; /* default value: WT_TS_NONE */
+ wt_timestamp_t newest_stop_durable_ts; /* default value: WT_TS_NONE */
+
+ wt_timestamp_t oldest_start_ts; /* default value: WT_TS_NONE */
+ uint64_t oldest_start_txn; /* default value: WT_TXN_NONE */
+ wt_timestamp_t newest_stop_ts; /* default value: WT_TS_MAX */
+ uint64_t newest_stop_txn; /* default value: WT_TXN_MAX */
+
+ uint8_t prepare;
+};
diff --git a/src/third_party/wiredtiger/src/include/timestamp.i b/src/third_party/wiredtiger/src/include/timestamp.i
new file mode 100644
index 00000000000..c6e7d66ffef
--- /dev/null
+++ b/src/third_party/wiredtiger/src/include/timestamp.i
@@ -0,0 +1,225 @@
+/*-
+ * Copyright (c) 2014-2020 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+/*
+ * __wt_time_window_init --
+ * Initialize the fields in a time window to their defaults.
+ */
+static inline void
+__wt_time_window_init(WT_TIME_WINDOW *tw)
+{
+ tw->durable_start_ts = WT_TS_NONE;
+ tw->start_ts = WT_TS_NONE;
+ tw->start_txn = WT_TXN_NONE;
+
+ tw->durable_stop_ts = WT_TS_NONE;
+ tw->stop_ts = WT_TS_MAX;
+ tw->stop_txn = WT_TXN_MAX;
+
+ tw->prepare = 0;
+}
+
+/*
+ * __wt_time_window_init_max --
+ * Initialize the fields in a time window to values that force an override.
+ */
+static inline void
+__wt_time_window_init_max(WT_TIME_WINDOW *tw)
+{
+ tw->durable_start_ts = WT_TS_MAX;
+ tw->start_ts = WT_TS_MAX;
+ tw->start_txn = WT_TXN_MAX;
+
+ tw->durable_stop_ts = WT_TS_MAX;
+ tw->stop_ts = WT_TS_NONE;
+ tw->stop_txn = WT_TXN_NONE;
+
+ tw->prepare = 0;
+}
+
+/*
+ * __wt_time_window_copy --
+ * Copy the values from one time window structure to another.
+ */
+static inline void
+__wt_time_window_copy(WT_TIME_WINDOW *dest, WT_TIME_WINDOW *source)
+{
+ *dest = *source;
+}
+
+/*
+ * __wt_time_window_is_empty --
+ * Return true if the time window is equivalent to the default time window.
+ */
+static inline bool
+__wt_time_window_is_empty(WT_TIME_WINDOW *tw)
+{
+ return (tw->durable_start_ts == WT_TS_NONE && tw->start_ts == WT_TS_NONE &&
+ tw->start_txn == WT_TXN_NONE && tw->durable_stop_ts == WT_TS_NONE &&
+ tw->stop_ts == WT_TS_MAX && tw->stop_txn == WT_TXN_MAX && tw->prepare == 0);
+}
+
+/*
+ * __wt_time_windows_equal --
+ * Return true if the time windows are the same.
+ */
+static inline bool
+__wt_time_windows_equal(WT_TIME_WINDOW *tw1, WT_TIME_WINDOW *tw2)
+{
+ return (tw1->durable_start_ts == tw2->durable_start_ts && tw1->start_ts == tw2->start_ts &&
+ tw1->start_txn == tw2->start_txn && tw1->durable_stop_ts == tw2->durable_stop_ts &&
+ tw1->stop_ts == tw2->stop_ts && tw1->stop_txn == tw2->stop_txn &&
+ tw1->prepare == tw2->prepare);
+}
+
+/*
+ * __wt_time_window_set_start --
+ * Set the start values of a time window from those in an update structure.
+ */
+static inline void
+__wt_time_window_set_start(WT_TIME_WINDOW *tw, WT_UPDATE *upd)
+{
+ /*
+ * Durable timestamp can be 0 for prepared updates, in those cases use the prepared timestamp as
+ * durable timestamp.
+ */
+ tw->durable_start_ts = tw->start_ts = upd->start_ts;
+ if (upd->durable_ts != WT_TS_NONE)
+ tw->durable_start_ts = upd->durable_ts;
+ tw->start_txn = upd->txnid;
+}
+
+/*
+ * __wt_time_window_set_stop --
+ * Set the start values of a time window from those in an update structure.
+ */
+static inline void
+__wt_time_window_set_stop(WT_TIME_WINDOW *tw, WT_UPDATE *upd)
+{
+ /*
+ * Durable timestamp can be 0 for prepared updates, in those cases use the prepared timestamp as
+ * durable timestamp.
+ */
+ tw->durable_stop_ts = tw->stop_ts = upd->start_ts;
+ if (upd->durable_ts != WT_TS_NONE)
+ tw->durable_stop_ts = upd->durable_ts;
+ tw->stop_txn = upd->txnid;
+}
+
+/*
+ * __wt_time_aggregate_init --
+ * Initialize the fields in an aggregated time window to their defaults.
+ */
+static inline void
+__wt_time_aggregate_init(WT_TIME_AGGREGATE *ta)
+{
+ /*
+ * The aggregated durable timestamp values represent the maximum durable timestamp over set of
+ * timestamps. These aggregated max values are used for rollback to stable operation to find out
+ * whether the page has any timestamp updates more than stable timestamp.
+ */
+ ta->newest_start_durable_ts = WT_TS_NONE;
+ ta->newest_stop_durable_ts = WT_TS_NONE;
+
+ ta->oldest_start_ts = WT_TS_NONE;
+ ta->oldest_start_txn = WT_TXN_NONE;
+
+ ta->newest_stop_ts = WT_TS_MAX;
+ ta->newest_stop_txn = WT_TXN_MAX;
+
+ ta->prepare = 0;
+}
+
+/*
+ * __wt_time_aggregate_init_max --
+ * Initialize the fields in an aggregated time window to maximum values, since this structure is
+ * generally populated by iterating over a set of timestamps and calculating max/min seen for
+ * each value, it's useful to be able to start with a negatively initialized structure.
+ */
+static inline void
+__wt_time_aggregate_init_max(WT_TIME_AGGREGATE *ta)
+{
+ /*
+ * The aggregated durable timestamp values represent the maximum durable timestamp over set of
+ * timestamps. These aggregated max values are used for rollback to stable operation to find out
+ * whether the page has any timestamp updates more than stable timestamp.
+ */
+ ta->newest_start_durable_ts = WT_TS_NONE;
+ ta->newest_stop_durable_ts = WT_TS_NONE;
+
+ ta->oldest_start_ts = WT_TS_MAX;
+ ta->oldest_start_txn = WT_TXN_MAX;
+
+ ta->newest_stop_ts = WT_TS_NONE;
+ ta->newest_stop_txn = WT_TXN_NONE;
+
+ ta->prepare = 0;
+}
+
+/*
+ * __wt_time_aggregate_is_empty --
+ * Return true if the time aggregate is equivalent to the default time aggregate.
+ */
+static inline bool
+__wt_time_aggregate_is_empty(WT_TIME_AGGREGATE *ta)
+{
+ return (ta->newest_start_durable_ts == WT_TS_NONE && ta->newest_stop_durable_ts == WT_TS_NONE &&
+ ta->oldest_start_ts == WT_TS_MAX && ta->oldest_start_txn == WT_TXN_MAX &&
+ ta->newest_stop_ts == WT_TS_NONE && ta->newest_stop_txn == WT_TXN_NONE && ta->prepare == 0);
+}
+
+/*
+ * __wt_time_aggregate_copy --
+ * Copy the values from one time aggregate structure to another.
+ */
+static inline void
+__wt_time_aggregate_copy(WT_TIME_AGGREGATE *dest, WT_TIME_AGGREGATE *source)
+{
+ *dest = *source;
+}
+
+/*
+ * __wt_time_aggregate_update --
+ * Update the aggregated window to reflect for a new time window.
+ */
+static inline void
+__wt_time_aggregate_update(WT_TIME_AGGREGATE *ta, WT_TIME_WINDOW *tw)
+{
+ ta->newest_start_durable_ts = WT_MAX(tw->durable_start_ts, ta->newest_start_durable_ts);
+ ta->newest_stop_durable_ts = WT_MAX(tw->durable_stop_ts, ta->newest_stop_durable_ts);
+
+ ta->oldest_start_ts = WT_MIN(tw->start_ts, ta->oldest_start_ts);
+ ta->oldest_start_txn = WT_MIN(tw->start_txn, ta->oldest_start_txn);
+ ta->newest_stop_ts = WT_MAX(tw->stop_ts, ta->newest_stop_ts);
+ ta->newest_stop_txn = WT_MAX(tw->stop_txn, ta->newest_stop_txn);
+
+ if (tw->prepare != 0)
+ ta->prepare = 1;
+}
+
+/*
+ * __wt_time_aggregate_merge --
+ * Merge an aggregated time window into another - choosing the most conservative value from
+ * each.
+ */
+static inline void
+__wt_time_aggregate_merge(WT_TIME_AGGREGATE *dest, WT_TIME_AGGREGATE *source)
+{
+ dest->newest_start_durable_ts =
+ WT_MAX(dest->newest_start_durable_ts, source->newest_start_durable_ts);
+ dest->newest_stop_durable_ts =
+ WT_MAX(dest->newest_stop_durable_ts, source->newest_stop_durable_ts);
+
+ dest->oldest_start_ts = WT_MIN(dest->oldest_start_ts, source->oldest_start_ts);
+ dest->oldest_start_txn = WT_MIN(dest->oldest_start_txn, source->oldest_start_txn);
+ dest->newest_stop_ts = WT_MAX(dest->newest_stop_ts, source->newest_stop_ts);
+ dest->newest_stop_txn = WT_MAX(dest->newest_stop_txn, source->newest_stop_txn);
+
+ if (source->prepare != 0)
+ dest->prepare = 1;
+}
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 483fd429496..a06b1405651 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -11,6 +11,9 @@
#define WT_TXN_MAX (UINT64_MAX - 10) /* End of time */
#define WT_TXN_ABORTED UINT64_MAX /* Update rolled back */
+#define WT_TS_NONE 0 /* Beginning of time */
+#define WT_TS_MAX UINT64_MAX /* End of time */
+
/* AUTOMATIC FLAG VALUE GENERATION START */
#define WT_TXN_LOG_CKPT_CLEANUP 0x01u
#define WT_TXN_LOG_CKPT_PREPARE 0x02u
@@ -52,25 +55,6 @@ typedef enum {
#define WT_SESSION_IS_CHECKPOINT(s) ((s)->id != 0 && (s)->id == S2C(s)->txn_global.checkpoint_id)
-#define WT_TS_NONE 0 /* Beginning of time */
-#define WT_TS_MAX UINT64_MAX /* End of time */
-
-/*
- * We format timestamps in a couple of ways, declare appropriate sized buffers. Hexadecimal is 2x
- * the size of the value. MongoDB format (high/low pairs of 4B unsigned integers, with surrounding
- * parenthesis and separating comma and space), is 2x the maximum digits from a 4B unsigned integer
- * plus 4. Both sizes include a trailing null byte as well.
- */
-#define WT_TS_HEX_STRING_SIZE (2 * sizeof(wt_timestamp_t) + 1)
-#define WT_TS_INT_STRING_SIZE (2 * 10 + 4 + 1)
-
-/*
- * We need an appropriately sized buffer for formatted time pairs. This is for time pairs of the
- * form (time_stamp, slash and transaction_id), which gives the max digits of a timestamp plus slash
- * plus max digits of a 8 byte integer with a trailing null byte.
- */
-#define WT_TP_STRING_SIZE (WT_TS_INT_STRING_SIZE + 1 + 20 + 1)
-
/*
* Perform an operation at the specified isolation level.
*
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 42d9233676c..0cd8c89c9a7 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -59,8 +59,8 @@ __wt_txn_err_set(WT_SESSION_IMPL *session, int ret)
* a prepared transaction.
*/
if (F_ISSET(txn, WT_TXN_PREPARE))
- WT_PANIC_MSG(session, ret,
- "transactional error logged after transaction was prepared, failing the system");
+ WT_IGNORE_RET(__wt_panic(session, ret,
+ "transactional error logged after transaction was prepared, failing the system"));
}
/*
@@ -584,6 +584,20 @@ __wt_txn_upd_visible_all(WT_SESSION_IMPL *session, WT_UPDATE *upd)
}
/*
+ * __wt_txn_upd_value_visible_all --
+ * Is the given update value visible to all (possible) readers?
+ */
+static inline bool
+__wt_txn_upd_value_visible_all(WT_SESSION_IMPL *session, WT_UPDATE_VALUE *upd_value)
+{
+ if (upd_value->prepare_state == WT_PREPARE_LOCKED ||
+ upd_value->prepare_state == WT_PREPARE_INPROGRESS)
+ return (false);
+
+ return (__wt_txn_visible_all(session, upd_value->txnid, upd_value->start_ts));
+}
+
+/*
* __txn_visible_id --
* Can the current transaction see the given ID?
*/
@@ -769,12 +783,12 @@ __wt_upd_alloc_tombstone(WT_SESSION_IMPL *session, WT_UPDATE **updp, size_t *siz
* Get the first visible update in a list (or NULL if none are visible).
*/
static inline int
-__wt_txn_read_upd_list(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp)
+__wt_txn_read_upd_list(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
{
WT_VISIBLE_TYPE upd_visible;
uint8_t type;
- *updp = NULL;
+ __wt_upd_value_clear(cbt->upd_value);
for (; upd != NULL; upd = upd->next) {
WT_ORDERED_READ(type, upd->type);
@@ -784,19 +798,33 @@ __wt_txn_read_upd_list(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **upd
upd_visible = __wt_txn_upd_visible_type(session, upd);
if (upd_visible == WT_VISIBLE_TRUE) {
/*
- * A tombstone representing a stop time pair will have either a valid txn id or a valid
- * timestamp. Ignore such tombstones in history store based on session settings.
+ * Ignore non-globally visible tombstones when we are doing history store scans in
+ * rollback to stable or when we are told to.
*/
- if (type == WT_UPDATE_TOMBSTONE && WT_IS_HS(S2BT(session)) &&
- F_ISSET(session, WT_SESSION_IGNORE_HS_TOMBSTONE) &&
- (upd->start_ts != WT_TS_NONE || upd->txnid != WT_TXN_NONE))
+ if (type == WT_UPDATE_TOMBSTONE &&
+ (F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE) ||
+ (WT_IS_HS(S2BT(session)) && F_ISSET(session, WT_SESSION_ROLLBACK_TO_STABLE))) &&
+ !__wt_txn_upd_visible_all(session, upd))
continue;
- *updp = upd;
- return (0);
+ break;
}
if (upd_visible == WT_VISIBLE_PREPARE)
return (WT_PREPARE_CONFLICT);
}
+ if (upd == NULL)
+ return (0);
+ /*
+ * Now assign to the update value. If it's not a modify, we're free to simply point the value at
+ * the update's memory without owning it. If it is a modify, we need to reconstruct the full
+ * update now and make the value own the buffer.
+ *
+ * If the caller has specifically asked us to skip assigning the buffer, we shouldn't bother
+ * reconstructing the modify.
+ */
+ if (upd->type != WT_UPDATE_MODIFY || cbt->upd_value->skip_buf)
+ __wt_upd_value_assign(cbt->upd_value, upd);
+ else
+ WT_RET(__wt_modify_reconstruct_from_upd_list(session, cbt, upd, cbt->upd_value));
return (0);
}
@@ -809,101 +837,83 @@ __wt_txn_read_upd_list(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **upd
*/
static inline int
__wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno,
- WT_UPDATE *upd, WT_CELL_UNPACK *vpack, WT_UPDATE **updp)
+ WT_UPDATE *upd, WT_CELL_UNPACK *vpack)
{
- WT_DECL_RET;
- WT_ITEM buf;
- WT_TIME_PAIR start, stop;
+ WT_TIME_WINDOW tw;
- *updp = NULL;
- WT_RET(__wt_txn_read_upd_list(session, upd, updp));
- if (*updp != NULL)
+ WT_RET(__wt_txn_read_upd_list(session, cbt, upd));
+ if (WT_UPDATE_DATA_VALUE(cbt->upd_value) ||
+ (cbt->upd_value->type == WT_UPDATE_MODIFY && cbt->upd_value->skip_buf))
return (0);
+ WT_ASSERT(session, cbt->upd_value->type == WT_UPDATE_INVALID);
/* If there is no ondisk value, there can't be anything in the history store either. */
- if (cbt->ref->page->dsk == NULL || cbt->slot == UINT32_MAX)
- return (__wt_upd_alloc_tombstone(session, updp, NULL));
-
- buf.data = NULL;
- buf.size = 0;
- buf.mem = NULL;
- buf.memsize = 0;
- buf.flags = 0;
+ if (cbt->ref->page->dsk == NULL || cbt->slot == UINT32_MAX) {
+ cbt->upd_value->type = WT_UPDATE_TOMBSTONE;
+ return (0);
+ }
/* Check the ondisk value. */
if (vpack == NULL) {
- ret = __wt_value_return_buf(cbt, cbt->ref, &buf, &start, &stop);
- if (ret != 0) {
- __wt_buf_free(session, &buf);
- return (ret);
- }
+ __wt_time_window_init(&tw);
+ WT_RET(__wt_value_return_buf(cbt, cbt->ref, &cbt->upd_value->buf, &tw));
} else {
- start.timestamp = vpack->start_ts;
- start.txnid = vpack->start_txn;
- stop.timestamp = vpack->stop_ts;
- stop.txnid = vpack->start_txn;
- buf.data = vpack->data;
- buf.size = vpack->size;
+ __wt_time_window_copy(&tw, &vpack->tw);
+ cbt->upd_value->buf.data = vpack->data;
+ cbt->upd_value->buf.size = vpack->size;
}
/*
- * If the stop pair is set, that means that there is a tombstone at that time. If the stop time
- * pair is visible to our txn then that means we've just spotted a tombstone and should return
- * "not found", except for history store scan during rollback to stable.
+ * If the stop pair is set, that means that there is a tombstone at that time. If it is not
+ * prepared and the stop time pair is visible to our txn then that means we've just spotted a
+ * tombstone and should return "not found", except for history store scan during rollback to
+ * stable and when we are told to ignore non-globally visible tombstones.
*/
- if (stop.txnid != WT_TXN_MAX && stop.timestamp != WT_TS_MAX &&
- (!WT_IS_HS(S2BT(session)) || !F_ISSET(session, WT_SESSION_IGNORE_HS_TOMBSTONE)) &&
- __wt_txn_visible(session, stop.txnid, stop.timestamp)) {
- __wt_buf_free(session, &buf);
- WT_RET(__wt_upd_alloc_tombstone(session, updp, NULL));
- (*updp)->txnid = stop.txnid;
- /* FIXME: Reevaluate this as part of PM-1524. */
- (*updp)->durable_ts = (*updp)->start_ts = stop.timestamp;
- F_SET(*updp, WT_UPDATE_RESTORED_FROM_DISK);
+ if (tw.stop_txn != WT_TXN_MAX && tw.stop_ts != WT_TS_MAX && !tw.prepare &&
+ __wt_txn_visible(session, tw.stop_txn, tw.stop_ts) &&
+ ((!F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE) &&
+ (!WT_IS_HS(S2BT(session)) || !F_ISSET(session, WT_SESSION_ROLLBACK_TO_STABLE))) ||
+ __wt_txn_visible_all(session, tw.stop_txn, tw.stop_ts))) {
+ cbt->upd_value->buf.data = NULL;
+ cbt->upd_value->buf.size = 0;
+ cbt->upd_value->start_ts = tw.stop_ts;
+ cbt->upd_value->txnid = tw.stop_txn;
+ cbt->upd_value->type = WT_UPDATE_TOMBSTONE;
+ cbt->upd_value->prepare_state = WT_PREPARE_INIT;
return (0);
}
/*
- * If the start time pair is visible then we need to return the ondisk value.
- *
- * FIXME-PM-1521: This should be probably be re-factored to return a buffer of bytes rather than
- * an update. This allocation is expensive and doesn't serve a purpose other than to work within
- * the current system.
+ * If the start time pair is visible and it is not a prepared value then we need to return the
+ * ondisk value.
*/
- if (__wt_txn_visible(session, start.txnid, start.timestamp) ||
- F_ISSET(session, WT_SESSION_RESOLVING_MODIFY)) {
+ if ((!tw.prepare || (tw.stop_txn != WT_TXN_MAX && tw.stop_ts != WT_TS_MAX)) &&
+ (__wt_txn_visible(session, tw.start_txn, tw.start_ts) ||
+ F_ISSET(session, WT_SESSION_RESOLVING_MODIFY))) {
/* If we are resolving a modify then the btree must be the history store. */
WT_ASSERT(
session, (F_ISSET(session, WT_SESSION_RESOLVING_MODIFY) && WT_IS_HS(S2BT(session))) ||
!F_ISSET(session, WT_SESSION_RESOLVING_MODIFY));
- ret = __wt_upd_alloc(session, &buf, WT_UPDATE_STANDARD, updp, NULL);
- __wt_buf_free(session, &buf);
- WT_RET(ret);
- (*updp)->txnid = start.txnid;
- (*updp)->start_ts = start.timestamp;
- F_SET((*updp), WT_UPDATE_RESTORED_FROM_DISK);
+ if (cbt->upd_value->skip_buf) {
+ cbt->upd_value->buf.data = NULL;
+ cbt->upd_value->buf.size = 0;
+ }
+ cbt->upd_value->start_ts = tw.start_ts;
+ cbt->upd_value->txnid = tw.start_txn;
+ cbt->upd_value->type = WT_UPDATE_STANDARD;
+ cbt->upd_value->prepare_state = WT_PREPARE_INIT;
return (0);
}
/* If there's no visible update in the update chain or ondisk, check the history store file. */
- if (F_ISSET(S2C(session), WT_CONN_HS_OPEN) && !F_ISSET(S2BT(session), WT_BTREE_HS)) {
- ret = __wt_find_hs_upd(session, key, recno, updp, false, &buf);
- __wt_buf_free(session, &buf);
- WT_RET_NOTFOUND_OK(ret);
- }
-
- __wt_buf_free(session, &buf);
- /*
- * Return null not tombstone if nothing is found in history store.
- */
- WT_ASSERT(session, (*updp) == NULL || (*updp)->type != WT_UPDATE_TOMBSTONE);
+ if (F_ISSET(S2C(session), WT_CONN_HS_OPEN) && !F_ISSET(S2BT(session), WT_BTREE_HS))
+ WT_RET_NOTFOUND_OK(__wt_find_hs_upd(session, key, cbt->iface.value_format, recno,
+ cbt->upd_value, false, &cbt->upd_value->buf));
- /*
- * FIXME-PM-1521: We call transaction read in a lot of places so we can't do this yet. When we
- * re-factor this function to return a byte array, we should tackle this at the same time.
- */
+ /* Return invalid not tombstone if nothing is found in history store. */
+ WT_ASSERT(session, cbt->upd_value->type != WT_UPDATE_TOMBSTONE);
return (0);
}
@@ -1107,7 +1117,7 @@ static inline int
__wt_txn_update_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
{
WT_DECL_RET;
- WT_TIME_PAIR start, stop;
+ WT_TIME_WINDOW tw;
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
bool ignore_prepare_set, rollback;
@@ -1144,11 +1154,11 @@ __wt_txn_update_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE
*/
if (!rollback && upd == NULL && cbt != NULL && cbt->btree->type != BTREE_COL_FIX &&
cbt->ins == NULL) {
- __wt_read_cell_time_pairs(cbt, cbt->ref, &start, &stop);
- if (stop.txnid != WT_TXN_MAX && stop.timestamp != WT_TS_MAX)
- rollback = !__wt_txn_visible(session, stop.txnid, stop.timestamp);
+ __wt_read_cell_time_window(cbt, cbt->ref, &tw);
+ if (tw.stop_txn != WT_TXN_MAX && tw.stop_ts != WT_TS_MAX)
+ rollback = !__wt_txn_visible(session, tw.stop_txn, tw.stop_ts);
else
- rollback = !__wt_txn_visible(session, start.txnid, start.timestamp);
+ rollback = !__wt_txn_visible(session, tw.start_txn, tw.start_ts);
}
if (rollback) {
@@ -1253,3 +1263,40 @@ __wt_txn_activity_check(WT_SESSION_IMPL *session, bool *txn_active)
return (0);
}
+
+/*
+ * __wt_upd_value_assign --
+ * Point an update value at a given update. We're specifically not getting the value to own the
+ * memory since this exists in an update list somewhere.
+ */
+static inline void
+__wt_upd_value_assign(WT_UPDATE_VALUE *upd_value, WT_UPDATE *upd)
+{
+ if (!upd_value->skip_buf) {
+ upd_value->buf.data = upd->data;
+ upd_value->buf.size = upd->size;
+ }
+ upd_value->start_ts = upd->start_ts;
+ upd_value->txnid = upd->txnid;
+ upd_value->type = upd->type;
+ upd_value->prepare_state = upd->prepare_state;
+}
+
+/*
+ * __wt_upd_value_clear --
+ * Clear an update value to its defaults.
+ */
+static inline void
+__wt_upd_value_clear(WT_UPDATE_VALUE *upd_value)
+{
+ /*
+ * Make sure we don't touch the memory pointers here. If we have some allocated memory, that
+ * could come in handy next time we need to write to the buffer.
+ */
+ upd_value->buf.data = NULL;
+ upd_value->buf.size = 0;
+ upd_value->start_ts = WT_TS_NONE;
+ upd_value->txnid = WT_TXN_NONE;
+ upd_value->type = WT_UPDATE_INVALID;
+ upd_value->prepare_state = WT_PREPARE_INIT;
+}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 1a9bc7b7519..74fbe62d15b 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -712,17 +712,18 @@ struct __wt_cursor {
#define WT_CURSTD_DUMP_HEX 0x000080u
#define WT_CURSTD_DUMP_JSON 0x000100u
#define WT_CURSTD_DUMP_PRINT 0x000200u
-#define WT_CURSTD_JOINED 0x000400u
-#define WT_CURSTD_KEY_EXT 0x000800u /* Key points out of tree. */
-#define WT_CURSTD_KEY_INT 0x001000u /* Key points into tree. */
-#define WT_CURSTD_META_INUSE 0x002000u
-#define WT_CURSTD_OPEN 0x004000u
-#define WT_CURSTD_OVERWRITE 0x008000u
-#define WT_CURSTD_RAW 0x010000u
-#define WT_CURSTD_RAW_SEARCH 0x020000u
-#define WT_CURSTD_UPDATE_LOCAL 0x040000u
-#define WT_CURSTD_VALUE_EXT 0x080000u /* Value points out of tree. */
-#define WT_CURSTD_VALUE_INT 0x100000u /* Value points into tree. */
+#define WT_CURSTD_IGNORE_TOMBSTONE 0x000400u
+#define WT_CURSTD_JOINED 0x000800u
+#define WT_CURSTD_KEY_EXT 0x001000u /* Key points out of tree. */
+#define WT_CURSTD_KEY_INT 0x002000u /* Key points into tree. */
+#define WT_CURSTD_META_INUSE 0x004000u
+#define WT_CURSTD_OPEN 0x008000u
+#define WT_CURSTD_OVERWRITE 0x010000u
+#define WT_CURSTD_RAW 0x020000u
+#define WT_CURSTD_RAW_SEARCH 0x040000u
+#define WT_CURSTD_UPDATE_LOCAL 0x080000u
+#define WT_CURSTD_VALUE_EXT 0x100000u /* Value points out of tree. */
+#define WT_CURSTD_VALUE_INT 0x200000u /* Value points into tree. */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
#define WT_CURSTD_KEY_SET (WT_CURSTD_KEY_EXT | WT_CURSTD_KEY_INT)
#define WT_CURSTD_VALUE_SET (WT_CURSTD_VALUE_EXT | WT_CURSTD_VALUE_INT)
@@ -1950,8 +1951,9 @@ struct __wt_session {
* one of the following keys: \c "from=all" to drop all checkpoints\, \c "from=<checkpoint>"
* to drop all checkpoints after and including the named checkpoint\, or \c
* "to=<checkpoint>" to drop all checkpoints before and including the named checkpoint.
- * Checkpoints cannot be dropped while a hot backup is in progress or if open in a cursor.,
- * a list of strings; default empty.}
+ * Checkpoints cannot be dropped if open in a cursor. While a hot backup is in progress\,
+ * checkpoints created prior to the start of the backup cannot be dropped., a list of
+ * strings; default empty.}
* @config{force, if false (the default)\, checkpoints may be skipped if the underlying
* object has not been modified\, if true\, this option forces the checkpoint., a boolean
* flag; default \c false.}
@@ -6243,10 +6245,12 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_REC_PAGES_EVICTION 2142
/*! reconciliation: pages deleted */
#define WT_STAT_DSRC_REC_PAGE_DELETE 2143
+/*! reconciliation: prepared values written */
+#define WT_STAT_DSRC_REC_PREPARE_VALUE 2144
/*! session: object compaction */
-#define WT_STAT_DSRC_SESSION_COMPACT 2144
+#define WT_STAT_DSRC_SESSION_COMPACT 2145
/*! transaction: update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2145
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2146
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 204e6fd0eb9..bdf26c80663 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -315,8 +315,10 @@ struct __wt_thread;
typedef struct __wt_thread WT_THREAD;
struct __wt_thread_group;
typedef struct __wt_thread_group WT_THREAD_GROUP;
-struct __wt_time_pair;
-typedef struct __wt_time_pair WT_TIME_PAIR;
+struct __wt_time_aggregate;
+typedef struct __wt_time_aggregate WT_TIME_AGGREGATE;
+struct __wt_time_window;
+typedef struct __wt_time_window WT_TIME_WINDOW;
struct __wt_txn;
typedef struct __wt_txn WT_TXN;
struct __wt_txn_global;
@@ -329,6 +331,8 @@ struct __wt_txn_shared;
typedef struct __wt_txn_shared WT_TXN_SHARED;
struct __wt_update;
typedef struct __wt_update WT_UPDATE;
+struct __wt_update_value;
+typedef struct __wt_update_value WT_UPDATE_VALUE;
union __wt_lsn;
typedef union __wt_lsn WT_LSN;
union __wt_rand_state;
@@ -375,8 +379,9 @@ typedef uint64_t wt_timestamp_t;
#include "misc.h"
#include "mutex.h"
-#include "stat.h" /* required by dhandle.h */
-#include "dhandle.h" /* required by btree.h */
+#include "stat.h" /* required by dhandle.h */
+#include "dhandle.h" /* required by btree.h */
+#include "timestamp.h" /* required by reconcile.h */
#include "api.h"
#include "async.h"
@@ -418,10 +423,11 @@ typedef uint64_t wt_timestamp_t;
#include "intpack.i" /* required by cell.i, packing.i */
#include "misc.i" /* required by mutex.i */
-#include "buf.i" /* required by cell.i */
-#include "cell.i" /* required by btree.i */
-#include "mutex.i" /* required by btree.i */
-#include "txn.i" /* required by btree.i */
+#include "buf.i" /* required by cell.i */
+#include "timestamp.i" /* required by btree.i */
+#include "cell.i" /* required by btree.i */
+#include "mutex.i" /* required by btree.i */
+#include "txn.i" /* required by btree.i */
#include "bitstring.i"
#include "block.i"
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 87e4bda2a8a..e8348fb9933 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -215,7 +215,7 @@ __log_fs_write(
}
__wt_capacity_throttle(session, len, WT_THROTTLE_LOG);
if ((ret = __wt_write(session, slot->slot_fh, offset, len, buf)) != 0)
- WT_PANIC_RET(session, ret, "%s: fatal log failure", slot->slot_fh->name);
+ WT_RET_PANIC(session, ret, "%s: fatal log failure", slot->slot_fh->name);
return (ret);
}
@@ -1166,7 +1166,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
* can copy the files in any way they choose, and a log file rename might confuse things.
*/
create_log = true;
- if (conn->log_prealloc > 0 && !conn->hot_backup) {
+ if (conn->log_prealloc > 0 && conn->hot_backup_start == 0) {
WT_WITH_HOTBACKUP_READ_LOCK(
session, ret = __log_alloc_prealloc(session, log->fileid), &skipp);
@@ -1194,7 +1194,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
* Increment the missed pre-allocated file counter only if a hot backup is not in progress.
* We are deliberately not using pre-allocated log files during backup (see comment above).
*/
- if (!conn->hot_backup)
+ if (conn->hot_backup_start == 0)
log->prep_missed++;
WT_RET(__wt_log_allocfile(session, log->fileid, WT_LOG_FILENAME));
}
@@ -1383,7 +1383,7 @@ __log_truncate_file(WT_SESSION_IMPL *session, WT_FH *log_fh, wt_off_t offset)
conn = S2C(session);
log = conn->log;
- if (!F_ISSET(log, WT_LOG_TRUNCATE_NOTSUP) && !conn->hot_backup) {
+ if (!F_ISSET(log, WT_LOG_TRUNCATE_NOTSUP) && conn->hot_backup_start == 0) {
WT_WITH_HOTBACKUP_READ_LOCK(session, ret = __wt_ftruncate(session, log_fh, offset), &skipp);
if (!skipp) {
if (ret != ENOTSUP)
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
index 6052d20025f..7017cf74fd5 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
@@ -32,7 +32,7 @@ __wt_clsm_request_switch(WT_CURSOR_LSM *clsm)
WT_SESSION_IMPL *session;
lsm_tree = clsm->lsm_tree;
- session = (WT_SESSION_IMPL *)clsm->iface.session;
+ session = CUR2S(clsm);
if (!lsm_tree->need_switch) {
/*
@@ -64,7 +64,7 @@ __wt_clsm_await_switch(WT_CURSOR_LSM *clsm)
int waited;
lsm_tree = clsm->lsm_tree;
- session = (WT_SESSION_IMPL *)clsm->iface.session;
+ session = CUR2S(clsm);
/*
* If there is no primary chunk, or a chunk has overflowed the hard limit, which either means a
@@ -96,7 +96,7 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm)
bool hard_limit, have_primary, ovfl;
lsm_tree = clsm->lsm_tree;
- session = (WT_SESSION_IMPL *)clsm->iface.session;
+ session = CUR2S(clsm);
if (clsm->nchunks == 0) {
primary = NULL;
@@ -159,7 +159,7 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
uint64_t i, pinned_id, switch_txn;
lsm_tree = clsm->lsm_tree;
- session = (WT_SESSION_IMPL *)clsm->iface.session;
+ session = CUR2S(clsm);
txn = session->txn;
/* Merge cursors never update. */
@@ -259,7 +259,7 @@ __clsm_leave(WT_CURSOR_LSM *clsm)
{
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)clsm->iface.session;
+ session = CUR2S(clsm);
if (F_ISSET(clsm, WT_CLSM_ACTIVE)) {
--session->ncursors;
@@ -428,7 +428,7 @@ __clsm_open_cursors(WT_CURSOR_LSM *clsm, bool update, u_int start_chunk, uint32_
c = &clsm->iface;
cursor = NULL;
- session = (WT_SESSION_IMPL *)c->session;
+ session = CUR2S(clsm);
txn = session->txn;
chunk = NULL;
locked = false;
@@ -712,7 +712,7 @@ __wt_clsm_init_merge(WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, u_
WT_SESSION_IMPL *session;
clsm = (WT_CURSOR_LSM *)cursor;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
F_SET(clsm, WT_CLSM_MERGE);
if (start_chunk != 0)
@@ -816,7 +816,7 @@ __clsm_position_chunk(WT_CURSOR_LSM *clsm, WT_CURSOR *c, bool forward, int *cmpp
WT_SESSION_IMPL *session;
cursor = &clsm->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
c->set_key(c, &cursor->key);
WT_RET(c->search_near(c, cmpp));
@@ -1149,7 +1149,7 @@ __clsm_lookup(WT_CURSOR_LSM *clsm, WT_ITEM *value)
c = NULL;
cursor = &clsm->iface;
have_hash = false;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
WT_FORALL_CURSORS(clsm, c, i)
{
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c
index 5861bf4c899..50dbcf5726c 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c
@@ -25,7 +25,7 @@ __clsm_close_bulk(WT_CURSOR *cursor)
clsm = (WT_CURSOR_LSM *)cursor;
lsm_tree = clsm->lsm_tree;
chunk = lsm_tree->chunk[0];
- session = (WT_SESSION_IMPL *)clsm->iface.session;
+ session = CUR2S(clsm);
/* Close the bulk cursor to ensure the chunk is written to disk. */
bulk_cursor = clsm->chunks[0]->cursor;
@@ -67,7 +67,7 @@ __clsm_insert_bulk(WT_CURSOR *cursor)
clsm = (WT_CURSOR_LSM *)cursor;
lsm_tree = clsm->lsm_tree;
chunk = lsm_tree->chunk[0];
- session = (WT_SESSION_IMPL *)clsm->iface.session;
+ session = CUR2S(clsm);
WT_ASSERT(session, lsm_tree->nchunks == 1 && clsm->nchunks == 1);
++chunk->count;
@@ -95,7 +95,7 @@ __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[])
bulk_cursor = NULL;
cursor = &clsm->iface;
lsm_tree = clsm->lsm_tree;
- session = (WT_SESSION_IMPL *)clsm->iface.session;
+ session = CUR2S(clsm);
F_SET(clsm, WT_CLSM_BULK);
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
index 2d5c017909f..13246d2c6a4 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
@@ -442,7 +442,7 @@ __lsm_worker_manager(void *arg)
if (ret != 0) {
err:
- WT_PANIC_MSG(session, ret, "LSM worker manager thread error");
+ WT_IGNORE_RET(__wt_panic(session, ret, "LSM worker manager thread error"));
}
/* Connection close waits on us to shutdown, let it know we're done. */
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_merge.c b/src/third_party/wiredtiger/src/lsm/lsm_merge.c
index 75d97ab898b..ac586eb9e05 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_merge.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_merge.c
@@ -544,7 +544,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
* of the tree.
*/
if ((ret = __wt_lsm_meta_write(session, lsm_tree, NULL)) != 0)
- WT_PANIC_ERR(session, ret, "Failed finalizing LSM merge");
+ WT_ERR_PANIC(session, ret, "Failed finalizing LSM merge");
lsm_tree->dsk_gen++;
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index 64202688de3..6e71c0434c5 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -781,7 +781,7 @@ err:
* progress. Error out of WiredTiger.
*/
if (ret != 0)
- WT_PANIC_RET(session, ret, "Failed doing LSM switch");
+ WT_RET_PANIC(session, ret, "Failed doing LSM switch");
else if (!first_switch)
WT_RET(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_FLUSH, 0, lsm_tree));
return (ret);
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_worker.c b/src/third_party/wiredtiger/src/lsm/lsm_worker.c
index bee2f58c5a6..de06c1cc5ff 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_worker.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_worker.c
@@ -164,7 +164,7 @@ __lsm_worker(void *arg)
if (ret != 0) {
err:
__wt_lsm_manager_free_work_unit(session, entry);
- WT_PANIC_MSG(session, ret, "Error in LSM worker thread %u", cookie->id);
+ WT_IGNORE_RET(__wt_panic(session, ret, "Error in LSM worker thread %u", cookie->id));
}
return (WT_THREAD_RET_VALUE);
}
diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
index 66415b2cd62..21acec991c6 100644
--- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c
+++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
@@ -463,7 +463,6 @@ __wt_meta_ckptlist_get(
WT_CONFIG_ITEM k, v;
WT_DECL_RET;
size_t allocated, slot;
- int64_t maxorder;
char *config;
*ckptbasep = NULL;
@@ -508,11 +507,8 @@ __wt_meta_ckptlist_get(
WT_ERR(__wt_realloc_def(session, &allocated, slot + 2, &ckptbase));
/* The caller may be adding a value, initialize it. */
- maxorder = 0;
- WT_CKPT_FOREACH (ckptbase, ckpt)
- if (ckpt->order > maxorder)
- maxorder = ckpt->order;
- ckpt->order = maxorder + 1;
+ ckpt = &ckptbase[slot];
+ ckpt->order = (slot == 0) ? 1 : ckptbase[slot - 1].order + 1;
__wt_seconds(session, &ckpt->sec);
/*
* Load most recent checkpoint backup blocks to this checkpoint.
@@ -586,26 +582,44 @@ __ckpt_load(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v, WT_C
ckpt->size = (uint64_t)a.val;
/* Default to durability. */
- ret = __wt_config_subgets(session, v, "start_durable_ts", &a);
- WT_RET_NOTFOUND_OK(ret);
- ckpt->start_durable_ts = ret == WT_NOTFOUND || a.len == 0 ? WT_TS_NONE : (uint64_t)a.val;
+ __wt_time_aggregate_init(&ckpt->ta);
+
ret = __wt_config_subgets(session, v, "oldest_start_ts", &a);
WT_RET_NOTFOUND_OK(ret);
- ckpt->oldest_start_ts = ret == WT_NOTFOUND || a.len == 0 ? WT_TS_NONE : (uint64_t)a.val;
+ if (ret != WT_NOTFOUND && a.len != 0)
+ ckpt->ta.oldest_start_ts = (uint64_t)a.val;
+
ret = __wt_config_subgets(session, v, "oldest_start_txn", &a);
WT_RET_NOTFOUND_OK(ret);
- ckpt->oldest_start_txn = ret == WT_NOTFOUND || a.len == 0 ? WT_TXN_NONE : (uint64_t)a.val;
- ret = __wt_config_subgets(session, v, "stop_durable_ts", &a);
+ if (ret != WT_NOTFOUND && a.len != 0)
+ ckpt->ta.oldest_start_txn = (uint64_t)a.val;
+
+ ret = __wt_config_subgets(session, v, "newest_start_durable_ts", &a);
WT_RET_NOTFOUND_OK(ret);
- ckpt->stop_durable_ts = ret == WT_NOTFOUND || a.len == 0 ? WT_TS_NONE : (uint64_t)a.val;
+ if (ret != WT_NOTFOUND && a.len != 0)
+ ckpt->ta.newest_start_durable_ts = (uint64_t)a.val;
+
ret = __wt_config_subgets(session, v, "newest_stop_ts", &a);
WT_RET_NOTFOUND_OK(ret);
- ckpt->newest_stop_ts = ret == WT_NOTFOUND || a.len == 0 ? WT_TS_MAX : (uint64_t)a.val;
+ if (ret != WT_NOTFOUND && a.len != 0)
+ ckpt->ta.newest_stop_ts = (uint64_t)a.val;
+
ret = __wt_config_subgets(session, v, "newest_stop_txn", &a);
WT_RET_NOTFOUND_OK(ret);
- ckpt->newest_stop_txn = ret == WT_NOTFOUND || a.len == 0 ? WT_TXN_MAX : (uint64_t)a.val;
- __wt_check_addr_validity(session, ckpt->start_durable_ts, ckpt->oldest_start_ts,
- ckpt->oldest_start_txn, ckpt->stop_durable_ts, ckpt->newest_stop_ts, ckpt->newest_stop_txn);
+ if (ret != WT_NOTFOUND && a.len != 0)
+ ckpt->ta.newest_stop_txn = (uint64_t)a.val;
+
+ ret = __wt_config_subgets(session, v, "newest_stop_durable_ts", &a);
+ WT_RET_NOTFOUND_OK(ret);
+ if (ret != WT_NOTFOUND && a.len != 0)
+ ckpt->ta.newest_stop_durable_ts = (uint64_t)a.val;
+
+ ret = __wt_config_subgets(session, v, "prepare", &a);
+ WT_RET_NOTFOUND_OK(ret);
+ if (ret != WT_NOTFOUND && a.len != 0)
+ ckpt->ta.prepare = (uint8_t)a.val;
+
+ __wt_check_addr_validity(session, &ckpt->ta);
WT_RET(__wt_config_subgets(session, v, "write_gen", &a));
if (a.len == 0)
@@ -691,9 +705,7 @@ __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM
WT_RET(__wt_raw_to_hex(session, ckpt->raw.data, ckpt->raw.size, &ckpt->addr));
}
- __wt_check_addr_validity(session, ckpt->start_durable_ts, ckpt->oldest_start_ts,
- ckpt->oldest_start_txn, ckpt->stop_durable_ts, ckpt->newest_stop_ts,
- ckpt->newest_stop_txn);
+ __wt_check_addr_validity(session, &ckpt->ta);
WT_RET(__wt_buf_catfmt(session, buf, "%s%s", sep, ckpt->name));
sep = ",";
@@ -701,18 +713,17 @@ __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM
if (strcmp(ckpt->name, WT_CHECKPOINT) == 0)
WT_RET(__wt_buf_catfmt(session, buf, ".%" PRId64, ckpt->order));
- /*
- * Use PRId64 formats: WiredTiger's configuration code handles signed 8B values.
- */
+ /* Use PRId64 formats: WiredTiger's configuration code handles signed 8B values. */
WT_RET(__wt_buf_catfmt(session, buf,
"=(addr=\"%.*s\",order=%" PRId64 ",time=%" PRIu64 ",size=%" PRId64
- ",start_durable_ts=%" PRId64 ",oldest_start_ts=%" PRId64 ",oldest_start_txn=%" PRId64
- ",stop_durable_ts=%" PRId64 ",newest_stop_ts=%" PRId64 ",newest_stop_txn=%" PRId64
- ",write_gen=%" PRId64 ")",
+ ",newest_start_durable_ts=%" PRId64 ",oldest_start_ts=%" PRId64
+ ",oldest_start_txn=%" PRId64 ",newest_stop_durable_ts=%" PRId64 ",newest_stop_ts=%" PRId64
+ ",newest_stop_txn=%" PRId64 ",prepare:%d,write_gen=%" PRId64 ")",
(int)ckpt->addr.size, (char *)ckpt->addr.data, ckpt->order, ckpt->sec,
- (int64_t)ckpt->size, (int64_t)ckpt->start_durable_ts, (int64_t)ckpt->oldest_start_ts,
- (int64_t)ckpt->oldest_start_txn, (int64_t)ckpt->stop_durable_ts,
- (int64_t)ckpt->newest_stop_ts, (int64_t)ckpt->newest_stop_txn, (int64_t)ckpt->write_gen));
+ (int64_t)ckpt->size, (int64_t)ckpt->ta.newest_start_durable_ts,
+ (int64_t)ckpt->ta.oldest_start_ts, (int64_t)ckpt->ta.oldest_start_txn,
+ (int64_t)ckpt->ta.newest_stop_durable_ts, (int64_t)ckpt->ta.newest_stop_ts,
+ (int64_t)ckpt->ta.newest_stop_txn, (int)ckpt->ta.prepare, (int64_t)ckpt->write_gen));
}
WT_RET(__wt_buf_catfmt(session, buf, ")"));
diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c
index 5b2710e8aba..a569e132d75 100644
--- a/src/third_party/wiredtiger/src/meta/meta_track.c
+++ b/src/third_party/wiredtiger/src/meta/meta_track.c
@@ -331,7 +331,7 @@ err:
__wt_cond_signal(session, S2C(session)->sweep_cond);
if (ret != 0)
- WT_PANIC_RET(session, ret, "failed to apply or unroll all tracked operations");
+ WT_RET_PANIC(session, ret, "failed to apply or unroll all tracked operations");
return (saved_ret == 0 ? 0 : saved_ret);
}
diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c
index 9bedba65310..119438fefcd 100644
--- a/src/third_party/wiredtiger/src/meta/meta_turtle.c
+++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c
@@ -80,7 +80,7 @@ __metadata_load_hot_backup(WT_SESSION_IMPL *session)
break;
WT_ERR(__wt_getline(session, fs, value));
if (value->size == 0)
- WT_PANIC_ERR(session, EINVAL, "%s: zero-length value", WT_METADATA_BACKUP);
+ WT_ERR_PANIC(session, EINVAL, "%s: zero-length value", WT_METADATA_BACKUP);
WT_ERR(__wt_metadata_update(session, key->data, value->data));
}
@@ -381,7 +381,7 @@ err:
*/
if (ret == 0 || strcmp(key, WT_METADATA_COMPAT) == 0 || F_ISSET(S2C(session), WT_CONN_SALVAGE))
return (ret);
- WT_PANIC_RET(session, WT_TRY_SALVAGE, "%s: fatal turtle file read error", WT_METADATA_TURTLE);
+ WT_RET_PANIC(session, WT_TRY_SALVAGE, "%s: fatal turtle file read error", WT_METADATA_TURTLE);
}
/*
@@ -437,5 +437,5 @@ err:
*/
if (ret == 0)
return (ret);
- WT_PANIC_RET(session, ret, "%s: fatal turtle file update error", WT_METADATA_TURTLE);
+ WT_RET_PANIC(session, ret, "%s: fatal turtle file update error", WT_METADATA_TURTLE);
}
diff --git a/src/third_party/wiredtiger/src/optrack/optrack.c b/src/third_party/wiredtiger/src/optrack/optrack.c
index 6b77c534e4f..916c6ab74b6 100644
--- a/src/third_party/wiredtiger/src/optrack/optrack.c
+++ b/src/third_party/wiredtiger/src/optrack/optrack.c
@@ -39,7 +39,7 @@ __wt_optrack_record_funcid(WT_SESSION_IMPL *session, const char *func, uint16_t
if (0) {
err:
- WT_PANIC_MSG(session, ret, "operation tracking initialization failure");
+ WT_IGNORE_RET(__wt_panic(session, ret, "operation tracking initialization failure"));
}
if (locked)
diff --git a/src/third_party/wiredtiger/src/os_common/os_errno.c b/src/third_party/wiredtiger/src/os_common/os_errno.c
index 5c77449729a..c7c8a6dfd4d 100644
--- a/src/third_party/wiredtiger/src/os_common/os_errno.c
+++ b/src/third_party/wiredtiger/src/os_common/os_errno.c
@@ -78,6 +78,7 @@ __wt_ext_map_windows_error(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uin
return (__wt_map_windows_error(windows_error));
#else
WT_UNUSED(windows_error);
- return (WT_PANIC);
+ WT_RET_PANIC(
+ (WT_SESSION_IMPL *)wt_session, WT_PANIC, "unexpected attempt to map Windows error");
#endif
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c
index d8f695fa1c8..c0fc74ee8f8 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_fs.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c
@@ -88,7 +88,7 @@ __posix_sync(WT_SESSION_IMPL *session, int fd, const char *name, const char *fun
WT_SYSCALL(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret);
if (ret == 0)
return (0);
- WT_PANIC_RET(session, ret, "%s: %s: fcntl(F_FULLFSYNC)", name, func);
+ WT_RET_PANIC(session, ret, "%s: %s: fcntl(F_FULLFSYNC)", name, func);
}
#endif
#if defined(HAVE_FDATASYNC)
@@ -96,13 +96,13 @@ __posix_sync(WT_SESSION_IMPL *session, int fd, const char *name, const char *fun
WT_SYSCALL(fdatasync(fd), ret);
if (ret == 0)
return (0);
- WT_PANIC_RET(session, ret, "%s: %s: fdatasync", name, func);
+ WT_RET_PANIC(session, ret, "%s: %s: fdatasync", name, func);
#else
/* See comment in __posix_sync(): sync cannot be retried or fail. */
WT_SYSCALL(fsync(fd), ret);
if (ret == 0)
return (0);
- WT_PANIC_RET(session, ret, "%s: %s: fsync", name, func);
+ WT_RET_PANIC(session, ret, "%s: %s: fsync", name, func);
#endif
}
@@ -148,7 +148,7 @@ err:
return (ret);
/* See comment in __posix_sync(): sync cannot be retried or fail. */
- WT_PANIC_RET(session, ret, "%s: directory-sync", path);
+ WT_RET_PANIC(session, ret, "%s: directory-sync", path);
}
#endif
@@ -541,7 +541,7 @@ __posix_file_sync_nowait(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session)
if (ret == 0)
return (0);
- WT_PANIC_RET(session, ret, "%s: handle-sync-nowait: sync_file_range", file_handle->name);
+ WT_RET_PANIC(session, ret, "%s: handle-sync-nowait: sync_file_range", file_handle->name);
}
#endif
diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
index ac9a676b9b0..61896fa1b83 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
@@ -106,7 +106,7 @@ __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs
#ifdef HAVE_PTHREAD_COND_MONOTONIC
WT_SYSCALL_RETRY(clock_gettime(CLOCK_MONOTONIC, &ts), ret);
if (ret != 0)
- WT_PANIC_MSG(session, ret, "clock_gettime");
+ WT_IGNORE_RET(__wt_panic(session, ret, "clock_gettime"));
#else
__wt_epoch_raw(session, &ts);
#endif
@@ -140,7 +140,7 @@ err:
if (ret == 0)
return;
- WT_PANIC_MSG(session, ret, "pthread_cond_wait: %s", cond->name);
+ WT_IGNORE_RET(__wt_panic(session, ret, "pthread_cond_wait: %s", cond->name));
}
/*
@@ -175,7 +175,7 @@ __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond)
return;
err:
- WT_PANIC_MSG(session, ret, "pthread_cond_broadcast: %s", cond->name);
+ WT_IGNORE_RET(__wt_panic(session, ret, "pthread_cond_broadcast: %s", cond->name));
}
/*
@@ -193,10 +193,10 @@ __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp)
return;
if ((ret = pthread_cond_destroy(&cond->cond)) != 0)
- WT_PANIC_MSG(session, ret, "pthread_cond_destroy: %s", cond->name);
+ WT_IGNORE_RET(__wt_panic(session, ret, "pthread_cond_destroy: %s", cond->name));
if ((ret = pthread_mutex_destroy(&cond->mtx)) != 0)
- WT_PANIC_MSG(session, ret, "pthread_mutex_destroy: %s", cond->name);
+ WT_IGNORE_RET(__wt_panic(session, ret, "pthread_mutex_destroy: %s", cond->name));
__wt_free(session, *condp);
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_time.c b/src/third_party/wiredtiger/src/os_posix/os_time.c
index 57e567e7828..38a3be97c92 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_time.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_time.c
@@ -30,7 +30,7 @@ __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp)
WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret);
if (ret == 0)
return;
- WT_PANIC_MSG(session, ret, "clock_gettime");
+ WT_IGNORE_RET(__wt_panic(session, ret, "clock_gettime"));
#elif defined(HAVE_GETTIMEOFDAY)
{
struct timeval v;
@@ -41,7 +41,7 @@ __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp)
tsp->tv_nsec = v.tv_usec * WT_THOUSAND;
return;
}
- WT_PANIC_MSG(session, ret, "gettimeofday");
+ WT_IGNORE_RET(__wt_panic(session, ret, "gettimeofday"));
}
#else
NO TIME - OF - DAY IMPLEMENTATION : see src / os_posix / os_time.c
diff --git a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c
index 32a5e42c193..687796efe33 100644
--- a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c
+++ b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c
@@ -116,8 +116,8 @@ skipping:
__wt_err(session, __wt_map_windows_error(windows_error), "SleepConditionVariableCS: %s: %s",
cond->name, __wt_formatmessage(session, windows_error));
- WT_PANIC_MSG(
- session, __wt_map_windows_error(windows_error), "SleepConditionVariableCS: %s", cond->name);
+ WT_IGNORE_RET(__wt_panic(
+ session, __wt_map_windows_error(windows_error), "SleepConditionVariableCS: %s", cond->name));
}
/*
diff --git a/src/third_party/wiredtiger/src/os_win/os_thread.c b/src/third_party/wiredtiger/src/os_win/os_thread.c
index 118635ab734..6e95f17a94e 100644
--- a/src/third_party/wiredtiger/src/os_win/os_thread.c
+++ b/src/third_party/wiredtiger/src/os_win/os_thread.c
@@ -56,11 +56,10 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t *tid)
if ((windows_error = WaitForSingleObject(tid->id, INFINITE)) != WAIT_OBJECT_0) {
if (windows_error == WAIT_FAILED)
windows_error = __wt_getlasterror();
- __wt_err(session, __wt_map_windows_error(windows_error),
- "thread join: WaitForSingleObject: %s", __wt_formatmessage(session, windows_error));
/* If we fail to wait, we will leak handles, do not continue. */
- return (WT_PANIC);
+ return (__wt_panic(session, __wt_map_windows_error(windows_error),
+ "thread join: WaitForSingleObject: %s", __wt_formatmessage(session, windows_error)));
}
if (CloseHandle(tid->id) == 0) {
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_child.c b/src/third_party/wiredtiger/src/reconcile/rec_child.c
index 2d3f17a22af..d235f926fa0 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_child.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_child.c
@@ -36,7 +36,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref, WT_C
if (F_ISSET(r, WT_REC_CLEAN_AFTER_REC | WT_REC_VISIBILITY_ERR) && page_del != NULL &&
__wt_page_del_active(session, ref, false)) {
if (F_ISSET(r, WT_REC_VISIBILITY_ERR))
- WT_PANIC_RET(session, EINVAL, "reconciliation illegally skipped an update");
+ WT_RET_PANIC(session, EINVAL, "reconciliation illegally skipped an update");
return (__wt_set_return(session, EBUSY));
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_col.c b/src/third_party/wiredtiger/src/reconcile/rec_col.c
index ffa4c94f1b2..18a4a16b556 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_col.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_col.c
@@ -19,7 +19,7 @@ __rec_col_fix_bulk_insert_split_check(WT_CURSOR_BULK *cbulk)
WT_RECONCILE *r;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session;
+ session = CUR2S(cbulk);
r = cbulk->reconcile;
btree = S2BT(session);
@@ -110,14 +110,15 @@ __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool delet
WT_BTREE *btree;
WT_RECONCILE *r;
WT_REC_KV *val;
+ WT_TIME_WINDOW tw;
r = cbulk->reconcile;
btree = S2BT(session);
+ __wt_time_window_init(&tw);
val = &r->v;
if (deleted) {
- val->cell_len = __wt_cell_pack_del(session, &val->cell, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE,
- WT_TS_NONE, WT_TS_MAX, WT_TXN_MAX, cbulk->rle);
+ val->cell_len = __wt_cell_pack_del(session, &val->cell, &tw, cbulk->rle);
val->buf.data = NULL;
val->buf.size = 0;
val->len = val->cell_len;
@@ -126,8 +127,8 @@ __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool delet
* Store the bulk cursor's last buffer, not the current value, we're tracking duplicates,
* which means we want the previous value seen, not the current value.
*/
- WT_RET(__wt_rec_cell_build_val(session, r, cbulk->last.data, cbulk->last.size, WT_TS_NONE,
- WT_TS_NONE, WT_TXN_NONE, WT_TS_NONE, WT_TS_MAX, WT_TXN_MAX, false, cbulk->rle));
+ WT_RET(
+ __wt_rec_cell_build_val(session, r, cbulk->last.data, cbulk->last.size, &tw, cbulk->rle));
/* Boundary: split or write the page. */
if (WT_CROSSING_SPLIT_BND(r, val->len))
@@ -135,11 +136,9 @@ __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool delet
/* Copy the value onto the page. */
if (btree->dictionary)
- WT_RET(__wt_rec_dict_replace(session, r, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TS_NONE,
- WT_TS_MAX, WT_TXN_MAX, false, cbulk->rle, val));
+ WT_RET(__wt_rec_dict_replace(session, r, &tw, cbulk->rle, val));
__wt_rec_image_copy(session, r, val);
- __wt_rec_addr_ts_update(
- r, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TS_NONE, WT_TS_MAX, WT_TXN_MAX, false);
+ __wt_time_aggregate_update(&r->cur_ptr->ta, &tw);
/* Update the starting record number in case we split. */
r->recno += cbulk->rle;
@@ -179,9 +178,7 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
/* Copy the value onto the page. */
__wt_rec_image_copy(session, r, val);
- __wt_rec_addr_ts_update(r, addr->newest_start_durable_ts, addr->oldest_start_ts,
- addr->oldest_start_txn, addr->newest_stop_durable_ts, addr->newest_stop_ts,
- addr->newest_stop_txn, addr->prepare);
+ __wt_time_aggregate_merge(&r->cur_ptr->ta, &addr->ta);
}
return (0);
}
@@ -201,14 +198,14 @@ __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
WT_PAGE *child, *page;
WT_REC_KV *val;
WT_REF *ref;
- wt_timestamp_t newest_start_durable_ts, newest_stop_durable_ts, newest_stop_ts, oldest_start_ts;
- uint64_t newest_stop_txn, oldest_start_txn;
- bool hazard, prepare;
+ WT_TIME_AGGREGATE ta;
+ bool hazard;
btree = S2BT(session);
page = pageref->page;
child = NULL;
hazard = false;
+ __wt_time_aggregate_init(&ta);
val = &r->v;
vpack = &_vpack;
@@ -283,22 +280,10 @@ __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
val->buf.size = __wt_cell_total_len(vpack);
val->cell_len = 0;
val->len = val->buf.size;
- newest_start_durable_ts = vpack->newest_start_durable_ts;
- oldest_start_ts = vpack->oldest_start_ts;
- oldest_start_txn = vpack->oldest_start_txn;
- newest_stop_durable_ts = vpack->newest_stop_durable_ts;
- newest_stop_ts = vpack->newest_stop_ts;
- newest_stop_txn = vpack->newest_stop_txn;
- prepare = F_ISSET(vpack, WT_CELL_UNPACK_PREPARE);
+ __wt_time_aggregate_copy(&ta, &vpack->ta);
} else {
__wt_rec_cell_build_addr(session, r, addr, NULL, false, ref->ref_recno);
- newest_start_durable_ts = addr->newest_start_durable_ts;
- oldest_start_ts = addr->oldest_start_ts;
- oldest_start_txn = addr->oldest_start_txn;
- newest_stop_durable_ts = addr->newest_stop_durable_ts;
- newest_stop_ts = addr->newest_stop_ts;
- newest_stop_txn = addr->newest_stop_txn;
- prepare = addr->prepare;
+ __wt_time_aggregate_copy(&ta, &addr->ta);
}
WT_CHILD_RELEASE_ERR(session, hazard, ref);
@@ -308,8 +293,7 @@ __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
/* Copy the value onto the page. */
__wt_rec_image_copy(session, r, val);
- __wt_rec_addr_ts_update(r, newest_start_durable_ts, oldest_start_ts, oldest_start_txn,
- newest_stop_durable_ts, newest_stop_ts, newest_stop_txn, prepare);
+ __wt_time_aggregate_merge(&r->cur_ptr->ta, &ta);
}
WT_INTL_FOREACH_END;
@@ -329,7 +313,6 @@ int
__wt_rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
{
WT_BTREE *btree;
- WT_DECL_RET;
WT_INSERT *ins;
WT_PAGE *page;
WT_UPDATE *upd;
@@ -350,13 +333,9 @@ __wt_rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
WT_SKIP_FOREACH (ins, WT_COL_UPDATE_SINGLE(page)) {
WT_RET(__wt_rec_upd_select(session, r, ins, NULL, NULL, &upd_select));
upd = upd_select.upd;
- if (upd != NULL) {
+ if (upd != NULL)
__bit_setv(
r->first_free, WT_INSERT_RECNO(ins) - pageref->ref_recno, btree->bitcnt, *upd->data);
- /* Free the update if it is external. */
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
- }
}
/* Calculate the number of entries per page remainder. */
@@ -422,17 +401,13 @@ __wt_rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
* last, allowing it to grow in the future.
*/
__wt_rec_incr(session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt));
- WT_ERR(__wt_rec_split(session, r, 0, false));
+ WT_RET(__wt_rec_split(session, r, 0, false));
/* Calculate the number of entries per page. */
entry = 0;
nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
}
- /* Free the update if it is external. */
- if (upd != NULL && F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
-
/*
* Execute this loop once without an insert item to catch any missing records due to a
* split, then quit.
@@ -445,14 +420,9 @@ __wt_rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
__wt_rec_incr(session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt));
/* Write the remnant page. */
- ret = __wt_rec_split_finish(session, r);
+ WT_RET(__wt_rec_split_finish(session, r));
-err:
- /* Free the update if it is external. */
- if (upd != NULL && F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
-
- return (ret);
+ return (0);
}
/*
@@ -504,7 +474,7 @@ __wt_rec_col_fix_slvg(
* We can't split during salvage -- if everything didn't fit, it's all gone wrong.
*/
if (salvage->missing != 0 || page_take != 0)
- WT_PANIC_RET(session, WT_PANIC, "%s page too large, attempted split during salvage",
+ WT_RET_PANIC(session, WT_PANIC, "%s page too large, attempted split during salvage",
__wt_page_type_string(page->type));
/* Write the page. */
@@ -517,9 +487,7 @@ __wt_rec_col_fix_slvg(
*/
static int
__rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_SALVAGE_COOKIE *salvage,
- WT_ITEM *value, wt_timestamp_t start_durable_ts, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_durable_ts, wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare,
- uint64_t rle, bool deleted, bool overflow_type)
+ WT_ITEM *value, WT_TIME_WINDOW *tw, uint64_t rle, bool deleted, bool overflow_type)
{
WT_BTREE *btree;
WT_REC_KV *val;
@@ -558,21 +526,18 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_SALVAGE_COOKI
}
if (deleted) {
- val->cell_len = __wt_cell_pack_del(session, &val->cell, start_durable_ts, start_ts,
- start_txn, stop_durable_ts, stop_ts, stop_txn, rle);
+ val->cell_len = __wt_cell_pack_del(session, &val->cell, tw, rle);
val->buf.data = NULL;
val->buf.size = 0;
val->len = val->cell_len;
} else if (overflow_type) {
val->cell_len =
- __wt_cell_pack_ovfl(session, &val->cell, WT_CELL_VALUE_OVFL, start_durable_ts, start_ts,
- start_txn, stop_durable_ts, stop_ts, stop_txn, prepare, rle, value->size);
+ __wt_cell_pack_ovfl(session, &val->cell, WT_CELL_VALUE_OVFL, tw, rle, value->size);
val->buf.data = value->data;
val->buf.size = value->size;
val->len = val->cell_len + value->size;
} else
- WT_RET(__wt_rec_cell_build_val(session, r, value->data, value->size, start_durable_ts,
- start_ts, start_txn, stop_durable_ts, stop_ts, stop_txn, prepare, rle));
+ WT_RET(__wt_rec_cell_build_val(session, r, value->data, value->size, tw, rle));
/* Boundary: split or write the page. */
if (__wt_rec_need_split(r, val->len))
@@ -580,11 +545,9 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_SALVAGE_COOKI
/* Copy the value onto the page. */
if (!deleted && !overflow_type && btree->dictionary)
- WT_RET(__wt_rec_dict_replace(session, r, start_durable_ts, start_ts, start_txn,
- stop_durable_ts, stop_ts, stop_txn, prepare, rle, val));
+ WT_RET(__wt_rec_dict_replace(session, r, tw, rle, val));
__wt_rec_image_copy(session, r, val);
- __wt_rec_addr_ts_update(
- r, start_durable_ts, start_ts, start_txn, stop_durable_ts, stop_ts, stop_txn, prepare);
+ __wt_time_aggregate_update(&r->cur_ptr->ta, tw);
/* Update the starting record number in case we split. */
r->recno += rle;
@@ -602,15 +565,9 @@ __wt_rec_col_var(
{
enum { OVFL_IGNORE, OVFL_UNUSED, OVFL_USED } ovfl_state;
struct {
- WT_ITEM *value; /* Value */
- wt_timestamp_t start_durable_ts; /* Timestamps/TxnID */
- wt_timestamp_t start_ts;
- uint64_t start_txn;
- wt_timestamp_t stop_durable_ts;
- wt_timestamp_t stop_ts;
- uint64_t stop_txn;
+ WT_ITEM *value; /* Value */
+ WT_TIME_WINDOW tw;
bool deleted; /* If deleted */
- bool prepare;
} last;
WT_BTREE *btree;
WT_CELL *cell;
@@ -621,13 +578,12 @@ __wt_rec_col_var(
WT_DECL_RET;
WT_INSERT *ins;
WT_PAGE *page;
+ WT_TIME_WINDOW tw, default_tw;
WT_UPDATE *upd;
WT_UPDATE_SELECT upd_select;
- wt_timestamp_t start_durable_ts, start_ts, stop_durable_ts, stop_ts;
uint64_t n, nrepeat, repeat_count, rle, skip, src_recno;
- uint64_t start_txn, stop_txn;
uint32_t i, size;
- bool deleted, orig_deleted, prepare, update_no_copy;
+ bool deleted, orig_deleted, update_no_copy;
const void *data;
btree = S2BT(session);
@@ -636,33 +592,22 @@ __wt_rec_col_var(
upd = NULL;
size = 0;
data = NULL;
+ __wt_time_window_init(&default_tw);
cbt = &r->update_modify_cbt;
cbt->iface.session = (WT_SESSION *)session;
/* Set the "last" values to cause failure if they're not set. */
last.value = r->last;
- last.start_durable_ts = WT_TS_MAX;
- last.start_ts = WT_TS_MAX;
- last.start_txn = WT_TXN_MAX;
- last.stop_durable_ts = WT_TS_MAX;
- last.stop_ts = WT_TS_NONE;
- last.stop_txn = WT_TXN_NONE;
+ __wt_time_window_init_max(&last.tw);
last.deleted = false;
- last.prepare = false;
/*
* Set the start/stop values to cause failure if they're not set.
* [-Werror=maybe-uninitialized]
*/
/* NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) */
- start_durable_ts = WT_TS_NONE;
- start_ts = WT_TS_MAX;
- start_txn = WT_TXN_MAX;
- stop_durable_ts = WT_TS_NONE;
- stop_ts = WT_TS_NONE;
- stop_txn = WT_TS_NONE;
- prepare = false;
+ __wt_time_window_init_max(&tw);
WT_RET(__wt_rec_split_init(session, r, page, pageref->ref_recno, btree->maxleafpage_precomp));
@@ -681,14 +626,8 @@ __wt_rec_col_var(
if (salvage != NULL && salvage->missing != 0) {
if (salvage->skip == 0) {
rle = salvage->missing;
- last.start_durable_ts = WT_TS_NONE;
- last.start_ts = WT_TS_NONE;
- last.start_txn = WT_TXN_NONE;
- last.stop_durable_ts = WT_TS_NONE;
- last.stop_ts = WT_TS_MAX;
- last.stop_txn = WT_TXN_MAX;
+ __wt_time_window_init(&last.tw);
last.deleted = true;
- last.prepare = false;
/*
* Correct the number of records we're going to "take", pretending the missing records
@@ -696,8 +635,8 @@ __wt_rec_col_var(
*/
salvage->take += salvage->missing;
} else
- WT_ERR(__rec_col_var_helper(session, r, NULL, NULL, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE,
- WT_TS_NONE, WT_TS_MAX, WT_TXN_MAX, prepare, salvage->missing, true, false));
+ WT_ERR(__rec_col_var_helper(
+ session, r, NULL, NULL, &default_tw, salvage->missing, true, false));
}
/*
@@ -765,9 +704,8 @@ record_loop:
ins = WT_SKIP_NEXT(ins);
}
- update_no_copy =
- upd == NULL || !F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK); /* No data copy */
- repeat_count = 1; /* Single record */
+ update_no_copy = true; /* No data copy */
+ repeat_count = 1; /* Single record */
deleted = false;
if (upd == NULL) {
@@ -788,26 +726,12 @@ record_loop:
*/
deleted = orig_deleted;
if (deleted || salvage) {
- /* Set time pairs for the deleted key. */
- start_durable_ts = WT_TS_NONE;
- start_ts = WT_TS_NONE;
- start_txn = WT_TXN_NONE;
- stop_durable_ts = WT_TS_NONE;
- stop_ts = WT_TS_MAX;
- stop_txn = WT_TXN_MAX;
- prepare = false;
+ __wt_time_window_init(&tw);
if (deleted)
goto compare;
- } else {
- start_durable_ts = vpack->durable_start_ts;
- start_ts = vpack->start_ts;
- start_txn = vpack->start_txn;
- stop_durable_ts = vpack->durable_stop_ts;
- stop_ts = vpack->stop_ts;
- stop_txn = vpack->stop_txn;
- prepare = F_ISSET(vpack, WT_CELL_UNPACK_PREPARE);
- }
+ } else
+ __wt_time_window_copy(&tw, &vpack->tw);
/*
* If we are handling overflow items, use the overflow item itself exactly once,
@@ -822,18 +746,15 @@ record_loop:
* We're going to copy the on-page cell, write out any record we're tracking.
*/
if (rle != 0) {
- WT_ERR(__rec_col_var_helper(session, r, salvage, last.value,
- last.start_durable_ts, last.start_ts, last.start_txn,
- last.stop_durable_ts, last.stop_ts, last.stop_txn, last.prepare, rle,
- last.deleted, false));
+ WT_ERR(__rec_col_var_helper(
+ session, r, salvage, last.value, &last.tw, rle, last.deleted, false));
rle = 0;
}
last.value->data = vpack->data;
last.value->size = vpack->size;
- WT_ERR(__rec_col_var_helper(session, r, salvage, last.value, start_durable_ts,
- start_ts, start_txn, stop_durable_ts, stop_ts, stop_txn, prepare,
- repeat_count, false, true));
+ WT_ERR(__rec_col_var_helper(
+ session, r, salvage, last.value, &tw, repeat_count, false, true));
/* Track if page has overflow items. */
r->ovfl_items = true;
@@ -859,18 +780,14 @@ record_loop:
break;
}
} else {
- start_durable_ts = upd_select.start_durable_ts;
- start_ts = upd_select.start_ts;
- start_txn = upd_select.start_txn;
- stop_durable_ts = upd_select.stop_durable_ts;
- stop_ts = upd_select.stop_ts;
- stop_txn = upd_select.stop_txn;
- prepare = upd_select.prepare;
+ __wt_time_window_copy(&tw, &upd_select.tw);
switch (upd->type) {
case WT_UPDATE_MODIFY:
cbt->slot = WT_COL_SLOT(page, cip);
- WT_ERR(__wt_value_return_upd(cbt, upd));
+ WT_ERR(
+ __wt_modify_reconstruct_from_upd_list(session, cbt, upd, cbt->upd_value));
+ WT_ERR(__wt_value_return(cbt, cbt->upd_value));
data = cbt->iface.value.data;
size = (uint32_t)cbt->iface.value.size;
update_no_copy = false;
@@ -880,13 +797,7 @@ record_loop:
size = upd->size;
break;
case WT_UPDATE_TOMBSTONE:
- start_durable_ts = WT_TS_NONE;
- start_ts = WT_TS_NONE;
- start_txn = WT_TXN_NONE;
- stop_durable_ts = WT_TS_NONE;
- stop_ts = WT_TS_MAX;
- stop_txn = WT_TXN_MAX;
- prepare = false;
+ __wt_time_window_init(&tw);
deleted = true;
break;
default:
@@ -902,9 +813,7 @@ compare:
* record number, we've been doing that all along.
*/
if (rle != 0) {
- if ((last.start_durable_ts == start_durable_ts && last.start_ts == start_ts &&
- last.start_txn == start_txn && last.stop_durable_ts == stop_durable_ts &&
- last.stop_ts == stop_ts && last.stop_txn == stop_txn) &&
+ if (__wt_time_windows_equal(&tw, &last.tw) &&
((deleted && last.deleted) ||
(!deleted && !last.deleted && last.value->size == size &&
memcmp(last.value->data, data, size) == 0))) {
@@ -914,17 +823,13 @@ compare:
* tombstone to write to disk and the deletion of the keys must be globally
* visible.
*/
- WT_ASSERT(session,
- (!deleted && !last.deleted) ||
- (last.start_durable_ts == WT_TS_NONE && last.start_ts == WT_TS_NONE &&
- last.start_txn == WT_TXN_NONE && last.stop_durable_ts == WT_TS_NONE &&
- last.stop_ts == WT_TS_MAX && last.stop_txn == WT_TXN_MAX));
+ WT_ASSERT(
+ session, (!deleted && !last.deleted) || __wt_time_window_is_empty(&last.tw));
rle += repeat_count;
continue;
}
- WT_ERR(__rec_col_var_helper(session, r, salvage, last.value, last.start_durable_ts,
- last.start_ts, last.start_txn, last.stop_durable_ts, last.stop_ts, last.stop_txn,
- last.prepare, rle, last.deleted, false));
+ WT_ERR(__rec_col_var_helper(
+ session, r, salvage, last.value, &last.tw, rle, last.deleted, false));
}
/*
@@ -948,17 +853,7 @@ compare:
WT_ERR(__wt_buf_set(session, last.value, data, size));
}
- /* Free the update if it is external. */
- if (upd != NULL && F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
-
- last.start_durable_ts = start_durable_ts;
- last.start_ts = start_ts;
- last.start_txn = start_txn;
- last.stop_durable_ts = stop_durable_ts;
- last.stop_ts = stop_ts;
- last.stop_txn = stop_txn;
- last.prepare = prepare;
+ __wt_time_window_copy(&last.tw, &tw);
last.deleted = deleted;
rle = repeat_count;
}
@@ -1005,8 +900,7 @@ compare:
}
while (src_recno <= n) {
- update_no_copy =
- upd == NULL || !F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK); /* No data copy */
+ update_no_copy = true; /* No data copy */
deleted = false;
/*
@@ -1022,10 +916,7 @@ compare:
* tombstone to write to disk and the deletion of the keys must be globally
* visible.
*/
- WT_ASSERT(session, last.start_durable_ts == WT_TS_NONE &&
- last.start_ts == WT_TS_NONE && last.start_txn == WT_TXN_NONE &&
- last.stop_durable_ts == WT_TS_NONE && last.stop_ts == WT_TS_MAX &&
- last.stop_txn == WT_TXN_MAX);
+ WT_ASSERT(session, __wt_time_window_is_empty(&last.tw));
/*
* The record adjustment is decremented by one so we can naturally fall into the
* RLE accounting below, where we increment rle by one, then continue in the
@@ -1034,36 +925,16 @@ compare:
skip = (n - src_recno) - 1;
rle += skip;
src_recno += skip;
- } else {
+ } else
/* Set time pairs for the first deleted key in a deleted range. */
- start_durable_ts = WT_TS_NONE;
- start_ts = WT_TS_NONE;
- start_txn = WT_TXN_NONE;
- stop_durable_ts = WT_TS_NONE;
- stop_ts = WT_TS_MAX;
- stop_txn = WT_TXN_MAX;
- prepare = false;
- }
+ __wt_time_window_init(&tw);
} else if (upd == NULL) {
/* The updates on the key are all uncommitted so we write a deleted key to disk. */
- start_durable_ts = WT_TS_NONE;
- start_ts = WT_TS_NONE;
- start_txn = WT_TXN_NONE;
- stop_durable_ts = WT_TS_NONE;
- stop_ts = WT_TS_MAX;
- stop_txn = WT_TXN_MAX;
- prepare = false;
-
+ __wt_time_window_init(&tw);
deleted = true;
} else {
/* Set time pairs for a key. */
- start_durable_ts = upd_select.start_durable_ts;
- start_ts = upd_select.start_ts;
- start_txn = upd_select.start_txn;
- stop_durable_ts = upd_select.stop_durable_ts;
- stop_ts = upd_select.stop_ts;
- stop_txn = upd_select.stop_txn;
- prepare = upd_select.prepare;
+ __wt_time_window_copy(&tw, &upd_select.tw);
switch (upd->type) {
case WT_UPDATE_MODIFY:
@@ -1071,7 +942,9 @@ compare:
* Impossible slot, there's no backing on-page item.
*/
cbt->slot = UINT32_MAX;
- WT_ERR(__wt_value_return_upd(cbt, upd));
+ WT_ERR(
+ __wt_modify_reconstruct_from_upd_list(session, cbt, upd, cbt->upd_value));
+ WT_ERR(__wt_value_return(cbt, cbt->upd_value));
data = cbt->iface.value.data;
size = (uint32_t)cbt->iface.value.size;
update_no_copy = false;
@@ -1081,13 +954,7 @@ compare:
size = upd->size;
break;
case WT_UPDATE_TOMBSTONE:
- start_durable_ts = WT_TS_NONE;
- start_ts = WT_TS_NONE;
- start_txn = WT_TXN_NONE;
- stop_durable_ts = WT_TS_NONE;
- stop_ts = WT_TS_MAX;
- stop_txn = WT_TXN_MAX;
- prepare = false;
+ __wt_time_window_init(&tw);
deleted = true;
break;
default:
@@ -1100,12 +967,7 @@ compare:
* the same thing.
*/
if (rle != 0) {
- /*
- * FIXME-PM-1521: Follow up issue with clang in WT-5341.
- */
- if ((last.start_durable_ts == start_durable_ts && last.start_ts == start_ts &&
- last.start_txn == start_txn && last.stop_durable_ts == stop_durable_ts &&
- last.stop_ts == stop_ts && last.stop_txn == stop_txn) &&
+ if (__wt_time_windows_equal(&last.tw, &tw) &&
((deleted && last.deleted) ||
(!deleted && !last.deleted && last.value->size == size &&
memcmp(last.value->data, data, size) == 0))) {
@@ -1115,17 +977,17 @@ compare:
* tombstone to write to disk and the deletion of the keys must be globally
* visible.
*/
- WT_ASSERT(session, (!deleted && !last.deleted) ||
- (last.start_durable_ts == start_durable_ts && last.start_ts == WT_TS_NONE &&
- last.start_txn == WT_TXN_NONE &&
- last.stop_durable_ts == stop_durable_ts &&
- last.stop_ts == WT_TS_MAX && last.stop_txn == WT_TXN_MAX));
+ WT_ASSERT(session,
+ (!deleted && !last.deleted) ||
+ (last.tw.durable_start_ts == tw.durable_start_ts &&
+ last.tw.start_ts == WT_TS_NONE && last.tw.start_txn == WT_TXN_NONE &&
+ last.tw.durable_stop_ts == tw.durable_stop_ts &&
+ last.tw.stop_ts == WT_TS_MAX && last.tw.stop_txn == WT_TXN_MAX));
++rle;
goto next;
}
- WT_ERR(__rec_col_var_helper(session, r, salvage, last.value, last.start_durable_ts,
- last.start_ts, last.start_txn, last.stop_durable_ts, last.stop_ts, last.stop_txn,
- last.prepare, rle, last.deleted, false));
+ WT_ERR(__rec_col_var_helper(
+ session, r, salvage, last.value, &last.tw, rle, last.deleted, false));
}
/*
@@ -1143,18 +1005,8 @@ compare:
WT_ERR(__wt_buf_set(session, last.value, data, size));
}
- /* Free the update if it is external. */
- if (upd != NULL && F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
-
/* Ready for the next loop, reset the RLE counter. */
- last.start_durable_ts = start_durable_ts;
- last.start_ts = start_ts;
- last.start_txn = start_txn;
- last.stop_durable_ts = stop_durable_ts;
- last.stop_ts = stop_ts;
- last.stop_txn = stop_txn;
- last.prepare = prepare;
+ __wt_time_window_copy(&last.tw, &tw);
last.deleted = deleted;
rle = 1;
@@ -1178,18 +1030,13 @@ next:
/* If we were tracking a record, write it. */
if (rle != 0)
- WT_ERR(__rec_col_var_helper(session, r, salvage, last.value, last.start_durable_ts,
- last.start_ts, last.start_txn, last.stop_durable_ts, last.stop_ts, last.stop_txn,
- last.prepare, rle, last.deleted, false));
+ WT_ERR(__rec_col_var_helper(
+ session, r, salvage, last.value, &last.tw, rle, last.deleted, false));
/* Write the remnant page. */
ret = __wt_rec_split_finish(session, r);
err:
- /* Free the update if it is external. */
- if (upd != NULL && F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
-
__wt_scr_free(session, &orig);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
index d65768aba49..5365e077b65 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_row.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -80,8 +80,7 @@ __rec_cell_build_int_key(
WT_STAT_DATA_INCR(session, rec_overflow_key_internal);
*is_ovflp = true;
- return (__wt_rec_cell_build_ovfl(session, r, key, WT_CELL_KEY_OVFL, WT_TS_NONE, WT_TS_NONE,
- WT_TXN_NONE, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, false, 0));
+ return (__wt_rec_cell_build_ovfl(session, r, key, WT_CELL_KEY_OVFL, NULL, 0));
}
key->cell_len = __wt_cell_pack_int_key(&key->cell, key->buf.size);
@@ -172,8 +171,7 @@ __rec_cell_build_leaf_key(
WT_STAT_DATA_INCR(session, rec_overflow_key_leaf);
*is_ovflp = true;
- return (__wt_rec_cell_build_ovfl(session, r, key, WT_CELL_KEY_OVFL, WT_TS_NONE,
- WT_TS_NONE, WT_TXN_NONE, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, false, 0));
+ return (__wt_rec_cell_build_ovfl(session, r, key, WT_CELL_KEY_OVFL, NULL, 0));
}
return (__rec_cell_build_leaf_key(session, r, NULL, 0, is_ovflp));
}
@@ -195,19 +193,20 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
WT_CURSOR *cursor;
WT_RECONCILE *r;
WT_REC_KV *key, *val;
+ WT_TIME_WINDOW tw;
bool ovfl_key;
r = cbulk->reconcile;
btree = S2BT(session);
cursor = &cbulk->cbt.iface;
+ __wt_time_window_init(&tw);
key = &r->k;
val = &r->v;
WT_RET(__rec_cell_build_leaf_key(session, r, /* Build key cell */
cursor->key.data, cursor->key.size, &ovfl_key));
WT_RET(__wt_rec_cell_build_val(session, r, cursor->value.data, /* Build value cell */
- cursor->value.size, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TS_NONE, WT_TS_MAX, WT_TXN_MAX,
- false, 0));
+ cursor->value.size, &tw, 0));
/* Boundary: split or write the page. */
if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) {
@@ -230,12 +229,10 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
else {
r->all_empty_value = false;
if (btree->dictionary)
- WT_RET(__wt_rec_dict_replace(session, r, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE,
- WT_TS_NONE, WT_TS_MAX, WT_TXN_MAX, false, 0, val));
+ WT_RET(__wt_rec_dict_replace(session, r, &tw, 0, val));
__wt_rec_image_copy(session, r, val);
}
- __wt_rec_addr_ts_update(
- r, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TS_NONE, WT_TS_MAX, WT_TXN_MAX, false);
+ __wt_time_aggregate_update(&r->cur_ptr->ta, &tw);
/* Update compression state. */
__rec_key_state_update(r, ovfl_key);
@@ -279,9 +276,7 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
/* Copy the key and value onto the page. */
__wt_rec_image_copy(session, r, key);
__wt_rec_image_copy(session, r, val);
- __wt_rec_addr_ts_update(r, addr->newest_start_durable_ts, addr->oldest_start_ts,
- addr->oldest_start_txn, addr->newest_stop_durable_ts, addr->newest_stop_ts,
- addr->newest_stop_txn, addr->prepare);
+ __wt_time_aggregate_merge(&r->cur_ptr->ta, &addr->ta);
/* Update compression state. */
__rec_key_state_update(r, ovfl_key);
@@ -306,10 +301,9 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_PAGE *child;
WT_REC_KV *key, *val;
WT_REF *ref;
- wt_timestamp_t newest_start_durable_ts, newest_stop_durable_ts, newest_stop_ts, oldest_start_ts;
+ WT_TIME_AGGREGATE ta;
size_t key_overflow_size, size;
- uint64_t newest_stop_txn, oldest_start_txn;
- bool force, hazard, key_onpage_ovfl, ovfl_key, prepare;
+ bool force, hazard, key_onpage_ovfl, ovfl_key;
const void *p;
btree = S2BT(session);
@@ -437,16 +431,10 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
*/
if (__wt_off_page(page, addr)) {
__wt_rec_cell_build_addr(session, r, addr, NULL, state == WT_CHILD_PROXY, WT_RECNO_OOB);
- newest_start_durable_ts = addr->newest_start_durable_ts;
- oldest_start_ts = addr->oldest_start_ts;
- oldest_start_txn = addr->oldest_start_txn;
- newest_stop_durable_ts = addr->newest_stop_durable_ts;
- newest_stop_ts = addr->newest_stop_ts;
- newest_stop_txn = addr->newest_stop_txn;
- prepare = addr->prepare;
+ __wt_time_aggregate_copy(&ta, &addr->ta);
} else {
__wt_cell_unpack(session, page, ref->addr, vpack);
- if (F_ISSET(vpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED)) {
+ if (F_ISSET(vpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED)) {
/*
* The transaction ids are cleared after restart. Repack the cell with new validity
* to flush the cleared transaction ids.
@@ -464,13 +452,7 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
val->cell_len = 0;
val->len = val->buf.size;
}
- newest_start_durable_ts = vpack->newest_start_durable_ts;
- oldest_start_ts = vpack->oldest_start_ts;
- oldest_start_txn = vpack->oldest_start_txn;
- newest_stop_durable_ts = vpack->newest_stop_durable_ts;
- newest_stop_ts = vpack->newest_stop_ts;
- newest_stop_txn = vpack->newest_stop_txn;
- prepare = F_ISSET(vpack, WT_CELL_UNPACK_PREPARE);
+ __wt_time_aggregate_copy(&ta, &vpack->ta);
}
WT_CHILD_RELEASE_ERR(session, hazard, ref);
@@ -524,8 +506,7 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
/* Copy the key and value onto the page. */
__wt_rec_image_copy(session, r, key);
__wt_rec_image_copy(session, r, val);
- __wt_rec_addr_ts_update(r, newest_start_durable_ts, oldest_start_ts, oldest_start_txn,
- newest_stop_durable_ts, newest_stop_ts, newest_stop_txn, prepare);
+ __wt_time_aggregate_merge(&r->cur_ptr->ta, &ta);
/* Update compression state. */
__rec_key_state_update(r, ovfl_key);
@@ -545,15 +526,16 @@ err:
* Return if a zero-length item can be written.
*/
static bool
-__rec_row_zero_len(WT_SESSION_IMPL *session, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn)
+__rec_row_zero_len(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
{
/*
- * The item must be globally visible because we're not writing anything on the page.
+ * The item must be globally visible because we're not writing anything on the page. Don't be
+ * tempted to check the time window against the default here - the check is subtly different due
+ * to the grouping.
*/
- return ((stop_ts == WT_TS_MAX && stop_txn == WT_TXN_MAX) &&
- ((start_ts == WT_TS_NONE && start_txn == WT_TXN_NONE) ||
- __wt_txn_visible_all(session, start_txn, start_ts)));
+ return ((tw->stop_ts == WT_TS_MAX && tw->stop_txn == WT_TXN_MAX) &&
+ ((tw->start_ts == WT_TS_NONE && tw->start_txn == WT_TXN_NONE) ||
+ __wt_txn_visible_all(session, tw->start_txn, tw->start_ts)));
}
/*
@@ -565,13 +547,11 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
{
WT_BTREE *btree;
WT_CURSOR_BTREE *cbt;
- WT_DECL_RET;
WT_REC_KV *key, *val;
+ WT_TIME_WINDOW tw;
WT_UPDATE *upd;
WT_UPDATE_SELECT upd_select;
- wt_timestamp_t start_durable_ts, start_ts, stop_durable_ts, stop_ts;
- uint64_t start_txn, stop_txn;
- bool ovfl_key, prepare;
+ bool ovfl_key;
btree = S2BT(session);
@@ -588,13 +568,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
if ((upd = upd_select.upd) == NULL)
continue;
- start_durable_ts = upd_select.start_durable_ts;
- start_ts = upd_select.start_ts;
- start_txn = upd_select.start_txn;
- stop_durable_ts = upd_select.stop_durable_ts;
- stop_ts = upd_select.stop_ts;
- stop_txn = upd_select.stop_txn;
- prepare = upd_select.prepare;
+ __wt_time_window_copy(&tw, &upd_select.tw);
switch (upd->type) {
case WT_UPDATE_MODIFY:
@@ -602,28 +576,22 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
* Impossible slot, there's no backing on-page item.
*/
cbt->slot = UINT32_MAX;
- WT_RET(__wt_value_return_upd(cbt, upd));
- WT_RET(__wt_rec_cell_build_val(session, r, cbt->iface.value.data, cbt->iface.value.size,
- start_durable_ts, start_ts, start_txn, stop_durable_ts, stop_ts, stop_txn, prepare,
- 0));
+ WT_RET(__wt_modify_reconstruct_from_upd_list(session, cbt, upd, cbt->upd_value));
+ WT_RET(__wt_value_return(cbt, cbt->upd_value));
+ WT_RET(__wt_rec_cell_build_val(
+ session, r, cbt->iface.value.data, cbt->iface.value.size, &tw, 0));
break;
case WT_UPDATE_STANDARD:
/* Take the value from the update. */
- WT_ERR(__wt_rec_cell_build_val(session, r, upd->data, upd->size, start_durable_ts,
- start_ts, start_txn, stop_durable_ts, stop_ts, stop_txn, prepare, 0));
+ WT_RET(__wt_rec_cell_build_val(session, r, upd->data, upd->size, &tw, 0));
break;
case WT_UPDATE_TOMBSTONE:
continue;
default:
- ret = __wt_illegal_value(session, upd->type);
- WT_ERR(ret);
+ WT_RET(__wt_illegal_value(session, upd->type));
}
- /* Free the update if it is external. */
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
-
/* Build key cell. */
- WT_ERR(__rec_cell_build_leaf_key(
+ WT_RET(__rec_cell_build_leaf_key(
session, r, WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key));
/* Boundary: split or write the page. */
@@ -635,36 +603,29 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
if (r->key_pfx_compress_conf) {
r->key_pfx_compress = false;
if (!ovfl_key)
- WT_ERR(__rec_cell_build_leaf_key(session, r, NULL, 0, &ovfl_key));
+ WT_RET(__rec_cell_build_leaf_key(session, r, NULL, 0, &ovfl_key));
}
- WT_ERR(__wt_rec_split_crossing_bnd(session, r, key->len + val->len, false));
+ WT_RET(__wt_rec_split_crossing_bnd(session, r, key->len + val->len, false));
}
/* Copy the key/value pair onto the page. */
__wt_rec_image_copy(session, r, key);
- if (val->len == 0 && __rec_row_zero_len(session, start_ts, start_txn, stop_ts, stop_txn))
+ if (val->len == 0 && __rec_row_zero_len(session, &tw))
r->any_empty_value = true;
else {
r->all_empty_value = false;
if (btree->dictionary)
- WT_ERR(__wt_rec_dict_replace(session, r, start_durable_ts, start_ts, start_txn,
- stop_durable_ts, stop_ts, stop_txn, prepare, 0, val));
+ WT_RET(__wt_rec_dict_replace(session, r, &tw, 0, val));
__wt_rec_image_copy(session, r, val);
}
- __wt_rec_addr_ts_update(
- r, start_durable_ts, start_ts, start_txn, stop_durable_ts, stop_ts, stop_txn, prepare);
+ __wt_time_aggregate_update(&r->cur_ptr->ta, &tw);
/* Update compression state. */
__rec_key_state_update(r, ovfl_key);
}
-err:
- /* Free the update if it is external. */
- if (upd != NULL && F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
-
- return (ret);
+ return (0);
}
/*
@@ -673,8 +634,7 @@ err:
*/
static inline int
__rec_cell_repack(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_RECONCILE *r, WT_CELL_UNPACK *vpack,
- wt_timestamp_t start_durable_ts, uint64_t start_txn, wt_timestamp_t start_ts,
- wt_timestamp_t stop_durable_ts, uint64_t stop_txn, wt_timestamp_t stop_ts, bool prepare)
+ WT_TIME_WINDOW *tw)
{
WT_DECL_ITEM(tmpval);
WT_DECL_RET;
@@ -693,8 +653,7 @@ __rec_cell_repack(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_RECONCILE *r, WT
p = tmpval->data;
size = tmpval->size;
}
- WT_ERR(__wt_rec_cell_build_val(session, r, p, size, start_durable_ts, start_ts, start_txn,
- stop_durable_ts, stop_ts, stop_txn, prepare, 0));
+ WT_ERR(__wt_rec_cell_build_val(session, r, p, size, tw, 0));
err:
__wt_scr_free(session, &tmpval);
@@ -721,18 +680,18 @@ __wt_rec_row_leaf(
WT_PAGE *page;
WT_REC_KV *key, *val;
WT_ROW *rip;
+ WT_TIME_WINDOW tw;
WT_UPDATE *upd;
WT_UPDATE_SELECT upd_select;
- wt_timestamp_t start_durable_ts, start_ts, stop_durable_ts, stop_ts;
- uint64_t slvg_skip, start_txn, stop_txn;
+ uint64_t slvg_skip;
uint32_t i;
- bool dictionary, key_onpage_ovfl, ovfl_key, prepare;
+ bool dictionary, key_onpage_ovfl, ovfl_key;
void *copy;
btree = S2BT(session);
page = pageref->page;
- prepare = false;
slvg_skip = salvage == NULL ? 0 : salvage->skip;
+ __wt_time_window_init(&tw);
cbt = &r->update_modify_cbt;
cbt->iface.session = (WT_SESSION *)session;
@@ -796,37 +755,19 @@ __wt_rec_row_leaf(
* pair information, else take the time pairs from the cell.
*/
if (upd == NULL) {
- if (!salvage) {
- start_durable_ts = vpack->durable_start_ts;
- start_ts = vpack->start_ts;
- start_txn = vpack->start_txn;
- stop_durable_ts = vpack->durable_stop_ts;
- stop_ts = vpack->stop_ts;
- stop_txn = vpack->stop_txn;
- } else {
- start_durable_ts = WT_TS_NONE;
- start_ts = WT_TS_NONE;
- start_txn = WT_TXN_NONE;
- stop_durable_ts = WT_TS_NONE;
- stop_ts = WT_TS_MAX;
- stop_txn = WT_TXN_MAX;
- }
- } else {
- start_durable_ts = upd_select.start_durable_ts;
- start_ts = upd_select.start_ts;
- start_txn = upd_select.start_txn;
- stop_durable_ts = upd_select.stop_durable_ts;
- stop_ts = upd_select.stop_ts;
- stop_txn = upd_select.stop_txn;
- prepare = upd_select.prepare;
- }
+ if (!salvage)
+ __wt_time_window_copy(&tw, &vpack->tw);
+ else
+ __wt_time_window_init(&tw);
+ } else
+ __wt_time_window_copy(&tw, &upd_select.tw);
/*
* If we reconcile an on disk key with a globally visible stop time pair and there are no
* new updates for that key, skip writing that key.
*/
- if (upd == NULL && (stop_txn != WT_TXN_MAX || stop_ts != WT_TS_MAX) &&
- __wt_txn_visible_all(session, stop_txn, stop_ts))
+ if (upd == NULL && (tw.stop_txn != WT_TXN_MAX || tw.stop_ts != WT_TS_MAX) &&
+ __wt_txn_visible_all(session, tw.stop_txn, tw.stop_ts))
upd = &upd_tombstone;
/* Build value cell. */
@@ -841,11 +782,10 @@ __wt_rec_row_leaf(
* Repack the cell if we clear the transaction ids in the cell.
*/
if (vpack->raw == WT_CELL_VALUE_COPY) {
- WT_ERR(__rec_cell_repack(session, btree, r, vpack, start_durable_ts, start_txn,
- start_ts, stop_durable_ts, stop_txn, stop_ts, prepare));
+ WT_ERR(__rec_cell_repack(session, btree, r, vpack, &tw));
dictionary = true;
- } else if (F_ISSET(vpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED)) {
+ } else if (F_ISSET(vpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED)) {
/*
* The transaction ids are cleared after restart. Repack the cell to flush the
* cleared transaction ids.
@@ -857,13 +797,11 @@ __wt_rec_row_leaf(
val->buf.size = vpack->size;
/* Rebuild the cell. */
- val->cell_len = __wt_cell_pack_ovfl(session, &val->cell, vpack->raw,
- start_durable_ts, start_ts, start_txn, stop_durable_ts, stop_ts, stop_txn,
- prepare, 0, val->buf.size);
+ val->cell_len =
+ __wt_cell_pack_ovfl(session, &val->cell, vpack->raw, &tw, 0, val->buf.size);
val->len = val->cell_len + val->buf.size;
} else
- WT_ERR(__rec_cell_repack(session, btree, r, vpack, start_durable_ts, start_txn,
- start_ts, stop_durable_ts, stop_txn, stop_ts, prepare));
+ WT_ERR(__rec_cell_repack(session, btree, r, vpack, &tw));
dictionary = true;
} else {
@@ -884,16 +822,15 @@ __wt_rec_row_leaf(
switch (upd->type) {
case WT_UPDATE_MODIFY:
cbt->slot = WT_ROW_SLOT(page, rip);
- WT_ERR(__wt_value_return_upd(cbt, upd));
- WT_ERR(__wt_rec_cell_build_val(session, r, cbt->iface.value.data,
- cbt->iface.value.size, start_durable_ts, start_ts, start_txn, stop_durable_ts,
- stop_ts, stop_txn, prepare, 0));
+ WT_ERR(__wt_modify_reconstruct_from_upd_list(session, cbt, upd, cbt->upd_value));
+ WT_ERR(__wt_value_return(cbt, cbt->upd_value));
+ WT_ERR(__wt_rec_cell_build_val(
+ session, r, cbt->iface.value.data, cbt->iface.value.size, &tw, 0));
dictionary = true;
break;
case WT_UPDATE_STANDARD:
/* Take the value from the update. */
- WT_ERR(__wt_rec_cell_build_val(session, r, upd->data, upd->size, start_durable_ts,
- start_ts, start_txn, stop_durable_ts, stop_ts, stop_txn, prepare, 0));
+ WT_ERR(__wt_rec_cell_build_val(session, r, upd->data, upd->size, &tw, 0));
dictionary = true;
break;
case WT_UPDATE_TOMBSTONE:
@@ -940,9 +877,6 @@ __wt_rec_row_leaf(
default:
WT_ERR(__wt_illegal_value(session, upd->type));
}
- /* Free the update if it is external. */
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
}
/*
@@ -1022,17 +956,15 @@ build:
/* Copy the key/value pair onto the page. */
__wt_rec_image_copy(session, r, key);
- if (val->len == 0 && __rec_row_zero_len(session, start_ts, start_txn, stop_ts, stop_txn))
+ if (val->len == 0 && __rec_row_zero_len(session, &tw))
r->any_empty_value = true;
else {
r->all_empty_value = false;
if (dictionary && btree->dictionary)
- WT_ERR(__wt_rec_dict_replace(session, r, start_durable_ts, start_ts, start_txn,
- stop_durable_ts, stop_ts, stop_txn, prepare, 0, val));
+ WT_ERR(__wt_rec_dict_replace(session, r, &tw, 0, val));
__wt_rec_image_copy(session, r, val);
}
- __wt_rec_addr_ts_update(
- r, start_durable_ts, start_ts, start_txn, stop_durable_ts, stop_ts, stop_txn, prepare);
+ __wt_time_aggregate_update(&r->cur_ptr->ta, &tw);
/* Update compression state. */
__rec_key_state_update(r, ovfl_key);
@@ -1047,10 +979,6 @@ leaf_insert:
ret = __wt_rec_split_finish(session, r);
err:
- /* Free the update if it is external. */
- if (upd != NULL && F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
-
__wt_scr_free(session, &tmpkey);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 36e2de9ccc5..aa44301a21d 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -76,11 +76,18 @@ __rec_append_orig_value(
return (0);
/*
+ * Prepared updates should already be in the update list, add the original update to the
+ * list only when the prepared update is a tombstone.
+ */
+ if (F_ISSET(unpack, WT_CELL_UNPACK_PREPARE) && upd->type != WT_UPDATE_TOMBSTONE)
+ return (0);
+
+ /*
* Done if the on page value already appears on the update list. We can't do the same check
* for stop time pair because we may still need to append the onpage value if only the
* tombstone is on the update chain.
*/
- if (unpack->start_ts == upd->start_ts && unpack->start_txn == upd->txnid &&
+ if (unpack->tw.start_ts == upd->start_ts && unpack->tw.start_txn == upd->txnid &&
upd->type != WT_UPDATE_TOMBSTONE)
return (0);
@@ -104,8 +111,8 @@ __rec_append_orig_value(
}
/* Done if the stop time pair of the onpage cell is globally visible. */
- if ((unpack->stop_ts != WT_TS_MAX || unpack->stop_txn != WT_TXN_MAX) &&
- __wt_txn_visible_all(session, unpack->stop_txn, unpack->stop_ts))
+ if ((unpack->tw.stop_ts != WT_TS_MAX || unpack->tw.stop_txn != WT_TXN_MAX) &&
+ __wt_txn_visible_all(session, unpack->tw.stop_txn, unpack->tw.stop_ts))
return (0);
/* We need the original on-page value for some reader: get a copy. */
@@ -113,9 +120,9 @@ __rec_append_orig_value(
WT_ERR(__wt_page_cell_data_ref(session, page, unpack, tmp));
WT_ERR(__wt_upd_alloc(session, tmp, WT_UPDATE_STANDARD, &append, &size));
total_size += size;
- append->txnid = unpack->start_txn;
- append->start_ts = unpack->start_ts;
- append->durable_ts = unpack->durable_start_ts;
+ append->txnid = unpack->tw.start_txn;
+ append->start_ts = unpack->tw.start_ts;
+ append->durable_ts = unpack->tw.durable_start_ts;
/*
* Additionally, we need to append a tombstone before the onpage value we're about to append to
@@ -123,20 +130,26 @@ __rec_append_orig_value(
* delete a value respectively at timestamp 0 and 10, and later insert it again at 20. We need
* the tombstone to tell us there is no value between 10 and 20.
*/
- if (unpack->stop_ts != WT_TS_MAX || unpack->stop_txn != WT_TXN_MAX) {
+ if (unpack->tw.stop_ts != WT_TS_MAX || unpack->tw.stop_txn != WT_TXN_MAX) {
/* No need to append the tombstone if it is already in the update chain. */
if (oldest_upd->type != WT_UPDATE_TOMBSTONE) {
WT_ERR(__wt_upd_alloc_tombstone(session, &tombstone, &size));
total_size += size;
- tombstone->txnid = unpack->stop_txn;
- tombstone->start_ts = unpack->stop_ts;
- tombstone->durable_ts = unpack->durable_stop_ts;
+ tombstone->txnid = unpack->tw.stop_txn;
+ tombstone->start_ts = unpack->tw.stop_ts;
+ tombstone->durable_ts = unpack->tw.durable_stop_ts;
tombstone->next = append;
append = tombstone;
} else
- WT_ASSERT(session,
- unpack->stop_ts == oldest_upd->start_ts && unpack->stop_txn == oldest_upd->txnid);
+ /*
+ * Once the prepared update is resolved, the in-memory update and on-disk written copy
+ * doesn't have same timestamp due to replacing of prepare timestamp with commit and
+ * durable timestamps. Don't compare them when the on-disk version is a prepare.
+ */
+ WT_ASSERT(session, F_ISSET(unpack, WT_CELL_UNPACK_PREPARE) ||
+ (unpack->tw.stop_ts == oldest_upd->start_ts &&
+ unpack->tw.stop_txn == oldest_upd->txnid));
}
/* Append the new entry into the update list. */
@@ -161,6 +174,9 @@ static inline bool
__rec_need_save_upd(
WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_UPDATE_SELECT *upd_select, bool has_newer_updates)
{
+ if (upd_select->tw.prepare)
+ return (true);
+
if (F_ISSET(r, WT_REC_EVICT) && has_newer_updates)
return (true);
@@ -176,8 +192,8 @@ __rec_need_save_upd(
if (F_ISSET(r, WT_REC_CHECKPOINT) && upd_select->upd == NULL)
return (false);
- return (!__wt_txn_visible_all(session, upd_select->stop_txn, upd_select->stop_ts) &&
- !__wt_txn_visible_all(session, upd_select->start_txn, upd_select->start_ts));
+ return (!__wt_txn_visible_all(session, upd_select->tw.stop_txn, upd_select->tw.stop_ts) &&
+ !__wt_txn_visible_all(session, upd_select->tw.start_txn, upd_select->tw.start_ts));
}
/*
@@ -191,10 +207,12 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
WT_DECL_ITEM(tmp);
WT_DECL_RET;
WT_PAGE *page;
+ WT_TIME_WINDOW *select_tw;
WT_UPDATE *first_txn_upd, *first_upd, *upd, *last_upd, *tombstone;
wt_timestamp_t max_ts;
size_t upd_memsize;
uint64_t max_txn, txnid;
+ char time_string[WT_TIME_STRING_SIZE];
bool has_newer_updates, is_hs_page, supd_restore, upd_saved;
/*
@@ -202,13 +220,8 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
* both must be initialized.
*/
upd_select->upd = NULL;
- upd_select->start_durable_ts = WT_TS_NONE;
- upd_select->start_ts = WT_TS_NONE;
- upd_select->start_txn = WT_TXN_NONE;
- upd_select->stop_durable_ts = WT_TS_NONE;
- upd_select->stop_ts = WT_TS_MAX;
- upd_select->stop_txn = WT_TXN_MAX;
- upd_select->prepare = false;
+ select_tw = &upd_select->tw;
+ __wt_time_window_init(select_tw);
page = r->page;
first_txn_upd = upd = last_upd = tombstone = NULL;
@@ -264,20 +277,24 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
continue;
}
+ /* Ignore prepared updates if it is not eviction. */
if (upd->prepare_state == WT_PREPARE_LOCKED ||
upd->prepare_state == WT_PREPARE_INPROGRESS) {
- WT_ASSERT(session, upd_select->upd == NULL);
- has_newer_updates = true;
- if (upd->start_ts > max_ts)
- max_ts = upd->start_ts;
-
- /*
- * Track the oldest update not on the page, used to decide whether reads can use the
- * page image, hence using the start rather than the durable timestamp.
- */
- if (upd->start_ts < r->min_skipped_ts)
- r->min_skipped_ts = upd->start_ts;
- continue;
+ WT_ASSERT(session, upd_select->upd == NULL || upd_select->upd->txnid == upd->txnid);
+ if (!F_ISSET(r, WT_REC_EVICT)) {
+ has_newer_updates = true;
+ if (upd->start_ts > max_ts)
+ max_ts = upd->start_ts;
+
+ /*
+ * Track the oldest update not on the page, used to decide whether reads can use the
+ * page image, hence using the start rather than the durable timestamp.
+ */
+ if (upd->start_ts < r->min_skipped_ts)
+ r->min_skipped_ts = upd->start_ts;
+ continue;
+ } else
+ WT_ASSERT(session, upd->prepare_state == WT_PREPARE_INPROGRESS);
}
/* Track the first update with non-zero timestamp. */
@@ -322,7 +339,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
*/
if (has_newer_updates && F_ISSET(r, WT_REC_CLEAN_AFTER_REC | WT_REC_VISIBILITY_ERR)) {
if (F_ISSET(r, WT_REC_VISIBILITY_ERR))
- WT_PANIC_RET(session, EINVAL, "reconciliation error, update not visible");
+ WT_RET_PANIC(session, EINVAL, "reconciliation error, update not visible");
return (__wt_set_return(session, EBUSY));
}
@@ -341,6 +358,14 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
*/
if (upd != NULL) {
/*
+ * Mark the prepare flag if the selected update is an uncommitted prepare. As tombstone
+ * updates are never returned to write, set this flag before we move into the previous
+ * update to write.
+ */
+ if (upd->prepare_state == WT_PREPARE_INPROGRESS)
+ select_tw->prepare = 1;
+
+ /*
* If the newest is a tombstone then select the update before it and set the end of the
* visibility window to its time pair as appropriate to indicate that we should return "not
* found" for reads after this point.
@@ -349,9 +374,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
* indicate that the value is visible to any timestamp/transaction id ahead of it.
*/
if (upd->type == WT_UPDATE_TOMBSTONE) {
- upd_select->stop_ts = upd->start_ts;
- upd_select->stop_txn = upd->txnid;
- upd_select->stop_durable_ts = upd->durable_ts;
+ __wt_time_window_set_stop(select_tw, upd);
tombstone = upd;
/* Find the update this tombstone applies to. */
@@ -366,10 +389,8 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
}
if (upd != NULL) {
/* The beginning of the validity window is the selected update's time pair. */
- upd_select->start_ts = upd->start_ts;
- upd_select->start_durable_ts = upd->durable_ts;
- upd_select->start_txn = upd->txnid;
- } else if (upd_select->stop_ts != WT_TS_NONE || upd_select->stop_txn != WT_TXN_NONE) {
+ __wt_time_window_set_start(select_tw, upd);
+ } else if (select_tw->stop_ts != WT_TS_NONE || select_tw->stop_txn != WT_TXN_NONE) {
/* If we only have a tombstone in the update list, we must have an ondisk value. */
WT_ASSERT(session, vpack != NULL && tombstone != NULL);
/*
@@ -384,13 +405,11 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
*/
WT_ERR(__rec_append_orig_value(session, page, tombstone, vpack));
WT_ASSERT(session, last_upd->next != NULL &&
- last_upd->next->txnid == vpack->start_txn &&
- last_upd->next->start_ts == vpack->start_ts &&
+ last_upd->next->txnid == vpack->tw.start_txn &&
+ last_upd->next->start_ts == vpack->tw.start_ts &&
last_upd->next->type == WT_UPDATE_STANDARD && last_upd->next->next == NULL);
upd_select->upd = last_upd->next;
- upd_select->start_ts = last_upd->next->start_ts;
- upd_select->start_durable_ts = last_upd->next->durable_ts;
- upd_select->start_txn = last_upd->next->txnid;
+ __wt_time_window_set_start(select_tw, last_upd->next);
}
}
@@ -405,17 +424,15 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
* time pair. While unusual, it is permitted for a single transaction to insert and then remove
* a record. We don't want to generate a warning in that case.
*/
- if (upd_select->stop_ts < upd_select->start_ts ||
- (upd_select->stop_ts == upd_select->start_ts &&
- upd_select->stop_txn < upd_select->start_txn)) {
- char ts_string[2][WT_TS_INT_STRING_SIZE];
+ if (select_tw->stop_ts < select_tw->start_ts ||
+ (select_tw->stop_ts == select_tw->start_ts && select_tw->stop_txn < select_tw->start_txn)) {
__wt_verbose(session, WT_VERB_TIMESTAMP,
- "Warning: fixing out-of-order timestamps remove at %s earlier than value at %s",
- __wt_timestamp_to_string(upd_select->stop_ts, ts_string[0]),
- __wt_timestamp_to_string(upd_select->start_ts, ts_string[1]));
- upd_select->start_durable_ts = upd_select->stop_durable_ts;
- upd_select->start_ts = upd_select->stop_ts;
- upd_select->start_txn = upd_select->stop_txn;
+ "Warning: fixing out-of-order timestamps remove earlier than value; time window %s",
+ __wt_time_window_to_string(select_tw, time_string));
+
+ select_tw->durable_start_ts = select_tw->durable_stop_ts;
+ select_tw->start_ts = select_tw->stop_ts;
+ select_tw->start_txn = select_tw->stop_txn;
}
/*
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 365e5841347..2a4358c585f 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -594,6 +594,7 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO
*/
r->update_modify_cbt.ref = ref;
r->update_modify_cbt.iface.value_format = btree->value_format;
+ r->update_modify_cbt.upd_value = &r->update_modify_cbt._upd_value;
/*
* If we allocated the reconciliation structure and there was an error, clean up. If our caller
@@ -669,6 +670,7 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep)
__wt_buf_free(session, &r->_last);
__wt_buf_free(session, &r->update_modify_cbt.iface.value);
+ __wt_buf_free(session, &r->update_modify_cbt._upd_value.buf);
__wt_free(session, r);
}
@@ -788,17 +790,13 @@ __rec_split_chunk_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_CHUNK *
/* Don't touch the key item memory, that memory is reused. */
chunk->key.size = 0;
chunk->entries = 0;
- __wt_rec_addr_ts_init(r, &chunk->newest_start_durable_ts, &chunk->oldest_start_ts,
- &chunk->oldest_start_txn, &chunk->newest_stop_durable_ts, &chunk->newest_stop_ts,
- &chunk->newest_stop_txn, &chunk->prepare);
+ __wt_rec_addr_ts_init(r, &chunk->ta);
chunk->min_recno = WT_RECNO_OOB;
/* Don't touch the key item memory, that memory is reused. */
chunk->min_key.size = 0;
chunk->min_entries = 0;
- __wt_rec_addr_ts_init(r, &chunk->min_newest_start_durable_ts, &chunk->min_oldest_start_ts,
- &chunk->min_oldest_start_txn, &chunk->min_newest_stop_durable_ts, &chunk->min_newest_stop_ts,
- &chunk->min_newest_stop_txn, &chunk->prepare);
+ __wt_rec_addr_ts_init(r, &chunk->ta_min);
chunk->min_offset = 0;
/*
@@ -1111,7 +1109,7 @@ __wt_rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len, bool
* page.
*/
if (r->salvage != NULL)
- WT_PANIC_RET(session, WT_PANIC, "%s page too large, attempted split during salvage",
+ WT_RET_PANIC(session, WT_PANIC, "%s page too large, attempted split during salvage",
__wt_page_type_string(r->page->type));
/*
@@ -1215,12 +1213,7 @@ __wt_rec_split_crossing_bnd(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t ne
r->cur_ptr->min_recno = r->recno;
if (S2BT(session)->type == BTREE_ROW)
WT_RET(__rec_split_row_promote(session, r, &r->cur_ptr->min_key, r->page->type));
- r->cur_ptr->min_newest_start_durable_ts = r->cur_ptr->newest_start_durable_ts;
- r->cur_ptr->min_oldest_start_ts = r->cur_ptr->oldest_start_ts;
- r->cur_ptr->min_oldest_start_txn = r->cur_ptr->oldest_start_txn;
- r->cur_ptr->min_newest_stop_durable_ts = r->cur_ptr->newest_stop_durable_ts;
- r->cur_ptr->min_newest_stop_ts = r->cur_ptr->newest_stop_ts;
- r->cur_ptr->min_newest_stop_txn = r->cur_ptr->newest_stop_txn;
+ __wt_time_aggregate_copy(&r->cur_ptr->ta_min, &r->cur_ptr->ta);
/* Assert we're not re-entering this code. */
WT_ASSERT(session, r->cur_ptr->min_offset == 0);
@@ -1271,16 +1264,7 @@ __rec_split_finish_process_prev(WT_SESSION_IMPL *session, WT_RECONCILE *r)
* boundaries and create a single chunk.
*/
prev_ptr->entries += cur_ptr->entries;
- prev_ptr->newest_start_durable_ts =
- WT_MAX(prev_ptr->newest_start_durable_ts, cur_ptr->newest_start_durable_ts);
- prev_ptr->oldest_start_ts = WT_MIN(prev_ptr->oldest_start_ts, cur_ptr->oldest_start_ts);
- prev_ptr->oldest_start_txn = WT_MIN(prev_ptr->oldest_start_txn, cur_ptr->oldest_start_txn);
- prev_ptr->newest_stop_durable_ts =
- WT_MAX(prev_ptr->newest_stop_durable_ts, cur_ptr->newest_stop_durable_ts);
- prev_ptr->newest_stop_ts = WT_MAX(prev_ptr->newest_stop_ts, cur_ptr->newest_stop_ts);
- prev_ptr->newest_stop_txn = WT_MAX(prev_ptr->newest_stop_txn, cur_ptr->newest_stop_txn);
- if (cur_ptr->prepare)
- prev_ptr->prepare = true;
+ __wt_time_aggregate_merge(&prev_ptr->ta, &cur_ptr->ta);
dsk = r->cur_ptr->image.mem;
memcpy((uint8_t *)r->prev_ptr->image.mem + prev_ptr->image.size,
WT_PAGE_HEADER_BYTE(btree, dsk), cur_ptr->image.size - WT_PAGE_HEADER_BYTE_SIZE(btree));
@@ -1323,25 +1307,11 @@ __rec_split_finish_process_prev(WT_SESSION_IMPL *session, WT_RECONCILE *r)
cur_ptr->recno = prev_ptr->min_recno;
WT_RET(
__wt_buf_set(session, &cur_ptr->key, prev_ptr->min_key.data, prev_ptr->min_key.size));
- cur_ptr->newest_start_durable_ts =
- WT_MAX(prev_ptr->newest_start_durable_ts, cur_ptr->newest_start_durable_ts);
- cur_ptr->oldest_start_ts = WT_MIN(prev_ptr->oldest_start_ts, cur_ptr->oldest_start_ts);
- cur_ptr->oldest_start_txn = WT_MIN(prev_ptr->oldest_start_txn, cur_ptr->oldest_start_txn);
- cur_ptr->newest_stop_durable_ts =
- WT_MAX(prev_ptr->newest_stop_durable_ts, cur_ptr->newest_stop_durable_ts);
- cur_ptr->newest_stop_ts = WT_MAX(prev_ptr->newest_stop_ts, cur_ptr->newest_stop_ts);
- cur_ptr->newest_stop_txn = WT_MAX(prev_ptr->newest_stop_txn, cur_ptr->newest_stop_txn);
- if (prev_ptr->prepare)
- cur_ptr->prepare = true;
+ __wt_time_aggregate_merge(&cur_ptr->ta, &prev_ptr->ta);
cur_ptr->image.size += len_to_move;
prev_ptr->entries = prev_ptr->min_entries;
- prev_ptr->newest_start_durable_ts = prev_ptr->min_newest_start_durable_ts;
- prev_ptr->oldest_start_ts = prev_ptr->min_oldest_start_ts;
- prev_ptr->oldest_start_txn = prev_ptr->min_oldest_start_txn;
- prev_ptr->newest_stop_durable_ts = prev_ptr->min_newest_stop_durable_ts;
- prev_ptr->newest_stop_ts = prev_ptr->min_newest_stop_ts;
- prev_ptr->newest_stop_txn = prev_ptr->min_newest_stop_txn;
+ __wt_time_aggregate_copy(&prev_ptr->ta, &prev_ptr->ta_min);
prev_ptr->image.size -= len_to_move;
}
@@ -1722,12 +1692,7 @@ __rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_CHUNK *chunk
multi = &r->multi[r->multi_next++];
/* Initialize the address (set the addr type for the parent). */
- multi->addr.newest_start_durable_ts = chunk->newest_start_durable_ts;
- multi->addr.oldest_start_ts = chunk->oldest_start_ts;
- multi->addr.oldest_start_txn = chunk->oldest_start_txn;
- multi->addr.newest_stop_durable_ts = chunk->newest_stop_durable_ts;
- multi->addr.newest_stop_ts = chunk->newest_stop_ts;
- multi->addr.newest_stop_txn = chunk->newest_stop_txn;
+ __wt_time_aggregate_copy(&multi->addr.ta, &chunk->ta);
switch (page->type) {
case WT_PAGE_COL_FIX:
@@ -2035,12 +2000,14 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_MULTI *multi;
WT_PAGE_MODIFY *mod;
WT_REF *ref;
+ WT_TIME_AGGREGATE ta;
uint32_t i;
btree = S2BT(session);
bm = btree->bm;
mod = page->modify;
ref = r->ref;
+ __wt_time_aggregate_init(&ta);
/*
* This page may have previously been reconciled, and that information is now about to be
@@ -2121,8 +2088,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
*/
ref = r->ref;
if (__wt_ref_is_root(ref)) {
- __wt_checkpoint_tree_reconcile_update(
- session, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX);
+ __wt_checkpoint_tree_reconcile_update(session, &ta);
WT_RET(bm->checkpoint(bm, session, NULL, btree->ckpt, false));
}
@@ -2162,10 +2128,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
mod->mod_disk_image = r->multi->disk_image;
r->multi->disk_image = NULL;
} else {
- __wt_checkpoint_tree_reconcile_update(session, r->multi->addr.newest_start_durable_ts,
- r->multi->addr.oldest_start_ts, r->multi->addr.oldest_start_txn,
- r->multi->addr.newest_stop_durable_ts, r->multi->addr.newest_stop_ts,
- r->multi->addr.newest_stop_txn);
+ __wt_checkpoint_tree_reconcile_update(session, &r->multi->addr.ta);
WT_RET(__wt_bt_write(session, r->wrapup_checkpoint, NULL, NULL, NULL, true,
F_ISSET(r, WT_REC_CHECKPOINT), r->wrapup_checkpoint_compressed));
}
@@ -2267,7 +2230,7 @@ __rec_hs_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
if (multi->supd != NULL) {
- WT_ERR(__wt_hs_insert_updates(session->hs_cursor, S2BT(session), r->page, multi));
+ WT_ERR(__wt_hs_insert_updates(session, r->page, multi));
r->cache_write_hs = true;
if (!multi->supd_restore) {
__wt_free(session, multi->supd);
@@ -2286,9 +2249,7 @@ err:
*/
int
__wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv, uint8_t type,
- wt_timestamp_t start_durable_ts, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_durable_ts, wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare,
- uint64_t rle)
+ WT_TIME_WINDOW *tw, uint64_t rle)
{
WT_BM *bm;
WT_BTREE *btree;
@@ -2343,8 +2304,7 @@ __wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *k
WT_ERR(__wt_buf_set(session, &kv->buf, addr, size));
/* Build the cell and return. */
- kv->cell_len = __wt_cell_pack_ovfl(session, &kv->cell, type, start_durable_ts, start_ts,
- start_txn, stop_durable_ts, stop_ts, stop_txn, prepare, rle, kv->buf.size);
+ kv->cell_len = __wt_cell_pack_ovfl(session, &kv->cell, type, tw, rle, kv->buf.size);
kv->len = kv->cell_len + kv->buf.size;
err:
diff --git a/src/third_party/wiredtiger/src/schema/schema_util.c b/src/third_party/wiredtiger/src/schema/schema_util.c
index 16365ba94c5..96a7537e980 100644
--- a/src/third_party/wiredtiger/src/schema/schema_util.c
+++ b/src/third_party/wiredtiger/src/schema/schema_util.c
@@ -26,7 +26,7 @@ __schema_backup_check_int(WT_SESSION_IMPL *session, const char *name)
* There is a window at the end of a backup where the list has been cleared from the connection
* but the flag is still set. It is safe to drop at that point.
*/
- if (!conn->hot_backup || (backup_list = conn->hot_backup_list) == NULL) {
+ if (conn->hot_backup_start == 0 || (backup_list = conn->hot_backup_list) == NULL) {
return (0);
}
for (i = 0; backup_list[i] != NULL; ++i) {
@@ -50,7 +50,7 @@ __wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name)
WT_DECL_RET;
conn = S2C(session);
- if (!conn->hot_backup)
+ if (conn->hot_backup_start == 0)
return (0);
WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session, ret = __schema_backup_check_int(session, name));
return (ret);
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index c843d592160..459a0757032 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -1570,20 +1570,12 @@ __session_verify(WT_SESSION *wt_session, const char *uri, const char *config)
SESSION_API_CALL(session, verify, config, cfg);
WT_ERR(__wt_inmem_unsupported_op(session, NULL));
- /*
- * Even if we're not verifying the history store, we need to be able to iterate over the history
- * store content for another table. In order to do this, we must ignore tombstones in the
- * history store since every history store record is succeeded with a tombstone.
- */
- F_SET(session, WT_SESSION_IGNORE_HS_TOMBSTONE);
-
/* Block out checkpoints to avoid spurious EBUSY errors. */
WT_WITH_CHECKPOINT_LOCK(
session, WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, uri, __wt_verify,
NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_VERIFY)));
WT_ERR(ret);
err:
- F_CLR(session, WT_SESSION_IGNORE_HS_TOMBSTONE);
if (ret != 0)
WT_STAT_CONN_INCR(session, session_table_verify_fail);
else
@@ -1658,7 +1650,7 @@ err:
F_CLR(session, WT_SESSION_RESOLVING_TXN);
} else if (F_ISSET(txn, WT_TXN_RUNNING)) {
if (F_ISSET(txn, WT_TXN_PREPARE))
- WT_PANIC_RET(session, ret, "failed to commit prepared transaction, failing the system");
+ WT_RET_PANIC(session, ret, "failed to commit prepared transaction, failing the system");
WT_TRET(__wt_session_reset_cursors(session, false));
F_SET(session, WT_SESSION_RESOLVING_TXN);
diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c
index 18351ad4062..b8e43ae5a7b 100644
--- a/src/third_party/wiredtiger/src/support/err.c
+++ b/src/third_party/wiredtiger/src/support/err.c
@@ -332,6 +332,73 @@ __wt_errx_func(WT_SESSION_IMPL *session, const char *func, int line, const char
}
/*
+ * __wt_panic_func --
+ * A standard error message when we panic.
+ */
+int
+__wt_panic_func(WT_SESSION_IMPL *session, int error, const char *func, int line, const char *fmt,
+ ...) WT_GCC_FUNC_ATTRIBUTE((cold)) WT_GCC_FUNC_ATTRIBUTE((format(printf, 5, 6)))
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+{
+ va_list ap;
+
+ /*
+ * Ignore error returns from underlying event handlers, we already have an error value to
+ * return.
+ */
+ va_start(ap, fmt);
+ WT_IGNORE_RET(__eventv(session, false, error, func, line, fmt, ap));
+ va_end(ap);
+
+ /*
+ * !!!
+ * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ *
+ * If the connection has already panicked, just return the error.
+ */
+ if (session != NULL && F_ISSET(S2C(session), WT_CONN_PANIC))
+ return (WT_PANIC);
+
+ /*
+ * Call the error callback function before setting the connection's panic flag, so applications
+ * can trace the failing thread before being flooded with panic returns from API calls. Using
+ * the variable-arguments list from the current call even thought the format doesn't need it as
+ * I'm not confident of underlying support for a NULL.
+ */
+ va_start(ap, fmt);
+ WT_IGNORE_RET(
+ __eventv(session, false, WT_PANIC, func, line, "the process must exit and restart", ap));
+ va_end(ap);
+
+/*
+ * Confusing #ifdef structure because gcc/clang knows the abort call won't return, and Visual Studio
+ * doesn't.
+ */
+#if defined(HAVE_DIAGNOSTIC)
+ __wt_abort(session); /* Drop core if testing. */
+ /* NOTREACHED */
+#endif
+#if !defined(HAVE_DIAGNOSTIC) || defined(_WIN32)
+ /*
+ * !!!
+ * This function MUST handle a NULL WT_SESSION_IMPL handle.
+ *
+ * Panic the connection;
+ */
+ if (session != NULL)
+ F_SET(S2C(session), WT_CONN_PANIC);
+
+ /*
+ * !!!
+ * Chaos reigns within.
+ * Reflect, repent, and reboot.
+ * Order shall return.
+ */
+ return (WT_PANIC);
+#endif
+}
+
+/*
* __wt_set_return_func --
* Conditionally log the source of an error code and return the error.
*/
@@ -467,67 +534,6 @@ __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v)
}
/*
- * __wt_panic --
- * A standard error message when we panic.
- */
-int
-__wt_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_ATTRIBUTE((cold))
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
-{
- /*
- * !!!
- * This function MUST handle a NULL WT_SESSION_IMPL handle.
- *
- * If the connection has already panicked, just return the error.
- */
- if (session != NULL && F_ISSET(S2C(session), WT_CONN_PANIC))
- return (WT_PANIC);
-
- /*
- * Call the error callback function before setting the connection's panic flag, so applications
- * can trace the failing thread before being flooded with panic returns from API calls.
- */
- __wt_err(session, WT_PANIC, "the process must exit and restart");
-
-/*
- * Confusing #ifdef structure because gcc/clang knows the abort call won't return, and Visual Studio
- * doesn't.
- */
-#if defined(HAVE_DIAGNOSTIC)
- __wt_abort(session); /* Drop core if testing. */
- /* NOTREACHED */
-#endif
-#if !defined(HAVE_DIAGNOSTIC) || defined(_WIN32)
- /*
- * !!!
- * This function MUST handle a NULL WT_SESSION_IMPL handle.
- *
- * Panic the connection;
- */
- if (session != NULL)
- F_SET(S2C(session), WT_CONN_PANIC);
-
- /*
- * Chaos reigns within. Reflect, repent, and reboot. Order shall return.
- */
- return (WT_PANIC);
-#endif
-}
-
-/*
- * __wt_illegal_value_func --
- * A standard error message when we detect an illegal value.
- */
-int
-__wt_illegal_value_func(WT_SESSION_IMPL *session, uintmax_t v, const char *func, int line)
- WT_GCC_FUNC_ATTRIBUTE((cold)) WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
-{
- __wt_err_func(session, EINVAL, func, line, "%s: 0x%" PRIxMAX,
- "encountered an illegal file format or internal value", v);
- return (__wt_panic(session));
-}
-
-/*
* __wt_inmem_unsupported_op --
* Print a standard error message for an operation that's not supported for in-memory
* configurations.
diff --git a/src/third_party/wiredtiger/src/support/generation.c b/src/third_party/wiredtiger/src/support/generation.c
index bd4a6410f52..e3a60621b5c 100644
--- a/src/third_party/wiredtiger/src/support/generation.c
+++ b/src/third_party/wiredtiger/src/support/generation.c
@@ -136,8 +136,8 @@ __wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation)
/* If we're waiting on ourselves, we're deadlocked. */
if (session == s) {
- WT_ASSERT(session, session != s);
- WT_IGNORE_RET(__wt_panic(session));
+ WT_IGNORE_RET(__wt_panic(session, WT_PANIC, "self-deadlock"));
+ return;
}
/*
diff --git a/src/third_party/wiredtiger/src/support/hazard.c b/src/third_party/wiredtiger/src/support/hazard.c
index 45580b7d0cc..336ee99c27c 100644
--- a/src/third_party/wiredtiger/src/support/hazard.c
+++ b/src/third_party/wiredtiger/src/support/hazard.c
@@ -210,7 +210,7 @@ __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref)
* A serious error, we should always find the hazard pointer. Panic, because using a page we
* didn't have pinned down implies corruption.
*/
- WT_PANIC_RET(session, EINVAL, "session %p: clear hazard pointer: %p: not found",
+ WT_RET_PANIC(session, EINVAL, "session %p: clear hazard pointer: %p: not found",
(void *)session, (void *)ref);
}
diff --git a/src/third_party/wiredtiger/src/support/modify.c b/src/third_party/wiredtiger/src/support/modify.c
index caaaa7abfbb..010ef9a80d1 100644
--- a/src/third_party/wiredtiger/src/support/modify.c
+++ b/src/third_party/wiredtiger/src/support/modify.c
@@ -81,7 +81,7 @@ __wt_modify_pack(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries, WT_ITEM **
uint8_t *data;
int i;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor);
*modifyp = NULL;
/*
@@ -346,33 +346,18 @@ __modify_apply_no_overlap(WT_SESSION_IMPL *session, WT_ITEM *value, const size_t
}
/*
- * __wt_modify_apply --
- * Apply a single set of WT_MODIFY changes to a cursor buffer.
- */
-int
-__wt_modify_apply(WT_CURSOR *cursor, const void *modify)
-{
- WT_SESSION_IMPL *session;
- bool sformat;
-
- session = (WT_SESSION_IMPL *)cursor->session;
- sformat = cursor->value_format[0] == 'S';
-
- return (__wt_modify_apply_item(session, &cursor->value, modify, sformat));
-}
-
-/*
* __wt_modify_apply_item --
* Apply a single set of WT_MODIFY changes to a WT_ITEM buffer.
*/
int
-__wt_modify_apply_item(WT_SESSION_IMPL *session, WT_ITEM *value, const void *modify, bool sformat)
+__wt_modify_apply_item(
+ WT_SESSION_IMPL *session, const char *value_format, WT_ITEM *value, const void *modify)
{
WT_MODIFY mod;
size_t datasz, destsz, item_offset, tmp;
const size_t *p;
int napplied, nentries;
- bool overlap;
+ bool overlap, sformat;
/*
* Get the number of modify entries and set a second pointer to reference the replacement data.
@@ -382,6 +367,13 @@ __wt_modify_apply_item(WT_SESSION_IMPL *session, WT_ITEM *value, const void *mod
nentries = (int)tmp;
/*
+ * Modifies can only be applied on a single value field. Make sure we are not applying modifies
+ * to schema with multiple value fields.
+ */
+ WT_ASSERT(session, value_format[1] == '\0');
+ sformat = value_format[0] == 'S';
+
+ /*
* Grow the buffer first. This function is often called using a cursor buffer referencing
* on-page memory and it's easy to overwrite a page. A side-effect of growing the buffer is to
* ensure the buffer's value is in buffer-local memory.
@@ -437,10 +429,11 @@ __wt_modify_apply_api(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries)
WT_DECL_RET;
WT_ERR(__wt_modify_pack(cursor, entries, nentries, &modify));
- WT_ERR(__wt_modify_apply(cursor, modify->data));
+ WT_ERR(
+ __wt_modify_apply_item(CUR2S(cursor), cursor->value_format, &cursor->value, modify->data));
err:
- __wt_scr_free((WT_SESSION_IMPL *)cursor->session, &modify);
+ __wt_scr_free(CUR2S(cursor), &modify);
return (ret);
}
@@ -523,3 +516,73 @@ __wt_modify_vector_free(WT_MODIFY_VECTOR *modifies)
__wt_free(modifies->session, modifies->listp);
__wt_modify_vector_init(modifies->session, modifies);
}
+
+/*
+ * __wt_modify_reconstruct_from_upd_list --
+ * Takes an in-memory modify and populates an update value with the reconstructed full value.
+ */
+int
+__wt_modify_reconstruct_from_upd_list(
+ WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, WT_UPDATE_VALUE *upd_value)
+{
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_MODIFY_VECTOR modifies;
+ WT_TIME_WINDOW tw;
+
+ WT_ASSERT(session, upd->type == WT_UPDATE_MODIFY);
+
+ cursor = &cbt->iface;
+
+ /* While we have a pointer to our original modify, grab this information. */
+ upd_value->start_ts = upd->start_ts;
+ upd_value->txnid = upd->txnid;
+ upd_value->prepare_state = upd->prepare_state;
+
+ /* Construct full update */
+ __wt_modify_vector_init(session, &modifies);
+ /* Find a complete update. */
+ for (; upd != NULL; upd = upd->next) {
+ if (upd->txnid == WT_TXN_ABORTED)
+ continue;
+
+ if (WT_UPDATE_DATA_VALUE(upd))
+ break;
+
+ if (upd->type == WT_UPDATE_MODIFY)
+ WT_ERR(__wt_modify_vector_push(&modifies, upd));
+ }
+ /*
+ * If there's no full update, the base item is the on-page item. If the update is a tombstone,
+ * the base item is an empty item.
+ */
+ if (upd == NULL) {
+ /*
+ * Callers of this function set the cursor slot to an impossible value to check we don't try
+ * and return on-page values when the update list should have been sufficient (which
+ * happens, for example, if an update list was truncated, deleting some standard update
+ * required by a previous modify update). Assert the case.
+ */
+ WT_ASSERT(session, cbt->slot != UINT32_MAX);
+
+ WT_ERR(__wt_value_return_buf(cbt, cbt->ref, &upd_value->buf, &tw));
+ /*
+ * Applying modifies on top of a tombstone is invalid. So if we're using the onpage value,
+ * the stop time pair should be unset.
+ */
+ WT_ASSERT(session, tw.stop_txn == WT_TXN_MAX && tw.stop_ts == WT_TS_MAX);
+ } else {
+ /* The base update must not be a tombstone. */
+ WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD);
+ WT_ERR(__wt_buf_set(session, &upd_value->buf, upd->data, upd->size));
+ }
+ /* Once we have a base item, roll forward through any visible modify updates. */
+ while (modifies.size > 0) {
+ __wt_modify_vector_pop(&modifies, &upd);
+ WT_ERR(__wt_modify_apply_item(session, cursor->value_format, &upd_value->buf, upd->data));
+ }
+ upd_value->type = WT_UPDATE_STANDARD;
+err:
+ __wt_modify_vector_free(&modifies);
+ return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 6bd6b74de24..751a86deb53 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -89,7 +89,8 @@ static const char *const __stats_dsrc_desc[] = {
"reconciliation: maximum blocks required for a page", "reconciliation: overflow values written",
"reconciliation: page checksum matches", "reconciliation: page reconciliation calls",
"reconciliation: page reconciliation calls for eviction", "reconciliation: pages deleted",
- "session: object compaction", "transaction: update conflicts",
+ "reconciliation: prepared values written", "session: object compaction",
+ "transaction: update conflicts",
};
int
@@ -274,6 +275,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->rec_pages = 0;
stats->rec_pages_eviction = 0;
stats->rec_page_delete = 0;
+ stats->rec_prepare_value = 0;
stats->session_compact = 0;
stats->txn_update_conflict = 0;
}
@@ -447,6 +449,7 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->rec_pages += from->rec_pages;
to->rec_pages_eviction += from->rec_pages_eviction;
to->rec_page_delete += from->rec_page_delete;
+ to->rec_prepare_value += from->rec_prepare_value;
to->session_compact += from->session_compact;
to->txn_update_conflict += from->txn_update_conflict;
}
@@ -617,6 +620,7 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->rec_pages += WT_STAT_READ(from, rec_pages);
to->rec_pages_eviction += WT_STAT_READ(from, rec_pages_eviction);
to->rec_page_delete += WT_STAT_READ(from, rec_page_delete);
+ to->rec_prepare_value += WT_STAT_READ(from, rec_prepare_value);
to->session_compact += WT_STAT_READ(from, session_compact);
to->txn_update_conflict += WT_STAT_READ(from, txn_update_conflict);
}
diff --git a/src/third_party/wiredtiger/src/support/thread_group.c b/src/third_party/wiredtiger/src/support/thread_group.c
index aca2a3d9aa7..d968ee8c3b8 100644
--- a/src/third_party/wiredtiger/src/support/thread_group.c
+++ b/src/third_party/wiredtiger/src/support/thread_group.c
@@ -39,7 +39,7 @@ err:
ret = thread->stop_func(session, thread);
if (ret != 0 && F_ISSET(thread, WT_THREAD_PANIC_FAIL))
- WT_PANIC_MSG(session, ret, "Unrecoverable utility thread error");
+ WT_IGNORE_RET(__wt_panic(session, ret, "Unrecoverable utility thread error"));
/*
* The three cases when threads are expected to stop are:
@@ -232,7 +232,7 @@ err:
group->min = new_min;
WT_TRET(__wt_thread_group_destroy(session, group));
- WT_PANIC_RET(session, ret, "Error while resizing thread group");
+ WT_RET_PANIC(session, ret, "Error while resizing thread group");
}
/*
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 58607c7cf2c..0fdf022bd51 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -598,6 +598,165 @@ __wt_txn_release(WT_SESSION_IMPL *session)
}
/*
+ * __txn_fixup_prepared_update --
+ * Fix/restore the history store update of a prepared datastore update based on transaction
+ * status.
+ */
+static int
+__txn_fixup_prepared_update(WT_SESSION_IMPL *session, WT_TXN_OP *op, WT_CURSOR *cursor, bool commit)
+{
+ WT_CURSOR *hs_cursor;
+ WT_CURSOR_BTREE *cbt, *hs_cbt;
+ WT_DECL_ITEM(hs_key);
+ WT_DECL_ITEM(hs_value);
+ WT_DECL_RET;
+ WT_TXN *txn;
+ WT_UPDATE *hs_upd, *tombstone, *upd;
+ wt_timestamp_t durable_ts, hs_start_ts, hs_stop_ts;
+ uint64_t hs_counter, type_full;
+ uint32_t hs_btree_id, session_flags, txn_flags;
+ int cmp;
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+ bool is_owner;
+
+ hs_cursor = NULL;
+ cbt = (WT_CURSOR_BTREE *)cursor;
+ txn = session->txn;
+ hs_upd = tombstone = upd = NULL;
+ durable_ts = hs_start_ts = WT_TS_NONE;
+ hs_btree_id = S2BT(session)->id;
+ session_flags = 0;
+ is_owner = false;
+
+ /*
+ * Transaction error and prepare are cleared temporarily as cursor functions are not allowed
+ * after an error or a prepared transaction.
+ */
+ txn_flags = FLD_MASK(txn->flags, WT_TXN_ERROR | WT_TXN_PREPARE);
+ F_CLR(txn, txn_flags);
+
+ /* Allocate buffers for the data store and history store key. */
+ WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
+ WT_ERR(__wt_scr_alloc(session, 0, &hs_value));
+
+ /* Open a history store table cursor. */
+ WT_ERR(__wt_hs_cursor(session, &session_flags, &is_owner));
+ hs_cursor = session->hs_cursor;
+ hs_cbt = (WT_CURSOR_BTREE *)hs_cursor;
+
+ /*
+ * Scan the history store for the given btree and key with maximum start and stop time pair to
+ * let the search point to the last version of the key and start traversing backwards to find
+ * out the satisfying record according the given timestamp. Any satisfying history store record
+ * is moved into data store and removed from history store. If none of the history store records
+ * satisfy the given timestamp, the key is removed from data store.
+ */
+ WT_ERR(__wt_hs_cursor_position(session, hs_cursor, hs_btree_id, &op->u.op_row.key, WT_TS_MAX));
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
+
+ /* Stop before crossing over to the next btree */
+ if (hs_btree_id != S2BT(session)->id)
+ goto err;
+
+ /*
+ * Keys are sorted in an order, skip the ones before the desired key, and bail out if we have
+ * crossed over the desired key and not found the record we are looking for.
+ */
+ WT_ERR(__wt_compare(session, NULL, hs_key, &op->u.op_row.key, &cmp));
+ if (cmp != 0)
+ goto err;
+
+ /*
+ * As part of the history store search, we never get an exact match based on our search criteria
+ * as we always search for a maximum record for that key. Make sure that we set the comparison
+ * result as an exact match to remove this key as part of rollback to stable. In case if we
+ * don't mark the comparison result as same, later the __wt_row_modify function will not
+ * properly remove the update from history store.
+ */
+ hs_cbt->compare = 0;
+
+ /* Get current value and convert to full update if it is a modify. */
+ WT_ERR(hs_cursor->get_value(hs_cursor, &hs_stop_ts, &durable_ts, &type_full, hs_value));
+
+ /*
+ * If we found a history value that satisfied the given timestamp, add it to the update list.
+ * Otherwise remove the key by adding a tombstone.
+ */
+ if (commit) {
+ /*
+ * It is possible that the update in the history store may already been removed by an older
+ * transaction but retained it due to an history window.
+ */
+ if (hs_stop_ts != WT_TS_MAX)
+ goto err;
+
+ WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL));
+ hs_upd->durable_ts = hs_upd->start_ts = txn->durable_timestamp;
+ hs_upd->txnid = txn->id;
+ } else {
+ WT_ERR(__wt_upd_alloc(session, hs_value, WT_UPDATE_STANDARD, &upd, NULL));
+
+ upd->txnid = WT_TXN_NONE;
+ upd->durable_ts = durable_ts;
+ upd->start_ts = hs_start_ts;
+ __wt_verbose(session, WT_VERB_TRANSACTION,
+ "update restored from history store (txnid: %" PRIu64 ", start_ts: %s, durable_ts: %s",
+ upd->txnid, __wt_timestamp_to_string(upd->start_ts, ts_string[0]),
+ __wt_timestamp_to_string(upd->durable_ts, ts_string[1]));
+
+ /*
+ * Set the flag to indicate that this update has been restored from history store for the
+ * rollback of a prepared transaction.
+ */
+ F_SET(upd, WT_UPDATE_RESTORED_FOR_ROLLBACK);
+
+ /*
+ * There should be only one aborted prepared update in the list, append it after the new
+ * update.
+ */
+ if (cbt->ins != NULL)
+ upd->next = cbt->ins->upd;
+ else if (cbt->ref->page->modify != NULL && cbt->ref->page->modify->mod_row_update != NULL)
+ upd->next = cbt->ref->page->modify->mod_row_update[cbt->slot];
+ WT_ASSERT(session,
+ upd->next != NULL && upd->next->next == NULL && upd->next->txnid == WT_TXN_ABORTED);
+
+ /* Append a tombstone if the stop timestamp exists. */
+ if (hs_stop_ts != WT_TS_MAX) {
+ WT_ERR(__wt_upd_alloc(session, NULL, WT_UPDATE_TOMBSTONE, &tombstone, NULL));
+ tombstone->durable_ts = hs_stop_ts;
+ tombstone->start_ts = hs_stop_ts;
+ tombstone->txnid = WT_TXN_NONE;
+ tombstone->next = upd;
+ } else
+ tombstone = upd;
+
+ WT_WITH_BTREE(session, cbt->btree,
+ ret = __wt_row_modify(cbt, &cbt->iface.key, NULL, tombstone, WT_UPDATE_INVALID, true));
+ WT_ERR(ret);
+ tombstone = NULL;
+ upd = NULL;
+
+ /* Remove the restored update from history store. */
+ WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL));
+ }
+
+ WT_ERR(__wt_hs_modify(hs_cbt, hs_upd));
+ hs_upd = NULL;
+
+err:
+ __wt_scr_free(session, &hs_key);
+ __wt_scr_free(session, &hs_value);
+ __wt_free(session, hs_upd);
+ __wt_free(session, upd);
+ __wt_free(session, tombstone);
+ WT_TRET(__wt_hs_cursor_close(session, session_flags, is_owner));
+ F_SET(txn, txn_flags);
+
+ return (ret);
+}
+
+/*
* __txn_search_prepared_op --
* Search for an operation's prepared update.
*/
@@ -645,12 +804,12 @@ __txn_search_prepared_op(
case WT_TXN_OP_REF_DELETE:
case WT_TXN_OP_TRUNCATE_COL:
case WT_TXN_OP_TRUNCATE_ROW:
- WT_RET_ASSERT(session, false, WT_PANIC, "invalid prepared operation update type");
+ WT_RET_PANIC_ASSERT(session, false, WT_PANIC, "invalid prepared operation update type");
break;
}
F_CLR(txn, txn_flags);
- WT_WITH_BTREE(session, op->btree, ret = __wt_btcur_search_uncommitted(cursor, updp));
+ WT_WITH_BTREE(session, op->btree, ret = __wt_btcur_search_prepared(cursor, updp));
F_SET(txn, txn_flags);
WT_RET(ret);
WT_RET_ASSERT(session, *updp != NULL, WT_NOTFOUND,
@@ -668,8 +827,10 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
{
WT_TXN *txn;
WT_UPDATE *upd;
+ bool resolved;
txn = session->txn;
+ resolved = false;
WT_RET(__txn_search_prepared_op(session, op, cursorp, &upd));
@@ -686,9 +847,14 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
if (!commit) {
upd->txnid = WT_TXN_ABORTED;
+ resolved = true;
continue;
}
+ /* Ignore the already resolved updates. */
+ if (upd->prepare_state == WT_PREPARE_RESOLVED)
+ continue;
+
/*
* Newer updates are inserted at head of update chain, and transaction operations are added
* at the tail of the transaction modify chain.
@@ -714,8 +880,19 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
* Resolve the prepared update to be committed update.
*/
__txn_resolve_prepared_update(session, upd);
+ resolved = true;
}
+ /*
+ * Fix the history store contents if they exist, when there are no more updates in the update
+ * list. Only in eviction, it is possible to write an unfinished history store update when the
+ * prepared updates are written to the data store. When the page is read back into memory, there
+ * will be only one uncommitted prepared update. There can be a false positive of fixing history
+ * store when handling prepared inserts, but it doesn't cost much.
+ */
+ if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY) && resolved && upd == NULL)
+ WT_RET_NOTFOUND_OK(__txn_fixup_prepared_update(session, op, *cursorp, commit));
+
return (0);
}
@@ -1144,7 +1321,7 @@ err:
* a prepared transaction.
*/
if (prepare)
- WT_PANIC_RET(session, ret, "failed to commit prepared transaction, failing the system");
+ WT_RET_PANIC(session, ret, "failed to commit prepared transaction, failing the system");
WT_TRET(__wt_txn_rollback(session, cfg));
return (ret);
@@ -1202,6 +1379,15 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
if (WT_IS_METADATA(op->btree->dhandle))
continue;
+ /*
+ * Logged table updates should never be prepared. As these updates are immediately durable,
+ * it is not possible to roll them back if the prepared transaction is rolled back.
+ */
+ if (!F_ISSET(op->btree, WT_BTREE_NO_LOGGING) &&
+ (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) ||
+ F_ISSET(S2C(session), WT_CONN_IN_MEMORY)))
+ WT_RET_MSG(session, EINVAL, "transaction prepare is not supported with logged tables");
+
switch (op->type) {
case WT_TXN_OP_NONE:
break;
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index af97d01a0fb..577281a2a47 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -745,8 +745,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN_GLOBAL *txn_global;
WT_TXN_ISOLATION saved_isolation;
wt_timestamp_t ckpt_tmp_ts;
+ uint64_t finish_secs, hs_ckpt_duration_usecs, time_start_hs, time_stop_hs;
uint64_t fsync_duration_usecs, generation, time_start_fsync, time_stop_fsync;
- uint64_t time_start_hs, time_stop_hs, hs_ckpt_duration_usecs;
u_int i;
bool can_skip, failed, full, idle, logging, tracking, use_timestamp;
void *saved_meta_next;
@@ -985,6 +985,16 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
conn->txn_global.last_ckpt_timestamp = conn->txn_global.recovery_timestamp;
} else
conn->txn_global.last_ckpt_timestamp = WT_TS_NONE;
+
+ /*
+ * Save clock value marking end of checkpoint processing. If a hot backup starts before the
+ * next checkpoint, we will need to keep all checkpoints up to this clock value until the
+ * backup completes.
+ */
+ __wt_seconds(session, &finish_secs);
+ /* Be defensive: time is only monotonic per session */
+ if (finish_secs > conn->ckpt_finish_secs)
+ conn->ckpt_finish_secs = finish_secs;
}
err:
@@ -1146,7 +1156,6 @@ static void
__drop(WT_CKPT *ckptbase, const char *name, size_t len)
{
WT_CKPT *ckpt;
- u_int max_ckpt_drop;
/*
* If we're dropping internal checkpoints, match to the '.' separating the checkpoint name from
@@ -1155,20 +1164,9 @@ __drop(WT_CKPT *ckptbase, const char *name, size_t len)
* it's one we want to drop.
*/
if (strncmp(WT_CHECKPOINT, name, len) == 0) {
- /*
- * Currently, hot backup cursors block checkpoint drop, which means releasing a hot backup
- * cursor can result in immediately attempting to drop lots of checkpoints, which involves a
- * fair amount of work while holding locks. Limit the number of standard checkpoints dropped
- * per checkpoint.
- */
- max_ckpt_drop = 0;
WT_CKPT_FOREACH (ckptbase, ckpt)
- if (WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT)) {
+ if (WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT))
F_SET(ckpt, WT_CKPT_DELETE);
-#define WT_MAX_CHECKPOINT_DROP 4
- if (++max_ckpt_drop >= WT_MAX_CHECKPOINT_DROP)
- break;
- }
} else
WT_CKPT_FOREACH (ckptbase, ckpt)
if (WT_STRING_MATCH(ckpt->name, name, len))
@@ -1248,30 +1246,44 @@ __checkpoint_lock_dirty_tree_int(WT_SESSION_IMPL *session, bool is_checkpoint, b
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ u_int max_ckpt_drop;
+ bool is_wt_ckpt;
WT_UNUSED(is_checkpoint);
conn = S2C(session);
- /*
- * We can't delete checkpoints if a backup cursor is open. WiredTiger checkpoints are uniquely
- * named and it's OK to have multiple of them in the system: clear the delete flag for them, and
- * otherwise fail. Hold the lock until we're done (blocking hot backups from starting), we don't
- * want to race with a future hot backup.
- */
- if (conn->hot_backup)
- WT_CKPT_FOREACH (ckptbase, ckpt) {
- if (!F_ISSET(ckpt, WT_CKPT_DELETE))
- continue;
- if (WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT)) {
+ /* Check that it is OK to remove all the checkpoints marked for deletion. */
+ max_ckpt_drop = 0;
+ WT_CKPT_FOREACH (ckptbase, ckpt) {
+ if (!F_ISSET(ckpt, WT_CKPT_DELETE))
+ continue;
+ is_wt_ckpt = WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT);
+
+ /*
+ * If there is a hot backup, don't delete any WiredTiger checkpoint that could possibly have
+ * been created before the backup started. Fail if trying to delete any other named
+ * checkpoint.
+ */
+ if (conn->hot_backup_start != 0 && ckpt->sec <= conn->hot_backup_start) {
+ if (is_wt_ckpt) {
F_CLR(ckpt, WT_CKPT_DELETE);
continue;
}
WT_RET_MSG(session, EBUSY,
- "checkpoint %s blocked by hot backup: it would"
+ "checkpoint %s blocked by hot backup: it would "
"delete an existing checkpoint, and checkpoints "
"cannot be deleted during a hot backup",
ckpt->name);
}
+ /*
+ * Dropping checkpoints involves a fair amount of work while holding locks. Limit the number
+ * of WiredTiger checkpoints dropped per checkpoint.
+ */
+ if (is_wt_ckpt)
+#define WT_MAX_CHECKPOINT_DROP 4
+ if (++max_ckpt_drop >= WT_MAX_CHECKPOINT_DROP)
+ F_CLR(ckpt, WT_CKPT_DELETE);
+ }
/*
* Mark old checkpoints that are being deleted and figure out which trees we can skip in this
@@ -1291,6 +1303,8 @@ __checkpoint_lock_dirty_tree_int(WT_SESSION_IMPL *session, bool is_checkpoint, b
WT_CKPT_FOREACH (ckptbase, ckpt) {
if (!F_ISSET(ckpt, WT_CKPT_DELETE))
continue;
+ WT_ASSERT(session, !WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT) ||
+ conn->hot_backup_start == 0 || ckpt->sec > conn->hot_backup_start);
/*
* We can't delete checkpoints referenced by a cursor. WiredTiger checkpoints are
* uniquely named and it's OK to have multiple in the system: clear the delete flag for
@@ -1496,9 +1510,7 @@ __checkpoint_mark_skip(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, bool force)
* Update a checkpoint based on reconciliation results.
*/
void
-__wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session, wt_timestamp_t start_durable_ts,
- wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t stop_durable_ts,
- wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn)
+__wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session, WT_TIME_AGGREGATE *ta)
{
WT_BTREE *btree;
WT_CKPT *ckpt, *ckptbase;
@@ -1514,12 +1526,7 @@ __wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session, wt_timestamp_t s
WT_CKPT_FOREACH (ckptbase, ckpt)
if (F_ISSET(ckpt, WT_CKPT_ADD)) {
ckpt->write_gen = btree->write_gen;
- ckpt->start_durable_ts = start_durable_ts;
- ckpt->oldest_start_ts = oldest_start_ts;
- ckpt->oldest_start_txn = oldest_start_txn;
- ckpt->stop_durable_ts = stop_durable_ts;
- ckpt->newest_stop_ts = newest_stop_ts;
- ckpt->newest_stop_txn = newest_stop_txn;
+ __wt_time_aggregate_copy(&ckpt->ta, ta);
}
}
@@ -1536,6 +1543,7 @@ __checkpoint_tree(WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
WT_LSN ckptlsn;
+ WT_TIME_AGGREGATE ta;
bool fake_ckpt, resolve_bm;
WT_UNUSED(cfg);
@@ -1545,6 +1553,7 @@ __checkpoint_tree(WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[
conn = S2C(session);
dhandle = session->dhandle;
fake_ckpt = resolve_bm = false;
+ __wt_time_aggregate_init(&ta);
/*
* Set the checkpoint LSN to the maximum LSN so that if logging is disabled, recovery will never
@@ -1565,8 +1574,7 @@ __checkpoint_tree(WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[
* tears.
*/
if (is_checkpoint && btree->original) {
- __wt_checkpoint_tree_reconcile_update(
- session, WT_TS_NONE, WT_TS_NONE, WT_TXN_NONE, WT_TXN_NONE, WT_TS_MAX, WT_TXN_MAX);
+ __wt_checkpoint_tree_reconcile_update(session, &ta);
fake_ckpt = true;
goto fake;
diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c
index b5f5dab0077..90aa7ccae0a 100644
--- a/src/third_party/wiredtiger/src/txn/txn_log.c
+++ b/src/third_party/wiredtiger/src/txn/txn_log.c
@@ -35,7 +35,7 @@ __txn_op_log_row_key_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* the original page.
*/
if (cbt->ins == NULL) {
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = cbt->ref->page;
WT_ASSERT(session, cbt->slot < page->entries);
rip = &page->pg_row[cbt->slot];
@@ -554,8 +554,9 @@ __wt_txn_checkpoint_log(WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_
* connection close, only during a full checkpoint. A clean close may not update any
* metadata LSN and we do not want to archive in that case.
*/
- if (!conn->hot_backup && (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY) ||
- FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE)) &&
+ if (conn->hot_backup_start == 0 &&
+ (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY) ||
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_FORCE_DOWNGRADE)) &&
txn->full_ckpt)
__wt_log_ckpt(session, ckpt_lsn);
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index ed493f7765f..56a1f238c05 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -144,7 +144,8 @@ __txn_op_apply(WT_RECOVERY *r, WT_LSN *lsnp, const uint8_t **pp, const uint8_t *
* Build/insert a complete value during recovery rather than using cursor modify to
* create a partial update (for no particular reason than simplicity).
*/
- WT_ERR(__wt_modify_apply(cursor, value.data));
+ WT_ERR(__wt_modify_apply_item(
+ CUR2S(cursor), cursor->value_format, &cursor->value, value.data));
WT_ERR(cursor->insert(cursor));
}
break;
@@ -205,7 +206,8 @@ __txn_op_apply(WT_RECOVERY *r, WT_LSN *lsnp, const uint8_t **pp, const uint8_t *
* Build/insert a complete value during recovery rather than using cursor modify to
* create a partial update (for no particular reason than simplicity).
*/
- WT_ERR(__wt_modify_apply(cursor, value.data));
+ WT_ERR(__wt_modify_apply_item(
+ CUR2S(cursor), cursor->value_format, &cursor->value, value.data));
WT_ERR(cursor->insert(cursor));
}
break;
@@ -424,7 +426,7 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
}
if (r->files[fileid].uri != NULL)
- WT_PANIC_RET(r->session, WT_PANIC,
+ WT_RET_PANIC(r->session, WT_PANIC,
"metadata corruption: files %s and %s have the same "
"file ID %u",
uri, r->files[fileid].uri, fileid);
@@ -514,13 +516,14 @@ __recovery_file_scan(WT_RECOVERY *r)
* Run recovery.
*/
int
-__wt_txn_recover(WT_SESSION_IMPL *session)
+__wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_CONNECTION_IMPL *conn;
WT_CURSOR *metac;
WT_DECL_RET;
WT_RECOVERY r;
WT_RECOVERY_FILE *metafile;
+ WT_SESSION *wt_session;
char *config;
char ts_string[2][WT_TS_INT_STRING_SIZE];
bool do_checkpoint, eviction_started, hs_exists, needs_rec, was_backup;
@@ -548,18 +551,6 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
metafile->c = metac;
/*
- * We should check whether the history store file exists or not. or not. If it does not, then we
- * should not apply rollback to stable to each table. This might happen if we're upgrading from
- * an older version.
- */
- metac->set_key(metac, WT_HS_URI);
- WT_ERR_NOTFOUND_OK(metac->search(metac), true);
- if (ret == WT_NOTFOUND)
- hs_exists = false;
- /* Unpin the page from cache. */
- WT_ERR(metac->reset(metac));
-
- /*
* If no log was found (including if logging is disabled), or if the last checkpoint was done
* with logging disabled, recovery should not run. Scan the metadata to figure out the largest
* file ID.
@@ -633,6 +624,53 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
WT_ERR(ret);
}
+ /*
+ * We should check whether the history store file exists in the metadata or not. If it does not,
+ * then we should skip rollback to stable for each table. This might happen if we're upgrading
+ * from an older version. If it does exist in the metadata we should check that it exists on
+ * disk to confirm that it wasn't deleted between runs.
+ *
+ * This needs to happen after we apply the logs as they may contain the metadata changes which
+ * include the history store creation. As such the on disk metadata file won't contain the
+ * history store but will after log application.
+ */
+ metac->set_key(metac, WT_HS_URI);
+ WT_ERR_NOTFOUND_OK(metac->search(metac), true);
+ if (ret == WT_NOTFOUND) {
+ hs_exists = false;
+ } else {
+ /* Given the history store exists in the metadata validate whether it exists on disk. */
+ WT_ERR(__wt_fs_exist(session, WT_HS_FILE, &hs_exists));
+ if (hs_exists) {
+ /*
+ * Attempt to configure the history store, this will detect corruption if it fails.
+ */
+ ret = __wt_hs_config(session, cfg);
+ if (ret != 0) {
+ if (F_ISSET(conn, WT_CONN_SALVAGE)) {
+ wt_session = &session->iface;
+ WT_ERR(wt_session->salvage(wt_session, WT_HS_URI, NULL));
+ } else
+ WT_ERR(ret);
+ }
+ } else {
+ /*
+ * We're attempting to salvage the database with a missing history store, remove it from
+ * the metadata and pretend it never existed. As such we won't run rollback to stable
+ * later.
+ */
+ if (F_ISSET(conn, WT_CONN_SALVAGE)) {
+ hs_exists = false;
+ metac->remove(metac);
+ } else
+ /* The history store file has likely been deleted, we cannot recover from this. */
+ WT_ERR_MSG(session, WT_TRY_SALVAGE, "%s file is corrupted or missing", WT_HS_FILE);
+ }
+ }
+
+ /* Unpin the page from cache. */
+ WT_ERR(metac->reset(metac));
+
/* Scan the metadata to find the live files and their IDs. */
WT_ERR(__recovery_file_scan(&r));
/*
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index b27342c3f93..e2489bc8563 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -26,7 +26,8 @@ __rollback_abort_newer_update(WT_SESSION_IMPL *session, WT_UPDATE *first_upd,
if (upd->txnid == WT_TXN_ABORTED) {
if (upd == first_upd)
first_upd = upd->next;
- } else if (rollback_timestamp < upd->durable_ts) {
+ } else if (rollback_timestamp < upd->durable_ts ||
+ upd->prepare_state == WT_PREPARE_INPROGRESS) {
/*
* If any updates are aborted, all newer updates better be aborted as well.
*
@@ -40,9 +41,10 @@ __rollback_abort_newer_update(WT_SESSION_IMPL *session, WT_UPDATE *first_upd,
__wt_verbose(session, WT_VERB_RTS,
"rollback to stable update aborted with txnid: %" PRIu64
- " durable timestamp: %s and stable timestamp: %s",
+ " durable timestamp: %s and stable timestamp: %s, prepared: %s",
upd->txnid, __wt_timestamp_to_string(upd->durable_ts, ts_string[0]),
- __wt_timestamp_to_string(rollback_timestamp, ts_string[1]));
+ __wt_timestamp_to_string(rollback_timestamp, ts_string[1]),
+ rollback_timestamp < upd->durable_ts ? "false" : "true");
upd->txnid = WT_TXN_ABORTED;
WT_STAT_CONN_INCR(session, txn_rts_upd_aborted);
@@ -229,7 +231,8 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
WT_ERR(hs_cursor->get_value(hs_cursor, &hs_stop_ts, &durable_ts, &type_full, hs_value));
type = (uint8_t)type_full;
if (type == WT_UPDATE_MODIFY)
- WT_ERR(__wt_modify_apply_item(session, &full_value, hs_value->data, false));
+ WT_ERR(__wt_modify_apply_item(
+ session, S2BT(session)->value_format, &full_value, hs_value->data));
else {
WT_ASSERT(session, type == WT_UPDATE_STANDARD);
WT_ERR(__wt_buf_set(session, &full_value, hs_value->data, hs_value->size));
@@ -352,17 +355,20 @@ __rollback_abort_row_ondisk_kv(
WT_DECL_RET;
WT_ITEM buf;
WT_UPDATE *upd;
- char ts_string[3][WT_TS_INT_STRING_SIZE];
+ char ts_string[4][WT_TS_INT_STRING_SIZE];
+ bool prepared;
vpack = &_vpack;
upd = NULL;
__wt_row_leaf_value_cell(session, page, rip, NULL, vpack);
- if (vpack->durable_start_ts > rollback_timestamp) {
+ prepared = F_ISSET(vpack, WT_CELL_UNPACK_PREPARE);
+ if (vpack->tw.durable_start_ts > rollback_timestamp ||
+ (vpack->tw.durable_stop_ts == WT_TS_NONE && prepared)) {
__wt_verbose(session, WT_VERB_RTS,
- "on-disk update aborted with start durable timestamp: %s, commit timestamp: %s and "
- "stable timestamp: %s",
- __wt_timestamp_to_string(vpack->durable_start_ts, ts_string[0]),
- __wt_timestamp_to_string(vpack->start_ts, ts_string[1]),
+ "on-disk update aborted with start durable timestamp: %s, commit timestamp: %s, "
+ "prepared: %s and stable timestamp: %s",
+ __wt_timestamp_to_string(vpack->tw.durable_start_ts, ts_string[0]),
+ __wt_timestamp_to_string(vpack->tw.start_ts, ts_string[1]), prepared ? "true" : "false",
__wt_timestamp_to_string(rollback_timestamp, ts_string[2]));
if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
return (__rollback_row_ondisk_fixup_key(session, page, rip, rollback_timestamp, true));
@@ -374,8 +380,8 @@ __rollback_abort_row_ondisk_kv(
WT_RET(__wt_upd_alloc_tombstone(session, &upd, NULL));
WT_STAT_CONN_INCR(session, txn_rts_keys_removed);
}
- } else if (vpack->durable_stop_ts != WT_TS_NONE &&
- vpack->durable_stop_ts > rollback_timestamp) {
+ } else if (vpack->tw.durable_stop_ts != WT_TS_NONE &&
+ (vpack->tw.durable_stop_ts > rollback_timestamp || prepared)) {
/*
* Clear the remove operation from the key by inserting the original on-disk value as a
* standard update.
@@ -391,14 +397,19 @@ __rollback_abort_row_ondisk_kv(
WT_RET(__wt_page_cell_data_ref(session, page, vpack, &buf));
WT_RET(__wt_upd_alloc(session, &buf, WT_UPDATE_STANDARD, &upd, NULL));
- upd->txnid = vpack->start_txn;
- upd->durable_ts = vpack->durable_start_ts;
- upd->start_ts = vpack->start_ts;
+ upd->txnid = vpack->tw.start_txn;
+ upd->durable_ts = vpack->tw.durable_start_ts;
+ upd->start_ts = vpack->tw.start_ts;
WT_STAT_CONN_INCR(session, txn_rts_keys_restored);
__wt_verbose(session, WT_VERB_RTS,
- "key restored (txnid: %" PRIu64 ", start_ts: %s, durable_ts: %s", upd->txnid,
+ "key restored with commit timestamp: %s, durable timestamp: %s txnid: %" PRIu64
+ "and removed commit timestamp: %s, durable timestamp: %s, txnid: %" PRIu64
+ ", prepared: %s",
__wt_timestamp_to_string(upd->start_ts, ts_string[0]),
- __wt_timestamp_to_string(upd->durable_ts, ts_string[1]));
+ __wt_timestamp_to_string(upd->durable_ts, ts_string[1]), upd->txnid,
+ __wt_timestamp_to_string(vpack->tw.stop_ts, ts_string[2]),
+ __wt_timestamp_to_string(vpack->tw.durable_stop_ts, ts_string[3]), vpack->tw.stop_txn,
+ prepared ? "true" : "false");
} else
/* Stable version according to the timestamp. */
return (0);
@@ -517,13 +528,14 @@ __rollback_abort_row_reconciled_page(
return (0);
if (mod->rec_result == WT_PM_REC_REPLACE &&
- (mod->mod_replace.newest_start_durable_ts > rollback_timestamp ||
- mod->mod_replace.newest_stop_durable_ts > rollback_timestamp)) {
+ (mod->mod_replace.ta.newest_start_durable_ts > rollback_timestamp ||
+ mod->mod_replace.ta.newest_stop_durable_ts > rollback_timestamp ||
+ mod->mod_replace.ta.prepare)) {
__wt_verbose(session, WT_VERB_RTS,
"reconciled replace block page history store update removal On-disk with start "
"durable timestamp: %s, stop durable timestamp: %s and stable timestamp: %s",
- __wt_timestamp_to_string(mod->mod_replace.newest_start_durable_ts, ts_string[0]),
- __wt_timestamp_to_string(mod->mod_replace.newest_stop_durable_ts, ts_string[1]),
+ __wt_timestamp_to_string(mod->mod_replace.ta.newest_start_durable_ts, ts_string[0]),
+ __wt_timestamp_to_string(mod->mod_replace.ta.newest_stop_durable_ts, ts_string[1]),
__wt_timestamp_to_string(rollback_timestamp, ts_string[2]));
WT_RET(__rollback_abort_row_reconciled_page_internal(session, mod->u1.r.disk_image,
@@ -538,14 +550,15 @@ __rollback_abort_row_reconciled_page(
} else if (mod->rec_result == WT_PM_REC_MULTIBLOCK) {
for (multi = mod->mod_multi, multi_entry = 0; multi_entry < mod->mod_multi_entries;
++multi, ++multi_entry)
- if (multi->addr.newest_start_durable_ts > rollback_timestamp ||
- multi->addr.newest_stop_durable_ts > rollback_timestamp) {
+ if (multi->addr.ta.newest_start_durable_ts > rollback_timestamp ||
+ multi->addr.ta.newest_stop_durable_ts > rollback_timestamp ||
+ multi->addr.ta.prepare) {
__wt_verbose(session, WT_VERB_RTS,
- "reconciled multi block page history store update removal On-disk with "
+ "reconciled multi block page history store update removal on-disk with "
"start durable timestamp: %s, stop durable timestamp: %s and stable "
"timestamp: %s",
- __wt_timestamp_to_string(multi->addr.newest_start_durable_ts, ts_string[0]),
- __wt_timestamp_to_string(multi->addr.newest_stop_durable_ts, ts_string[1]),
+ __wt_timestamp_to_string(multi->addr.ta.newest_start_durable_ts, ts_string[0]),
+ __wt_timestamp_to_string(multi->addr.ta.newest_stop_durable_ts, ts_string[1]),
__wt_timestamp_to_string(rollback_timestamp, ts_string[2]));
WT_RET(__rollback_abort_row_reconciled_page_internal(session, multi->disk_image,
@@ -629,13 +642,13 @@ __rollback_page_needs_abort(
uint32_t i;
char ts_string[WT_TS_INT_STRING_SIZE];
const char *tag;
- bool result;
+ bool prepared, result;
addr = ref->addr;
mod = ref->page == NULL ? NULL : ref->page->modify;
durable_ts = WT_TS_NONE;
tag = "undefined state";
- result = false;
+ prepared = result = false;
/*
* The rollback operation should be performed on this page when any one of the following is
@@ -647,31 +660,37 @@ __rollback_page_needs_abort(
*/
if (mod != NULL && mod->rec_result == WT_PM_REC_REPLACE) {
tag = "reconciled replace block";
- durable_ts =
- WT_MAX(mod->mod_replace.newest_start_durable_ts, mod->mod_replace.newest_stop_durable_ts);
- result = (durable_ts > rollback_timestamp);
+ durable_ts = WT_MAX(
+ mod->mod_replace.ta.newest_start_durable_ts, mod->mod_replace.ta.newest_stop_durable_ts);
+ prepared = mod->mod_replace.ta.prepare;
+ result = (durable_ts > rollback_timestamp) || prepared;
} else if (mod != NULL && mod->rec_result == WT_PM_REC_MULTIBLOCK) {
tag = "reconciled multi block";
/* Calculate the max durable timestamp by traversing all multi addresses. */
for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
- durable_ts = WT_MAX(durable_ts, multi->addr.newest_start_durable_ts);
- durable_ts = WT_MAX(durable_ts, multi->addr.newest_stop_durable_ts);
+ durable_ts = WT_MAX(durable_ts, multi->addr.ta.newest_start_durable_ts);
+ durable_ts = WT_MAX(durable_ts, multi->addr.ta.newest_stop_durable_ts);
+ if (multi->addr.ta.prepare)
+ prepared = true;
}
- result = (durable_ts > rollback_timestamp);
+ result = (durable_ts > rollback_timestamp) || prepared;
} else if (!__wt_off_page(ref->home, addr)) {
tag = "on page cell";
/* Check if the page is obsolete using the page disk address. */
__wt_cell_unpack(session, ref->home, (WT_CELL *)addr, &vpack);
- durable_ts = WT_MAX(vpack.newest_start_durable_ts, vpack.newest_stop_durable_ts);
- result = (durable_ts > rollback_timestamp);
+ durable_ts = WT_MAX(vpack.ta.newest_start_durable_ts, vpack.ta.newest_stop_durable_ts);
+ prepared = F_ISSET(&vpack, WT_CELL_UNPACK_PREPARE);
+ result = (durable_ts > rollback_timestamp) || prepared;
} else if (addr != NULL) {
tag = "address";
- durable_ts = WT_MAX(addr->newest_start_durable_ts, addr->newest_stop_durable_ts);
- result = (durable_ts > rollback_timestamp);
+ durable_ts = WT_MAX(addr->ta.newest_start_durable_ts, addr->ta.newest_stop_durable_ts);
+ prepared = addr->ta.prepare;
+ result = (durable_ts > rollback_timestamp) || prepared;
}
- __wt_verbose(session, WT_VERB_RTS, "%p: page with %s durable timestamp: %s", (void *)ref, tag,
- __wt_timestamp_to_string(durable_ts, ts_string));
+ __wt_verbose(session, WT_VERB_RTS,
+ "%p: page with %s durable timestamp: %s and prepared updates: %s", (void *)ref, tag,
+ __wt_timestamp_to_string(durable_ts, ts_string), prepared ? "true" : "false");
return (result);
}
@@ -694,9 +713,9 @@ __rollback_verify_ondisk_page(
/* Review updates that belong to keys that are on the disk image. */
WT_ROW_FOREACH (page, rip, i) {
__wt_row_leaf_value_cell(session, page, rip, NULL, vpack);
- WT_ASSERT(session, vpack->start_ts <= rollback_timestamp);
- if (vpack->stop_ts != WT_TS_MAX)
- WT_ASSERT(session, vpack->stop_ts <= rollback_timestamp);
+ WT_ASSERT(session, vpack->tw.durable_start_ts <= rollback_timestamp);
+ WT_ASSERT(session, vpack->tw.durable_stop_ts == WT_TS_NONE ||
+ vpack->tw.durable_stop_ts <= rollback_timestamp);
}
}
#endif
@@ -733,7 +752,8 @@ __rollback_abort_newer_updates(
#ifdef HAVE_DIAGNOSTIC
if (ref->page == NULL && !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) {
WT_RET(__wt_page_in(session, ref, 0));
- __rollback_verify_ondisk_page(session, ref->page, rollback_timestamp);
+ if (ref->page->type == WT_PAGE_ROW_LEAF)
+ __rollback_verify_ondisk_page(session, ref->page, rollback_timestamp);
WT_TRET(__wt_page_release(session, ref, 0));
}
#endif
@@ -846,7 +866,7 @@ __rollback_to_stable_btree(WT_SESSION_IMPL *session, wt_timestamp_t rollback_tim
*/
if (__wt_btree_immediately_durable(session)) {
if (btree->id >= conn->stable_rollback_maxfile)
- WT_PANIC_RET(session, EINVAL, "btree file ID %" PRIu32 " larger than max %" PRIu32,
+ WT_RET_PANIC(session, EINVAL, "btree file ID %" PRIu32 " larger than max %" PRIu32,
btree->id, conn->stable_rollback_maxfile);
return (0);
}
@@ -991,10 +1011,11 @@ __rollback_to_stable_btree_apply(WT_SESSION_IMPL *session)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t max_durable_ts, start_durable_ts, stop_durable_ts, rollback_timestamp;
+ wt_timestamp_t max_durable_ts, newest_start_durable_ts, newest_stop_durable_ts,
+ rollback_timestamp;
char ts_string[2][WT_TS_INT_STRING_SIZE];
const char *config, *uri;
- bool durable_ts_found;
+ bool durable_ts_found, prepared_updates;
txn_global = &S2C(session)->txn_global;
@@ -1025,25 +1046,33 @@ __rollback_to_stable_btree_apply(WT_SESSION_IMPL *session)
WT_ERR(cursor->get_value(cursor, &config));
/* Find out the max durable timestamp of the object from checkpoint. */
- start_durable_ts = stop_durable_ts = WT_TS_NONE;
- durable_ts_found = false;
+ newest_start_durable_ts = newest_stop_durable_ts = WT_TS_NONE;
+ durable_ts_found = prepared_updates = false;
WT_ERR(__wt_config_getones(session, config, "checkpoint", &cval));
__wt_config_subinit(session, &ckptconf, &cval);
for (; __wt_config_next(&ckptconf, &key, &cval) == 0;) {
- ret = __wt_config_subgets(session, &cval, "start_durable_ts", &durableval);
+ ret = __wt_config_subgets(session, &cval, "newest_start_durable_ts", &durableval);
if (ret == 0) {
- start_durable_ts = WT_MAX(start_durable_ts, (wt_timestamp_t)durableval.val);
+ newest_start_durable_ts =
+ WT_MAX(newest_start_durable_ts, (wt_timestamp_t)durableval.val);
durable_ts_found = true;
}
WT_ERR_NOTFOUND_OK(ret, false);
- ret = __wt_config_subgets(session, &cval, "stop_durable_ts", &durableval);
+ ret = __wt_config_subgets(session, &cval, "newest_stop_durable_ts", &durableval);
if (ret == 0) {
- stop_durable_ts = WT_MAX(stop_durable_ts, (wt_timestamp_t)durableval.val);
+ newest_stop_durable_ts =
+ WT_MAX(newest_stop_durable_ts, (wt_timestamp_t)durableval.val);
durable_ts_found = true;
}
WT_ERR_NOTFOUND_OK(ret, false);
+ ret = __wt_config_subgets(session, &cval, "prepare", &durableval);
+ if (ret == 0) {
+ if (durableval.val)
+ prepared_updates = true;
+ }
+ WT_ERR_NOTFOUND_OK(ret, false);
}
- max_durable_ts = WT_MAX(start_durable_ts, stop_durable_ts);
+ max_durable_ts = WT_MAX(newest_start_durable_ts, newest_stop_durable_ts);
ret = __wt_session_get_dhandle(session, uri, NULL, NULL, 0);
/* Ignore performing rollback to stable on files that don't exist. */
if (ret == ENOENT)
@@ -1056,12 +1085,14 @@ __rollback_to_stable_btree_apply(WT_SESSION_IMPL *session)
* 2. The checkpoint durable start/stop timestamp is greater than the rollback timestamp.
* 3. There is no durable timestamp in any checkpoint.
*/
- if (S2BT(session)->modified || max_durable_ts > rollback_timestamp || !durable_ts_found) {
+ if (S2BT(session)->modified || max_durable_ts > rollback_timestamp || prepared_updates ||
+ !durable_ts_found) {
__wt_verbose(session, WT_VERB_RTS,
"tree rolled back with durable timestamp: %s, or when tree is modified: %s or "
- "when durable time is not found: %s",
+ "prepared updates: %s or when durable time is not found: %s",
__wt_timestamp_to_string(max_durable_ts, ts_string[0]),
- S2BT(session)->modified ? "true" : "false", !durable_ts_found ? "true" : "false");
+ S2BT(session)->modified ? "true" : "false", prepared_updates ? "true" : "false",
+ !durable_ts_found ? "true" : "false");
WT_TRET(__rollback_to_stable_btree(session, rollback_timestamp));
} else
__wt_verbose(session, WT_VERB_RTS,
@@ -1141,9 +1172,9 @@ __wt_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[], bool no_ckp
* Rollback to stable should ignore tombstones in the history store since it needs to scan the
* entire table sequentially.
*/
- F_SET(session, WT_SESSION_ROLLBACK_TO_STABLE | WT_SESSION_IGNORE_HS_TOMBSTONE);
+ F_SET(session, WT_SESSION_ROLLBACK_TO_STABLE);
ret = __rollback_to_stable(session, cfg);
- F_CLR(session, WT_SESSION_ROLLBACK_TO_STABLE | WT_SESSION_IGNORE_HS_TOMBSTONE);
+ F_CLR(session, WT_SESSION_ROLLBACK_TO_STABLE);
WT_RET(ret);
/*
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index f81e7b54acc..4bd11d5456a 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -9,6 +9,18 @@
#include "wt_internal.h"
/*
+ * __wt_timestamp_to_string --
+ * Convert a timestamp to the MongoDB string representation.
+ */
+char *
+__wt_timestamp_to_string(wt_timestamp_t ts, char *ts_string)
+{
+ WT_IGNORE_RET(__wt_snprintf(ts_string, WT_TS_INT_STRING_SIZE, "(%" PRIu32 ", %" PRIu32 ")",
+ (uint32_t)((ts >> 32) & 0xffffffff), (uint32_t)(ts & 0xffffffff)));
+ return (ts_string);
+}
+
+/*
* __wt_time_pair_to_string --
* Converts a time pair to a standard string representation.
*/
@@ -23,15 +35,41 @@ __wt_time_pair_to_string(wt_timestamp_t timestamp, uint64_t txn_id, char *tp_str
}
/*
- * __wt_timestamp_to_string --
- * Convert a timestamp to the MongoDB string representation.
+ * __wt_time_window_to_string --
+ * Converts a time window to a standard string representation.
*/
char *
-__wt_timestamp_to_string(wt_timestamp_t ts, char *ts_string)
+__wt_time_window_to_string(WT_TIME_WINDOW *tw, char *tw_string)
{
- WT_IGNORE_RET(__wt_snprintf(ts_string, WT_TS_INT_STRING_SIZE, "(%" PRIu32 ", %" PRIu32 ")",
- (uint32_t)((ts >> 32) & 0xffffffff), (uint32_t)(ts & 0xffffffff)));
- return (ts_string);
+ char ts_string[4][WT_TS_INT_STRING_SIZE];
+
+ WT_IGNORE_RET(__wt_snprintf(tw_string, WT_TIME_STRING_SIZE,
+ "start: %s/%s/%" PRIu64 " stop: %s/%s/%" PRIu64 "%s",
+ __wt_timestamp_to_string(tw->durable_start_ts, ts_string[0]),
+ __wt_timestamp_to_string(tw->start_ts, ts_string[1]), tw->start_txn,
+ __wt_timestamp_to_string(tw->durable_stop_ts, ts_string[2]),
+ __wt_timestamp_to_string(tw->stop_ts, ts_string[3]), tw->stop_txn,
+ tw->prepare ? ", prepared" : ""));
+ return (tw_string);
+}
+
+/*
+ * __wt_time_aggregate_to_string --
+ * Converts a time aggregate to a standard string representation.
+ */
+char *
+__wt_time_aggregate_to_string(WT_TIME_AGGREGATE *ta, char *ta_string)
+{
+ char ts_string[4][WT_TS_INT_STRING_SIZE];
+
+ WT_IGNORE_RET(__wt_snprintf(ta_string, WT_TIME_STRING_SIZE,
+ "newest durable: %s/%s oldest start: %s/%" PRIu64 " newest stop %s/%" PRIu64 "%s",
+ __wt_timestamp_to_string(ta->newest_start_durable_ts, ts_string[0]),
+ __wt_timestamp_to_string(ta->newest_stop_durable_ts, ts_string[1]),
+ __wt_timestamp_to_string(ta->oldest_start_ts, ts_string[2]), ta->oldest_start_txn,
+ __wt_timestamp_to_string(ta->newest_stop_ts, ts_string[3]), ta->newest_stop_txn,
+ ta->prepare ? ", prepared" : ""));
+ return (ta_string);
}
/*
@@ -700,8 +738,7 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t commit_ts
__wt_timestamp_to_string(txn->first_commit_timestamp, ts_string[1]));
/*
- * FIXME:
- * WT-4779 disabled to buy time to understand a test failure.
+ * FIXME-WT-4780: Disabled to buy time to understand a test failure.
* WT_RET(__txn_assert_after_reads(
* session, "commit", commit_ts, NULL));
*/
diff --git a/src/third_party/wiredtiger/src/utilities/util_dump.c b/src/third_party/wiredtiger/src/utilities/util_dump.c
index 47b6f99bbe9..9ea4e10e7aa 100755
--- a/src/third_party/wiredtiger/src/utilities/util_dump.c
+++ b/src/third_party/wiredtiger/src/utilities/util_dump.c
@@ -155,7 +155,7 @@ util_dump(WT_SESSION *session, int argc, char *argv[])
* case, we're specifically interested in what is visible at a given read timestamp.
*/
if (WT_STREQ(simpleuri, WT_HS_URI) && timestamp == NULL)
- F_SET(session_impl, WT_SESSION_IGNORE_HS_TOMBSTONE);
+ F_SET(cursor, WT_CURSTD_IGNORE_TOMBSTONE);
if (dump_config(session, simpleuri, cursor, hex, json) != 0)
goto err;
@@ -164,6 +164,7 @@ util_dump(WT_SESSION *session, int argc, char *argv[])
if (json && dump_json_table_end(session) != 0)
goto err;
+ F_CLR(cursor, WT_CURSTD_IGNORE_TOMBSTONE);
ret = cursor->close(cursor);
cursor = NULL;
if (ret != 0) {
@@ -179,9 +180,12 @@ err:
ret = 1;
}
- F_CLR(session_impl, WT_SESSION_IGNORE_HS_TOMBSTONE);
- if (cursor != NULL && (ret = cursor->close(cursor)) != 0)
- ret = util_err(session, ret, NULL);
+ if (cursor != NULL) {
+ F_CLR(cursor, WT_CURSTD_IGNORE_TOMBSTONE);
+ if ((ret = cursor->close(cursor)) != 0)
+ ret = util_err(session, ret, NULL);
+ }
+
if (ofile != NULL && (ret = fclose(fp)) != 0)
ret = util_err(session, errno, NULL);
diff --git a/src/third_party/wiredtiger/test/csuite/Makefile.am b/src/third_party/wiredtiger/test/csuite/Makefile.am
index 8eaf1a83756..fdfb42ffc3f 100644
--- a/src/third_party/wiredtiger/test/csuite/Makefile.am
+++ b/src/third_party/wiredtiger/test/csuite/Makefile.am
@@ -8,8 +8,7 @@ all_TESTS=
noinst_PROGRAMS=
# The import test is only a shell script
-# Temporarily disabled
-# all_TESTS += import/smoke.sh
+all_TESTS += import/smoke.sh
test_incr_backup_SOURCES = incr_backup/main.c
noinst_PROGRAMS += test_incr_backup
diff --git a/src/third_party/wiredtiger/test/csuite/random_abort/main.c b/src/third_party/wiredtiger/test/csuite/random_abort/main.c
index 906492c6c20..52161d089a8 100644
--- a/src/third_party/wiredtiger/test/csuite/random_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/random_abort/main.c
@@ -160,7 +160,7 @@ thread_run(void *arg)
/*
* Make sure that alternative threads operate on column-store table
*
- * FIXME: temporarily turn off column store test.
+ * FIXME-WT-6125: temporarily turn off column store test.
*/
if (td->id % 2 != 0)
columnar_table = true;
@@ -180,7 +180,7 @@ thread_run(void *arg)
if (i == 0)
i++;
- /* FIXME: temporarily turn off tests for lower isolation levels. */
+ /* FIXME-WT-6035: temporarily turn off tests for lower isolation levels. */
testutil_check(session->begin_transaction(session, "isolation=snapshot"));
/*
@@ -208,7 +208,7 @@ thread_run(void *arg)
cursor->set_value(cursor, &data);
testutil_check(cursor->insert(cursor));
- /* FIXME: temporarily turn off tests for lower isolation levels. */
+ /* FIXME-WT-6035: temporarily turn off tests for lower isolation levels. */
testutil_check(session->commit_transaction(session, NULL));
/*
@@ -221,7 +221,7 @@ thread_run(void *arg)
* Decide what kind of operation can be performed on the already inserted data.
*/
if (i % MAX_NUM_OPS == OP_TYPE_DELETE) {
- /* FIXME: temporarily turn off tests for lower isolation levels. */
+ /* FIXME-WT-6035: temporarily turn off tests for lower isolation levels. */
testutil_check(session->begin_transaction(session, "isolation=snapshot"));
if (columnar_table)
@@ -231,7 +231,7 @@ thread_run(void *arg)
testutil_check(cursor->remove(cursor));
- /* FIXME: temporarily turn off tests for lower isolation levels. */
+ /* FIXME-WT-6035: temporarily turn off tests for lower isolation levels. */
testutil_check(session->commit_transaction(session, NULL));
/* Save the key separately for checking later.*/
@@ -391,7 +391,7 @@ recover_and_verify(uint32_t nthreads)
cursor = row_cursor;
}
#else
- /* FIXME: temporarily turn off column store test. */
+ /* FIXME-WT-6125: temporarily turn off column store test. */
columnar_table = false;
cursor = row_cursor;
#endif
diff --git a/src/third_party/wiredtiger/test/csuite/random_abort/smoke.sh b/src/third_party/wiredtiger/test/csuite/random_abort/smoke.sh
index 713b000b4f1..aea7fedf4b9 100755
--- a/src/third_party/wiredtiger/test/csuite/random_abort/smoke.sh
+++ b/src/third_party/wiredtiger/test/csuite/random_abort/smoke.sh
@@ -9,6 +9,10 @@ set -e
top_builddir=${top_builddir:-../../build_posix}
top_srcdir=${top_srcdir:-../..}
+#FIXME-WT-6093: reenable calls to test_random_abort
+echo "Warning: test_random_abort temporarily disabled"
+exit 0
+
$TEST_WRAPPER $top_builddir/test/csuite/test_random_abort -t 10 -T 5
$TEST_WRAPPER $top_builddir/test/csuite/test_random_abort -m -t 10 -T 5
$TEST_WRAPPER $top_builddir/test/csuite/test_random_abort -C -t 10 -T 5
diff --git a/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c b/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c
index 3483b047fed..a63cfb2724a 100644
--- a/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt4105_large_doc_small_upd/main.c
@@ -132,7 +132,7 @@ main(int argc, char *argv[])
modify_entry.data.size = strlen(modify_entry.data.data);
modify_entry.offset = offset;
modify_entry.size = modify_entry.data.size;
- /* FIXME-PM-1521: extend timeout to pass the test */
+ /* FIXME-WT-6113: extend timeout to pass the test */
(void)alarm(7);
testutil_check(c->modify(c, &modify_entry, 1));
(void)alarm(0);
diff --git a/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c b/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c
index 8f5394332ad..9a263eb9ef6 100644
--- a/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt4333_handle_locks/main.c
@@ -109,8 +109,8 @@ op(WT_SESSION *session, WT_RAND_STATE *rnd, WT_CURSOR **cpp)
/*
* Use a checkpoint handle for 50% of reads.
*
- * FIXME: Checkpoint cursors are known to have issues in durable history so we've removing
- * the use of checkpoint handles in this test. As part of WT-5927, we should either
+ * FIXME-WT-5927: Checkpoint cursors are known to have issues in durable history so we've
+ * removing the use of checkpoint handles in this test. As part of WT-5927, we should either
* re-enable the testing of checkpoint cursors or remove this comment.
*/
ret = session->open_cursor(session, uri_list[i], NULL, NULL, &cursor);
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 2649f1ee703..62b5c205035 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -198,7 +198,9 @@ functions:
script: |
set -o errexit
set -o verbose
- ${test_env_vars|} ./test_random_abort ${random_abort_args|} 2>&1
+ #FIXME-WT-6093: reenable all calls to test_random_abort
+ echo "Warning: test_random_abort temporarily disabled"
+ ##${test_env_vars|} ./test_random_abort ${random_abort_args|} 2>&1
"timestamp abort test":
command: shell.exec
params:
@@ -226,19 +228,20 @@ functions:
for i in $(seq ${times|1}); do
# Run the various combinations of args. Let time and threads be random.
# Run current version with write-no-sync txns.
- ${test_env_vars|} ./test_random_abort 2>&1
+ echo "Warning: test_random_abort temporarily disabled"
+ ##${test_env_vars|} ./test_random_abort 2>&1
${test_env_vars|} ./test_timestamp_abort 2>&1
# Current version with memory-based txns (MongoDB usage).
- ${test_env_vars|} ./test_random_abort -m 2>&1
+ ##${test_env_vars|} ./test_random_abort -m 2>&1
${test_env_vars|} ./test_timestamp_abort -m 2>&1
# V1 log compatibility mode with write-no-sync txns.
- ${test_env_vars|} ./test_random_abort -C 2>&1
+ ##${test_env_vars|} ./test_random_abort -C 2>&1
${test_env_vars|} ./test_timestamp_abort -C 2>&1
# V1 log compatibility mode with memory-based txns.
- ${test_env_vars|} ./test_random_abort -C -m 2>&1
+ ##${test_env_vars|} ./test_random_abort -C -m 2>&1
${test_env_vars|} ./test_timestamp_abort -C -m 2>&1
${test_env_vars|} ./test_truncated_log ${truncated_log_args|} 2>&1
@@ -270,15 +273,14 @@ functions:
rm -rf "wiredtiger"
rm -rf "wiredtiger.tgz"
- # Temporarily disabled
- # "checkpoint test":
- # command: shell.exec
- # params:
- # working_dir: "wiredtiger/build_posix/test/checkpoint"
- # script: |
- # set -o errexit
- # set -o verbose
- # ./t ${checkpoint_args} 2>&1
+ "checkpoint test":
+ command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix/test/checkpoint"
+ script: |
+ set -o errexit
+ set -o verbose
+ ./t ${checkpoint_args} 2>&1
"checkpoint stress test":
command: shell.exec
@@ -577,17 +579,16 @@ tasks:
vars:
directory: test/cursor_order
- # Temporarily disabled
- # - name: fops-test
- # tags: ["pull_request"]
- # depends_on:
- # - name: compile
- # commands:
- # - func: "fetch artifacts"
- # - func: "compile wiredtiger"
- # - func: "make check directory"
- # vars:
- # directory: test/fops
+ - name: fops-test
+ tags: ["pull_request"]
+ depends_on:
+ - name: compile
+ commands:
+ - func: "fetch artifacts"
+ - func: "compile wiredtiger"
+ - func: "make check directory"
+ vars:
+ directory: test/fops
# Temporarily disabled
# - name: format-test
@@ -699,21 +700,20 @@ tasks:
# Start of csuite test tasks
- # Temporarily disabled
- # - name: csuite-import-test
- # tags: ["pull_request"]
- # depends_on:
- # - name: compile
- # commands:
- # - func: "fetch artifacts"
- # - command: shell.exec
- # params:
- # working_dir: "wiredtiger/build_posix"
- # script: |
- # set -o errexit
- # set -o verbose
+ - name: csuite-import-test
+ tags: ["pull_request"]
+ depends_on:
+ - name: compile
+ commands:
+ - func: "fetch artifacts"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix"
+ script: |
+ set -o errexit
+ set -o verbose
- # ${test_env_vars|} $(pwd)/../test/csuite/import/smoke.sh 2>&1
+ ${test_env_vars|} $(pwd)/../test/csuite/import/smoke.sh 2>&1
- name: csuite-incr-backup-test
tags: ["pull_request"]
@@ -1494,40 +1494,23 @@ tasks:
pip install scons==3.1.1
scons-3.1.1.bat ${smp_command|} check
- # Temporarily disabled
- # - name: fops
- # tags: ["pull_request"]
- # depends_on:
- # - name: compile
- # commands:
- # - func: "fetch artifacts"
- # - command: shell.exec
- # params:
- # working_dir: "wiredtiger"
- # script: |
- # set -o errexit
- # set -o verbose
- # if [ "Windows_NT" = "$OS" ]; then
- # cmd.exe /c t_fops.exe
- # else
- # build_posix/test/fops/t
- # fi
-
- # Temporarily disabled
- # - name: format
- # tags: ["windows_only"]
- # depends_on:
- # - name: compile
- # commands:
- # - func: "fetch artifacts"
- # - command: shell.exec
- # params:
- # working_dir: "wiredtiger"
- # script: |
- # set -o errexit
- # set -o verbose
- # # format assumes we run it from the format directory
- # cmd.exe /c "cd test\\format && ..\\..\\t_format.exe reverse=0 encryption=none logging_compression=none runs=20"
+ - name: fops
+ tags: ["pull_request"]
+ depends_on:
+ - name: compile
+ commands:
+ - func: "fetch artifacts"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger"
+ script: |
+ set -o errexit
+ set -o verbose
+ if [ "Windows_NT" = "$OS" ]; then
+ cmd.exe /c t_fops.exe
+ else
+ build_posix/test/fops/t
+ fi
- name: million-collection-test
commands:
@@ -1555,135 +1538,129 @@ tasks:
set -o verbose
test/evergreen/compatibility_test_for_releases.sh
- # Temporarily disabled
- # - name: generate-datafile-little-endian
- # depends_on:
- # - name: compile
- # commands:
- # - func: "fetch artifacts"
- # - func: "compile wiredtiger"
- # - func: "format test"
- # vars:
- # times: 10
- # config: ../../../test/format/CONFIG.endian
- # extra_args: -h "WT_TEST.$i"
- # - command: shell.exec
- # params:
- # working_dir: "wiredtiger/build_posix/test/format"
- # shell: bash
- # script: |
- # set -o errexit
- # set -o verbose
- # # Archive the WT_TEST directories which include the generated wt data files
- # tar -zcvf WT_TEST.tgz WT_TEST*
- # - command: s3.put
- # params:
- # aws_secret: ${aws_secret}
- # aws_key: ${aws_key}
- # local_file: wiredtiger/build_posix/test/format/WT_TEST.tgz
- # bucket: build_external
- # permissions: public-read
- # content_type: application/tar
- # display_name: WT_TEST
- # remote_file: wiredtiger/little-endian/${revision}/artifacts/WT_TEST.tgz
+ - name: generate-datafile-little-endian
+ depends_on:
+ - name: compile
+ commands:
+ - func: "fetch artifacts"
+ - func: "compile wiredtiger"
+ - func: "format test"
+ vars:
+ times: 10
+ config: ../../../test/format/CONFIG.endian
+ extra_args: -h "WT_TEST.$i"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix/test/format"
+ shell: bash
+ script: |
+ set -o errexit
+ set -o verbose
+ # Archive the WT_TEST directories which include the generated wt data files
+ tar -zcvf WT_TEST.tgz WT_TEST*
+ - command: s3.put
+ params:
+ aws_secret: ${aws_secret}
+ aws_key: ${aws_key}
+ local_file: wiredtiger/build_posix/test/format/WT_TEST.tgz
+ bucket: build_external
+ permissions: public-read
+ content_type: application/tar
+ display_name: WT_TEST
+ remote_file: wiredtiger/little-endian/${revision}/artifacts/WT_TEST.tgz
- # Temporarily disabled
- # - name: verify-datafile-little-endian
- # depends_on:
- # - name: compile
- # - name: generate-datafile-little-endian
- # commands:
- # - func: "fetch artifacts"
- # - func: "fetch artifacts from little-endian"
- # - command: shell.exec
- # params:
- # working_dir: "wiredtiger"
- # script: |
- # set -o errexit
- # set -o verbose
- # ./test/evergreen/verify_wt_datafiles.sh 2>&1
+ - name: verify-datafile-little-endian
+ depends_on:
+ - name: compile
+ - name: generate-datafile-little-endian
+ commands:
+ - func: "fetch artifacts"
+ - func: "fetch artifacts from little-endian"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger"
+ script: |
+ set -o errexit
+ set -o verbose
+ ./test/evergreen/verify_wt_datafiles.sh 2>&1
- # Temporarily disabled
- # - name: verify-datafile-from-little-endian
- # depends_on:
- # - name: compile
- # - name: generate-datafile-little-endian
- # variant: little-endian
- # commands:
- # - func: "fetch artifacts"
- # - func: "fetch artifacts from little-endian"
- # - command: shell.exec
- # params:
- # working_dir: "wiredtiger"
- # script: |
- # set -o errexit
- # set -o verbose
- # ./test/evergreen/verify_wt_datafiles.sh 2>&1
+ - name: verify-datafile-from-little-endian
+ depends_on:
+ - name: compile
+ - name: generate-datafile-little-endian
+ variant: little-endian
+ commands:
+ - func: "fetch artifacts"
+ - func: "fetch artifacts from little-endian"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger"
+ script: |
+ set -o errexit
+ set -o verbose
+ ./test/evergreen/verify_wt_datafiles.sh 2>&1
- # Temporarily disabled
- # - name: generate-datafile-big-endian
- # depends_on:
- # - name: compile
- # commands:
- # - func: "fetch artifacts"
- # - func: "compile wiredtiger"
- # - func: "format test"
- # vars:
- # times: 10
- # config: ../../../test/format/CONFIG.endian
- # extra_args: -h "WT_TEST.$i"
- # - command: shell.exec
- # params:
- # working_dir: "wiredtiger/build_posix/test/format"
- # shell: bash
- # script: |
- # set -o errexit
- # set -o verbose
- # # Archive the WT_TEST directories which include the generated wt data files
- # tar -zcvf WT_TEST.tgz WT_TEST*
- # - command: s3.put
- # params:
- # aws_secret: ${aws_secret}
- # aws_key: ${aws_key}
- # local_file: wiredtiger/build_posix/test/format/WT_TEST.tgz
- # bucket: build_external
- # permissions: public-read
- # content_type: application/tar
- # display_name: WT_TEST
- # remote_file: wiredtiger/big-endian/${revision}/artifacts/WT_TEST.tgz
+ - name: generate-datafile-big-endian
+ depends_on:
+ - name: compile
+ commands:
+ - func: "fetch artifacts"
+ - func: "compile wiredtiger"
+ - func: "format test"
+ vars:
+ times: 10
+ config: ../../../test/format/CONFIG.endian
+ extra_args: -h "WT_TEST.$i"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix/test/format"
+ shell: bash
+ script: |
+ set -o errexit
+ set -o verbose
+ # Archive the WT_TEST directories which include the generated wt data files
+ tar -zcvf WT_TEST.tgz WT_TEST*
+ - command: s3.put
+ params:
+ aws_secret: ${aws_secret}
+ aws_key: ${aws_key}
+ local_file: wiredtiger/build_posix/test/format/WT_TEST.tgz
+ bucket: build_external
+ permissions: public-read
+ content_type: application/tar
+ display_name: WT_TEST
+ remote_file: wiredtiger/big-endian/${revision}/artifacts/WT_TEST.tgz
- # Temporarily disabled
- # - name: verify-datafile-big-endian
- # depends_on:
- # - name: compile
- # - name: generate-datafile-big-endian
- # commands:
- # - func: "fetch artifacts"
- # - func: "fetch artifacts from big-endian"
- # - command: shell.exec
- # params:
- # working_dir: "wiredtiger"
- # script: |
- # set -o errexit
- # set -o verbose
- # ./test/evergreen/verify_wt_datafiles.sh 2>&1
+ - name: verify-datafile-big-endian
+ depends_on:
+ - name: compile
+ - name: generate-datafile-big-endian
+ commands:
+ - func: "fetch artifacts"
+ - func: "fetch artifacts from big-endian"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger"
+ script: |
+ set -o errexit
+ set -o verbose
+ ./test/evergreen/verify_wt_datafiles.sh 2>&1
- # Temporarily disabled
- # - name: verify-datafile-from-big-endian
- # depends_on:
- # - name: compile
- # - name: generate-datafile-big-endian
- # variant: big-endian
- # commands:
- # - func: "fetch artifacts"
- # - func: "fetch artifacts from big-endian"
- # - command: shell.exec
- # params:
- # working_dir: "wiredtiger"
- # script: |
- # set -o errexit
- # set -o verbose
- # ./test/evergreen/verify_wt_datafiles.sh 2>&1
+ - name: verify-datafile-from-big-endian
+ depends_on:
+ - name: compile
+ - name: generate-datafile-big-endian
+ variant: big-endian
+ commands:
+ - func: "fetch artifacts"
+ - func: "fetch artifacts from big-endian"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger"
+ script: |
+ set -o errexit
+ set -o verbose
+ ./test/evergreen/verify_wt_datafiles.sh 2>&1
- name: clang-analyzer
tags: ["pull_request"]
@@ -1731,7 +1708,6 @@ tasks:
vars:
format_test_script_args: -t 110 -j 4 direct_io=1
- # Temporarily disabled
# - name: linux-directio
# depends_on:
# - name: compile
@@ -1744,18 +1720,17 @@ tasks:
# config: ../../../test/format/CONFIG.stress
# extra_args: -C "direct_io=[data]"
- # Temporarily disabled
- # - name: format-linux-no-ftruncate
- # depends_on:
- # - name: compile-linux-no-ftruncate
- # commands:
- # - func: "fetch artifacts"
- # vars:
- # dependent_task: compile-linux-no-ftruncate
- # - func: "compile wiredtiger no linux ftruncate"
- # - func: "format test"
- # vars:
- # times: 3
+ - name: format-linux-no-ftruncate
+ depends_on:
+ - name: compile-linux-no-ftruncate
+ commands:
+ - func: "fetch artifacts"
+ vars:
+ dependent_task: compile-linux-no-ftruncate
+ - func: "compile wiredtiger no linux ftruncate"
+ - func: "format test"
+ vars:
+ times: 3
- name: package
commands:
@@ -1782,113 +1757,109 @@ tasks:
set -o verbose
${python_binary|python} syscall.py --verbose --preserve
- # Temporarily disabled
- # - name: checkpoint-filetypes-test
- # commands:
- # - func: "get project"
- # - func: "compile wiredtiger"
- # vars:
- # # Don't use diagnostic - this test looks for timing problems that are more likely to occur without it
- # posix_configure_flags: --enable-strict
- # - func: "checkpoint test"
- # vars:
- # checkpoint_args: -t m -n 1000000 -k 5000000 -C cache_size=100MB
- # - func: "checkpoint test"
- # vars:
- # checkpoint_args: -t r -n 1000000 -k 5000000 -C cache_size=100MB
- # - func: "checkpoint test"
- # vars:
- # checkpoint_args: -t c -n 1000000 -k 5000000 -C cache_size=100MB
+ - name: checkpoint-filetypes-test
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ vars:
+ # Don't use diagnostic - this test looks for timing problems that are more likely to occur without it
+ posix_configure_flags: --enable-strict
+ - func: "checkpoint test"
+ vars:
+ checkpoint_args: -t m -n 1000000 -k 5000000 -C cache_size=100MB
+ - func: "checkpoint test"
+ vars:
+ checkpoint_args: -t r -n 1000000 -k 5000000 -C cache_size=100MB
+ - func: "checkpoint test"
+ vars:
+ checkpoint_args: -t c -n 1000000 -k 5000000 -C cache_size=100MB
- # Temporarily disabled
- # - name: coverage-report
- # commands:
- # - func: "get project"
- # - func: "compile wiredtiger"
- # vars:
- # configure_env_vars: CC=/opt/mongodbtoolchain/v3/bin/gcc CXX=/opt/mongodbtoolchain/v3/bin/g++ PATH=/opt/mongodbtoolchain/v3/bin:$PATH CFLAGS="--coverage -fPIC -ggdb" LDFLAGS=--coverage
- # posix_configure_flags: --enable-silent-rules --enable-diagnostic --enable-strict --enable-python --with-builtins=lz4,snappy,zlib
- # - func: "make check all"
- # - func: "unit test"
- # vars:
- # unit_test_args: -v 2 --long
- # - func: "format test"
- # vars:
- # extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row compression=snappy logging=1 logging_compression=snappy logging_prealloc=1
- # - func: "format test"
- # vars:
- # extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row alter=1 backups=1 compaction=1 data_extend=1 prepare=1 rebalance=1 salvage=1 statistics=1 statistics_server=1 verify=1
- # - func: "format test"
- # vars:
- # extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row firstfit=1 internal_key_truncation=1
- # - func: "format test"
- # vars:
- # extra_args: leak_memory=0 mmap=1 file_type=row checkpoints=0 in_memory=1 reverse=1 truncate=1
- # - func: "format test"
- # vars:
- # extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row compression=zlib huffman_key=1 huffman_value=1
- # - func: "format test"
- # vars:
- # extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row isolation=random transaction_timestamps=0
- # - func: "format test"
- # vars:
- # extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row data_source=lsm bloom=1
- # - func: "format test"
- # vars:
- # extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=var compression=snappy checksum=uncompressed dictionary=1 repeat_data_pct=10
- # - func: "format test"
- # vars:
- # extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row compression=lz4 prefix_compression=1 leaf_page_max=9 internal_page_max=9 key_min=256 value_min=256
- # - func: "format test"
- # vars:
- # extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=var leaf_page_max=9 internal_page_max=9 value_min=256
- # - func: "format test"
- # vars:
- # extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=fix
- # - command: shell.exec
- # params:
- # working_dir: "wiredtiger/build_posix"
- # script: |
- # set -o errexit
- # set -o verbose
+ - name: coverage-report
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ vars:
+ configure_env_vars: CC=/opt/mongodbtoolchain/v3/bin/gcc CXX=/opt/mongodbtoolchain/v3/bin/g++ PATH=/opt/mongodbtoolchain/v3/bin:$PATH CFLAGS="--coverage -fPIC -ggdb" LDFLAGS=--coverage
+ posix_configure_flags: --enable-silent-rules --enable-diagnostic --enable-strict --enable-python --with-builtins=lz4,snappy,zlib
+ - func: "make check all"
+ - func: "unit test"
+ vars:
+ unit_test_args: -v 2 --long
+ - func: "format test"
+ vars:
+ extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row compression=snappy logging=1 logging_compression=snappy logging_prealloc=1
+ - func: "format test"
+ vars:
+ extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row alter=1 backups=1 compaction=1 data_extend=1 prepare=1 rebalance=1 salvage=1 statistics=1 statistics_server=1 verify=1
+ - func: "format test"
+ vars:
+ extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row firstfit=1 internal_key_truncation=1
+ - func: "format test"
+ vars:
+ extra_args: leak_memory=0 mmap=1 file_type=row checkpoints=0 in_memory=1 reverse=1 truncate=1
+ - func: "format test"
+ vars:
+ extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row compression=zlib huffman_key=1 huffman_value=1
+ - func: "format test"
+ vars:
+ extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row isolation=random transaction_timestamps=0
+ - func: "format test"
+ vars:
+ extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row data_source=lsm bloom=1
+ - func: "format test"
+ vars:
+ extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=var compression=snappy checksum=uncompressed dictionary=1 repeat_data_pct=10
+ - func: "format test"
+ vars:
+ extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=row compression=lz4 prefix_compression=1 leaf_page_max=9 internal_page_max=9 key_min=256 value_min=256
+ - func: "format test"
+ vars:
+ extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=var leaf_page_max=9 internal_page_max=9 value_min=256
+ - func: "format test"
+ vars:
+ extra_args: checkpoints=1 leak_memory=0 mmap=1 file_type=fix
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix"
+ script: |
+ set -o errexit
+ set -o verbose
- # GCOV=/opt/mongodbtoolchain/v3/bin/gcov gcovr -r .. -e '.*/bt_(debug|dump|misc|salvage|vrfy).*' -e '.*/(log|progress|verify_build|strerror|env_msg|err_file|cur_config|os_abort)\..*' -e '.*_stat\..*' --html -o ../coverage_report.html
- # - command: s3.put
- # params:
- # aws_secret: ${aws_secret}
- # aws_key: ${aws_key}
- # local_file: wiredtiger/coverage_report.html
- # bucket: build_external
- # permissions: public-read
- # content_type: text/html
- # display_name: Coverage report
- # remote_file: wiredtiger/${build_variant}/${revision}/coverage_report/coverage_report_${build_id}.html
+ GCOV=/opt/mongodbtoolchain/v3/bin/gcov gcovr -r .. -e '.*/bt_(debug|dump|misc|salvage|vrfy).*' -e '.*/(log|progress|verify_build|strerror|env_msg|err_file|cur_config|os_abort)\..*' -e '.*_stat\..*' --html -o ../coverage_report.html
+ - command: s3.put
+ params:
+ aws_secret: ${aws_secret}
+ aws_key: ${aws_key}
+ local_file: wiredtiger/coverage_report.html
+ bucket: build_external
+ permissions: public-read
+ content_type: text/html
+ display_name: Coverage report
+ remote_file: wiredtiger/${build_variant}/${revision}/coverage_report/coverage_report_${build_id}.html
- # Temporarily disabled
- # - name: spinlock-gcc-test
- # commands:
- # - func: "get project"
- # - func: "compile wiredtiger"
- # vars:
- # posix_configure_flags: --enable-python --with-spinlock=gcc --enable-strict
- # - func: "make check all"
- # - func: "format test"
- # vars:
- # times: 3
- # - func: "unit test"
+ - name: spinlock-gcc-test
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ vars:
+ posix_configure_flags: --enable-python --with-spinlock=gcc --enable-strict
+ - func: "make check all"
+ - func: "format test"
+ vars:
+ times: 3
+ - func: "unit test"
- # Temporarily disabled
- # - name: spinlock-pthread-adaptive-test
- # commands:
- # - func: "get project"
- # - func: "compile wiredtiger"
- # vars:
- # posix_configure_flags: --enable-python --with-spinlock=pthread_adaptive --enable-strict
- # - func: "make check all"
- # - func: "format test"
- # vars:
- # times: 3
- # - func: "unit test"
+ - name: spinlock-pthread-adaptive-test
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ vars:
+ posix_configure_flags: --enable-python --with-spinlock=pthread_adaptive --enable-strict
+ - func: "make check all"
+ - func: "format test"
+ vars:
+ times: 3
+ - func: "unit test"
- name: wtperf-test
depends_on:
@@ -2000,21 +1971,20 @@ tasks:
- func: "truncated log test"
# format test
- # Temporarily disabled
- # - func: "format test"
- # vars:
- # extra_args: file_type=fix
- # - func: "format test"
- # vars:
- # extra_args: file_type=row
+ - func: "format test"
+ vars:
+ extra_args: file_type=fix
+ - func: "format test"
+ vars:
+ extra_args: file_type=row
- #FIXME: Add wtperf testing from Jenkin "wiredtiger-test-check-long" after fixing WT-5270
+ #FIXME-WT-5270: Add wtperf testing from Jenkin "wiredtiger-test-check-long" after fixing WT-5270
- name: time-shift-sensitivity-test
depends_on:
- - name: compile
- vars:
- posix_configure_flags: --enable-strict
+ - name: compile
+ vars:
+ posix_configure_flags: --enable-strict
commands:
- func: "fetch artifacts"
vars:
@@ -2107,7 +2077,7 @@ tasks:
set -o errexit
set -o verbose
for i in {1..10}; do ${python_binary|python} split_stress.py; done
- # Temporarily disabled
+
- name: format-stress-test
# Set 25 hours timeout
exec_timeout_secs: 90000
@@ -2119,7 +2089,6 @@ tasks:
#run for 24 hours ( 24 * 60 = 1440 minutes), use default config
format_test_script_args: -b "SEGFAULT_SIGNALS=all catchsegv ./t" -t 1440
- # Temporarily disabled
- name: format-stress-smoke-test
# Set 7 hours timeout
exec_timeout_secs: 25200
@@ -2260,19 +2229,16 @@ buildvariants:
- name: make-check-msan-test
- name: compile-ubsan
- name: ubsan-test
- # Temporarily disabled
- # - name: linux-directio
- # distros: ubuntu1804-build
+ - name: linux-directio
+ distros: ubuntu1804-build
- name: syscall-linux
- name: make-check-asan-test
- name: configure-combinations
- # Temporarily disabled
# - name: checkpoint-filetypes-test
# - name: coverage-report
- name: unit-test-long
- # Temporarily disabled
# - name: spinlock-gcc-test
- # - name: spinlock-pthread-adaptive-test
+ - name: spinlock-pthread-adaptive-test
- name: compile-wtperf
- name: wtperf-test
- name: ftruncate-test
@@ -2347,8 +2313,7 @@ buildvariants:
- name: compile-linux-no-ftruncate
- name: make-check-linux-no-ftruncate-test
- name: unit-linux-no-ftruncate-test
- # Temporarily disabled
- # - name: format-linux-no-ftruncate
+ - name: format-linux-no-ftruncate
- name: rhel80
display_name: RHEL 8.0
@@ -2362,31 +2327,26 @@ buildvariants:
- name: compile
- name: make-check-test
- name: unit-test
- # Temporarily disabled
- # - name: fops
+ - name: fops
- name: time-shift-sensitivity-test
- name: compile-msan
- name: make-check-msan-test
- name: compile-ubsan
- name: ubsan-test
- # Temporarily disabled
- # - name: linux-directio
- # distros: rhel80-build
+ - name: linux-directio
+ distros: rhel80-build
- name: syscall-linux
- name: compile-asan
- name: make-check-asan-test
- # Temporarily disabled
# - name: checkpoint-filetypes-test
- name: unit-test-long
- # Temporarily disabled
# - name: spinlock-gcc-test
- # - name: spinlock-pthread-adaptive-test
+ - name: spinlock-pthread-adaptive-test
- name: compile-wtperf
- name: wtperf-test
- name: ftruncate-test
- name: long-test
- name: configure-combinations
- # Temporarily disabled
# - name: coverage-report
- name: large-scale-tests
@@ -2413,8 +2373,7 @@ buildvariants:
- name: compile
- name: ".windows_only"
- name: ".unit_test"
- # Temporarily disabled
- # - name: fops
+ - name: fops
- name: macos-1012
display_name: OS X 10.12
@@ -2430,40 +2389,37 @@ buildvariants:
- name: compile
- name: make-check-test
- name: unit-test
- # Temporarily disabled
- # - name: fops
-
-# Temporarily disabled
-# - name: little-endian
-# display_name: Little-endian (x86)
-# run_on:
-# - ubuntu1804-test
-# batchtime: 10080 # 7 days
-# expansions:
-# smp_command: -j $(grep -c ^processor /proc/cpuinfo)
-# test_env_vars: PATH=/opt/mongodbtoolchain/v3/bin:$PATH LD_LIBRARY_PATH=$(pwd)/.libs top_srcdir=$(pwd)/.. top_builddir=$(pwd)
-# tasks:
-# - name: compile
-# - name: generate-datafile-little-endian
-# - name: verify-datafile-little-endian
-# - name: verify-datafile-from-big-endian
-
-# Temporarily disabled
-# - name: big-endian
-# display_name: Big-endian (s390x/zSeries)
-# modules:
-# - enterprise
-# run_on:
-# - ubuntu1804-zseries-build
-# batchtime: 10080 # 7 days
-# expansions:
-# smp_command: -j $(grep -c ^processor /proc/cpuinfo)
-# test_env_vars: PATH=/opt/mongodbtoolchain/v3/bin:$PATH LD_LIBRARY_PATH=$(pwd)/.lib top_srcdir=$(pwd)/.. top_builddir=$(pwd)
-# tasks:
-# - name: compile
-# - name: generate-datafile-big-endian
-# - name: verify-datafile-big-endian
-# - name: verify-datafile-from-little-endian
+ - name: fops
+
+- name: little-endian
+ display_name: Little-endian (x86)
+ run_on:
+ - ubuntu1804-test
+ batchtime: 10080 # 7 days
+ expansions:
+ smp_command: -j $(grep -c ^processor /proc/cpuinfo)
+ test_env_vars: PATH=/opt/mongodbtoolchain/v3/bin:$PATH LD_LIBRARY_PATH=$(pwd)/.libs top_srcdir=$(pwd)/.. top_builddir=$(pwd)
+ tasks:
+ - name: compile
+ # - name: generate-datafile-little-endian
+ # - name: verify-datafile-little-endian
+ # - name: verify-datafile-from-big-endian
+
+- name: big-endian
+ display_name: Big-endian (s390x/zSeries)
+ modules:
+ - enterprise
+ run_on:
+ - ubuntu1804-zseries-build
+ batchtime: 10080 # 7 days
+ expansions:
+ smp_command: -j $(grep -c ^processor /proc/cpuinfo)
+ test_env_vars: PATH=/opt/mongodbtoolchain/v3/bin:$PATH LD_LIBRARY_PATH=$(pwd)/.lib top_srcdir=$(pwd)/.. top_builddir=$(pwd)
+ tasks:
+ - name: compile
+ # - name: generate-datafile-big-endian
+ # - name: verify-datafile-big-endian
+ # - name: verify-datafile-from-little-endian
- name: ubuntu1804-ppc
display_name: Ubuntu 18.04 PPC
diff --git a/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh b/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh
index a030f80c712..5dfd5554689 100755
--- a/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh
+++ b/src/third_party/wiredtiger/test/evergreen/compatibility_test_for_releases.sh
@@ -6,10 +6,10 @@
set -e
#############################################################
-# format_b_flag:
+# bflag:
# arg1: branch name
#############################################################
-format_b_flag()
+bflag()
{
# Return if the branch's format command takes the -B flag for backward compatibility.
test "$1" = "develop" && echo "-B "
@@ -63,7 +63,7 @@ run_format()
cd "$1/test/format"
- flags="-1q $(format_b_flag $1)"
+ flags="-1q $(bflag $1)"
args=""
args+="cache=80 " # Medium cache so there's eviction
@@ -115,7 +115,7 @@ verify_branches()
for am in $3; do
echo "$1/wt verifying $2 access method $am..."
dir="$2/test/format/RUNDIR.$am"
- WIREDTIGER_CONFIG="$EXT" ./wt -h "../$dir" verify table:wt
+ WIREDTIGER_CONFIG="$EXT" ./wt $(bflag $1) -h "../$dir" verify table:wt
done
}
@@ -138,12 +138,12 @@ upgrade_downgrade()
for reps in {1..2}; do
echo "$1 format running on $2 access method $am..."
cd "$top/$1/test/format"
- flags="-1qR $(format_b_flag $1)"
+ flags="-1qR $(bflag $1)"
./t $flags -h "$top/$2/test/format/RUNDIR.$am" timer=2
echo "$2 format running on $2 access method $am..."
cd "$top/$2/test/format"
- flags="-1qR $(format_b_flag $2)"
+ flags="-1qR $(bflag $2)"
./t $flags -h "RUNDIR.$am" timer=2
done
done
@@ -184,20 +184,20 @@ cd develop; wt2=$(get_prev_version 2); cd ..
(verify_branches mongodb-3.6 mongodb-3.4 "fix row var")
(verify_branches mongodb-4.0 mongodb-3.6 "fix row var")
(verify_branches mongodb-4.2 mongodb-4.0 "fix row var")
-### (verify_branches mongodb-4.4 mongodb-4.2 "fix row var")
-### (verify_branches develop mongodb-4.4 "row")
+(verify_branches mongodb-4.4 mongodb-4.2 "fix row var")
+(verify_branches develop mongodb-4.4 "row")
(verify_branches develop mongodb-4.2 "row")
(verify_branches "$wt1" "$wt2" "row")
(verify_branches develop "$wt1" "row")
# Verify forward compatibility for supported access methods.
-### (verify_branches mongodb-4.2 mongodb-4.4 "row")
+(verify_branches mongodb-4.2 mongodb-4.4 "row")
(verify_branches mongodb-4.2 develop "row")
-### (verify_branches mongodb-4.4 develop "row")
+(verify_branches mongodb-4.4 develop "row")
# Upgrade/downgrade testing for supported access methods.
-### (upgrade_downgrade mongodb-4.2 mongodb-4.4 "row")
+(upgrade_downgrade mongodb-4.2 mongodb-4.4 "row")
(upgrade_downgrade mongodb-4.2 develop "row")
-### (upgrade_downgrade mongodb-4.4 develop "row")
+(upgrade_downgrade mongodb-4.4 develop "row")
exit 0
diff --git a/src/third_party/wiredtiger/test/fops/Makefile.am b/src/third_party/wiredtiger/test/fops/Makefile.am
index 7a5920221ae..519f6315445 100644
--- a/src/third_party/wiredtiger/test/fops/Makefile.am
+++ b/src/third_party/wiredtiger/test/fops/Makefile.am
@@ -11,8 +11,7 @@ t_LDADD +=$(top_builddir)/libwiredtiger.la
t_LDFLAGS = -static
# Run this during a "make check" smoke test.
-# Temporarily disabled
-# TESTS = $(noinst_PROGRAMS)
+TESTS = $(noinst_PROGRAMS)
LOG_COMPILER = $(TEST_WRAPPER)
clean-local:
diff --git a/src/third_party/wiredtiger/test/format/bulk.c b/src/third_party/wiredtiger/test/format/bulk.c
index 0f41a311e43..69f986aa79a 100644
--- a/src/third_party/wiredtiger/test/format/bulk.c
+++ b/src/third_party/wiredtiger/test/format/bulk.c
@@ -133,20 +133,20 @@ wts_load(void)
if (!is_bulk)
cursor->set_key(cursor, keyno);
cursor->set_value(cursor, *(uint8_t *)value.data);
- logop(session, "%-10s %" PRIu64 " {0x%02" PRIx8 "}", "bulk", keyno,
+ logop(session, "%-10s %" PRIu32 " {0x%02" PRIx8 "}", "bulk", keyno,
((uint8_t *)value.data)[0]);
break;
case VAR:
if (!is_bulk)
cursor->set_key(cursor, keyno);
cursor->set_value(cursor, &value);
- logop(session, "%-10s %" PRIu64 " {%.*s}", "bulk", keyno, (int)value.size,
+ logop(session, "%-10s %" PRIu32 " {%.*s}", "bulk", keyno, (int)value.size,
(char *)value.data);
break;
case ROW:
cursor->set_key(cursor, &key);
cursor->set_value(cursor, &value);
- logop(session, "%-10s %" PRIu64 " {%.*s}, {%.*s}", "bulk", keyno, (int)key.size,
+ logop(session, "%-10s %" PRIu32 " {%.*s}, {%.*s}", "bulk", keyno, (int)key.size,
(char *)key.data, (int)value.size, (char *)value.data);
break;
}
@@ -175,15 +175,8 @@ wts_load(void)
g.c_delete_pct += g.c_insert_pct - 5;
g.c_insert_pct = 5;
}
- if (g.c_delete_pct < 20) {
- g.c_delete_pct += g.c_write_pct / 2;
- g.c_write_pct = g.c_write_pct / 2;
- }
- if (g.c_delete_pct < 20) {
- g.c_delete_pct += g.c_modify_pct / 2;
- g.c_write_pct = g.c_modify_pct / 2;
- }
- break;
+ g.c_delete_pct += g.c_write_pct / 2;
+ g.c_write_pct = g.c_write_pct / 2;
}
}
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 6a668fa4f45..ef3a79e7b53 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -496,7 +496,7 @@ prepare_transaction(TINFO *tinfo)
{
WT_DECL_RET;
WT_SESSION *session;
- uint64_t ts;
+ uint64_t longwait, pause_ms, ts;
char buf[64];
session = tinfo->session;
@@ -523,6 +523,19 @@ prepare_transaction(TINFO *tinfo)
lock_writeunlock(session, &g.ts_lock);
+ /*
+ * Sometimes add a delay after prepare to induce extra memory stress. For 80% of the threads,
+ * there is never a delay, so there is always a dedicated set of threads trying to do work. For
+ * the other 20%, we'll sometimes delay. For these threads, 99% of the time, proceed without
+ * delay. The rest of the time, pause up to 5 seconds, weighted toward the smaller delays.
+ */
+ if (tinfo->id % 5 == 0) {
+ longwait = mmrand(&tinfo->rnd, 0, 999);
+ if (longwait < 10) {
+ pause_ms = mmrand(&tinfo->rnd, 1, 10) << longwait;
+ __wt_sleep(0, pause_ms * WT_THOUSAND);
+ }
+ }
return (ret);
}
@@ -1240,11 +1253,11 @@ order_error_col:
* less-than, row-store inserts new rows in-between rows by appending a new suffix
* to the row's key.)
*/
- testutil_check(__wt_buf_fmt((WT_SESSION_IMPL *)cursor->session, tinfo->tbuf, "%.*s",
+ testutil_check(__wt_buf_fmt(CUR2S(cursor), tinfo->tbuf, "%.*s",
(int)tinfo->key->size, (char *)tinfo->key->data));
keyno_prev = strtoul(tinfo->tbuf->data, NULL, 10);
- testutil_check(__wt_buf_fmt((WT_SESSION_IMPL *)cursor->session, tinfo->tbuf, "%.*s",
- (int)key.size, (char *)key.data));
+ testutil_check(__wt_buf_fmt(
+ CUR2S(cursor), tinfo->tbuf, "%.*s", (int)key.size, (char *)key.data));
keyno = strtoul(tinfo->tbuf->data, NULL, 10);
if (incrementing) {
if (keyno_prev != keyno && keyno_prev + 1 != keyno)
@@ -1258,8 +1271,7 @@ order_error_row:
(char *)tinfo->key->data, (int)key.size, (char *)key.data);
}
- testutil_check(
- __wt_buf_set((WT_SESSION_IMPL *)cursor->session, tinfo->key, key.data, key.size));
+ testutil_check(__wt_buf_set(CUR2S(cursor), tinfo->key, key.data, key.size));
break;
}
break;
diff --git a/src/third_party/wiredtiger/test/suite/test_backup01.py b/src/third_party/wiredtiger/test/suite/test_backup01.py
index ff4dd439189..a39da2e3d18 100644
--- a/src/third_party/wiredtiger/test/suite/test_backup01.py
+++ b/src/third_party/wiredtiger/test/suite/test_backup01.py
@@ -30,6 +30,7 @@ import glob
import os
import shutil
import string
+import time
from suite_subprocess import suite_subprocess
import wiredtiger, wttest
from wtdataset import SimpleDataSet, ComplexDataSet, ComplexLSMDataSet
@@ -163,8 +164,7 @@ class test_backup(wttest.WiredTigerTestCase, suite_subprocess):
self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
self.assertEqual(i, total)
- # Test that named checkpoints can't be deleted while backup cursors are
- # open, but that normal checkpoints continue to work.
+ # Test interaction between checkpoints and a backup cursor.
def test_checkpoint_delete(self):
# You cannot name checkpoints including LSM tables, skip those.
self.populate(1)
@@ -177,7 +177,8 @@ class test_backup(wttest.WiredTigerTestCase, suite_subprocess):
self.objs[0][0], None, "checkpoint=one"))
# Confirm opening a backup cursor causes checkpoint to fail if dropping
- # a named checkpoint, but does not stop a default checkpoint.
+ # a named checkpoint created before the backup cursor, but does not stop a
+ # default checkpoint.
cursor = self.session.open_cursor('backup:', None, None)
self.session.checkpoint()
msg = '/checkpoints cannot be deleted during a hot backup/'
@@ -187,7 +188,24 @@ class test_backup(wttest.WiredTigerTestCase, suite_subprocess):
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.checkpoint("name=three,drop=(two)"), msg)
self.session.checkpoint()
+
+ # Confirm that a named checkpoint created after a backup cursor can be dropped.
+ # Need to pause a couple seconds; checkpoints that are assigned the same timestamp as
+ # the backup will be pinned, even if they occur after the backup starts.
+ time.sleep(2)
+ self.session.checkpoint("name=four")
+ self.session.checkpoint("drop=(four)")
+ self.assertRaises(wiredtiger.WiredTigerError,
+ lambda: self.session.open_cursor(
+ self.objs[0][0], None, "checkpoint=four"))
+
+ # Confirm that after closing the backup cursor the original named checkpoint can
+ # be deleted.
cursor.close()
+ self.session.checkpoint("drop=(two)")
+ self.assertRaises(wiredtiger.WiredTigerError,
+ lambda: self.session.open_cursor(
+ self.objs[0][0], None, "checkpoint=two"))
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_checkpoint05.py b/src/third_party/wiredtiger/test/suite/test_checkpoint05.py
new file mode 100644
index 00000000000..58af3003a60
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_checkpoint05.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_checkpoint05.py
+# Verify that we don't accumulate a lot of checkpoints while a backup
+# cursor is open. WiredTiger checkpoints created after the backup cursor
+# should get deleted as usual.
+
+import time
+import wiredtiger, wttest
+
+class test_checkpoint05(wttest.WiredTigerTestCase):
+ conn_config = 'create,cache_size=100MB,log=(archive=false,enabled=true,file_max=100K)'
+
+ def count_checkpoints(self):
+ metadata_cursor = self.session.open_cursor('metadata:', None, None)
+
+ nckpt = 0
+ while metadata_cursor.next() == 0:
+ key = metadata_cursor.get_key()
+ value = metadata_cursor[key]
+ nckpt = nckpt + value.count("WiredTigerCheckpoint")
+ metadata_cursor.close()
+ return nckpt
+
+ def test_checkpoints_during_backup(self):
+ self.uri = 'table:ckpt05'
+ self.session.create(self.uri, 'key_format=i,value_format=i')
+
+ # Setup: Insert some data and checkpoint it
+ cursor = self.session.open_cursor(self.uri, None)
+ for i in range(16):
+ cursor[i] = i
+ self.session.checkpoint(None)
+
+ # Create backup and check how many checkpoints we have.
+ backup_cursor = self.session.open_cursor('backup:', None, None)
+ initial_count = self.count_checkpoints()
+
+ # Checkpoints created immediately after a backup cursor may get pinned.
+ # Pause to avoid this.
+ time.sleep(2)
+
+ # Take a bunch of checkpoints.
+ for i in range (50):
+ self.session.checkpoint('force=true')
+ cursor.close()
+
+ # There may be a few more checkpoints than when we opened the
+ # backup cursor, but not too many more. The factor of three
+ # is generous. But if WT isn't deleting checkpoints there would
+ # be about 30x more checkpoints here.
+ final_count = self.count_checkpoints()
+ self.assertTrue (final_count < initial_count * 3)
+
+ self.session.close()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_debug_mode05.py b/src/third_party/wiredtiger/test/suite/test_debug_mode05.py
index 9ecbb6aa196..089e77f604a 100644
--- a/src/third_party/wiredtiger/test/suite/test_debug_mode05.py
+++ b/src/third_party/wiredtiger/test/suite/test_debug_mode05.py
@@ -42,7 +42,7 @@ class test_debug_mode05(wttest.WiredTigerTestCase):
uri = 'file:test_debug_mode05'
def test_table_logging_rollback_to_stable(self):
- self.session.create(self.uri, 'key_format=i,value_format=u')
+ self.session.create(self.uri, 'key_format=i,value_format=u,log=(enabled=false)')
cursor = self.session.open_cursor(self.uri, None)
diff --git a/src/third_party/wiredtiger/test/suite/test_durable_ts03.py b/src/third_party/wiredtiger/test/suite/test_durable_ts03.py
index 8fdb1f615ae..43e03431709 100755
--- a/src/third_party/wiredtiger/test/suite/test_durable_ts03.py
+++ b/src/third_party/wiredtiger/test/suite/test_durable_ts03.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
from helper import copy_wiredtiger_home
-import unittest, wiredtiger, wttest
+import wiredtiger, wttest
def timestamp_str(t):
return '%x' %t
@@ -38,7 +38,6 @@ class test_durable_ts03(wttest.WiredTigerTestCase):
conn_config = 'cache_size=10MB'
session_config = 'isolation=snapshot'
- @unittest.skip("Temporarily disabled")
def test_durable_ts03(self):
# Create a table.
uri = 'table:test_durable_ts03'
diff --git a/src/third_party/wiredtiger/test/suite/test_hs06.py b/src/third_party/wiredtiger/test/suite/test_hs06.py
index 042d9c731cb..b00a196c809 100644
--- a/src/third_party/wiredtiger/test/suite/test_hs06.py
+++ b/src/third_party/wiredtiger/test/suite/test_hs06.py
@@ -47,7 +47,8 @@ class test_hs06(wttest.WiredTigerTestCase):
conn_config = 'cache_size=50MB,statistics=(fast)'
session_config = 'isolation=snapshot'
key_format_values = [
- ('column', dict(key_format='r')),
+ # The commented columnar tests needs to be enabled once columnar page instantiated is fixed in (WT-6061).
+ # ('column', dict(key_format='r')),
('integer', dict(key_format='i')),
('string', dict(key_format='S'))
]
diff --git a/src/third_party/wiredtiger/test/suite/test_hs08.py b/src/third_party/wiredtiger/test/suite/test_hs08.py
index b0d0d497dd0..905cc84db1a 100644
--- a/src/third_party/wiredtiger/test/suite/test_hs08.py
+++ b/src/third_party/wiredtiger/test/suite/test_hs08.py
@@ -45,7 +45,7 @@ class test_hs08(wttest.WiredTigerTestCase):
stat_cursor.close()
return val
- def test_modify_insert_to_las(self):
+ def test_modify_insert_to_hs(self):
uri = "table:test_hs08"
create_params = 'value_format=S,key_format=i'
value1 = 'a' * 1000
diff --git a/src/third_party/wiredtiger/test/suite/test_hs09.py b/src/third_party/wiredtiger/test/suite/test_hs09.py
index 43ced8ad589..ac34e3f7b17 100644
--- a/src/third_party/wiredtiger/test/suite/test_hs09.py
+++ b/src/third_party/wiredtiger/test/suite/test_hs09.py
@@ -42,7 +42,8 @@ class test_hs09(wttest.WiredTigerTestCase):
session_config = 'isolation=snapshot'
uri = "table:test_hs09"
key_format_values = [
- ('column', dict(key_format='r')),
+ # The commented columnar tests needs to be enabled once columnar page instantiated is fixed in (WT-6061).
+ #('column', dict(key_format='r')),
('integer', dict(key_format='i')),
('string', dict(key_format='S')),
]
@@ -63,7 +64,7 @@ class test_hs09(wttest.WiredTigerTestCase):
cursor.close()
# Check the history store file value
cursor = session.open_cursor("file:WiredTigerHS.wt", None, 'checkpoint=WiredTigerCheckpoint')
- for _, _, hs_start_ts, _, hs_stop_ts, _, _, _, type, value in cursor:
+ for _, _, hs_start_ts, _, hs_stop_ts, _, type, value in cursor:
# No WT_UPDATE_TOMBSTONE in the history store
self.assertNotEqual(type, 5)
# No WT_UPDATE_BIRTHMARK in the history store
diff --git a/src/third_party/wiredtiger/test/suite/test_hs10.py b/src/third_party/wiredtiger/test/suite/test_hs10.py
index 4a33ced8125..f41f18bb999 100644
--- a/src/third_party/wiredtiger/test/suite/test_hs10.py
+++ b/src/third_party/wiredtiger/test/suite/test_hs10.py
@@ -45,7 +45,7 @@ class test_hs10(wttest.WiredTigerTestCase):
stat_cursor.close()
return val
- def test_modify_insert_to_las(self):
+ def test_modify_insert_to_hs(self):
uri = "table:test_hs10"
uri2 = "table:test_hs10_otherdata"
create_params = 'value_format=S,key_format=i'
diff --git a/src/third_party/wiredtiger/test/suite/test_hs12.py b/src/third_party/wiredtiger/test/suite/test_hs12.py
new file mode 100644
index 00000000000..7403126fd5e
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_hs12.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest, time
+
+def timestamp_str(t):
+ return '%x' % t
+
+# test_hs12.py
+# Verify we can correctly append modifies to the end of string values
+class test_hs12(wttest.WiredTigerTestCase):
+ conn_config = 'cache_size=2MB,statistics=(all),eviction=(threads_max=1)'
+ session_config = 'isolation=snapshot'
+
+ def test_modify_append_to_string(self):
+ uri = "table:test_reverse_modify01_notimestamp"
+ create_params = 'value_format=S,key_format=i'
+ value1 = 'abcedfghijklmnopqrstuvwxyz' * 5
+ value2 = 'b' * 100
+ valuebig = 'e' * 1000
+ self.session.create(uri, create_params)
+ cursor = self.session.open_cursor(uri)
+
+ session2 = self.setUpSessionOpen(self.conn)
+ session2.create(uri, create_params)
+ cursor2 = session2.open_cursor(uri)
+
+ # Insert a full value.
+ self.session.begin_transaction()
+ cursor[1] = value1
+ cursor[2] = value1
+ self.session.commit_transaction()
+
+ # Insert a modify
+ self.session.begin_transaction()
+ cursor.set_key(1)
+ cursor.modify([wiredtiger.Modify('A', 130, 0)])
+ cursor.set_key(2)
+ cursor.modify([wiredtiger.Modify('AB', 0, 0)])
+ self.session.commit_transaction()
+
+ # Validate that we do see the correct value.
+ session2.begin_transaction()
+ cursor2.set_key(1)
+ cursor2.search()
+ self.assertEquals(cursor2.get_value(), value1 + 'A')
+ cursor2.set_key(2)
+ cursor2.search()
+ self.assertEquals(cursor2.get_value(), 'AB' + value1)
+ session2.commit_transaction()
+
+ # Begin transaction on session 2 so it sees the current snap_min and snap_max
+ session2.begin_transaction()
+
+ # reset the cursor
+ cursor2.reset()
+
+ # Insert one more value
+ self.session.begin_transaction()
+ cursor.set_key(1)
+ cursor[1] = value2
+ self.session.commit_transaction()
+
+ # Insert a whole bunch of data into the table to force wiredtiger to evict data
+ # from the previous table.
+ self.session.begin_transaction()
+ for i in range(2, 10000):
+ cursor[i] = valuebig
+ self.session.commit_transaction()
+
+ # Try to find the value we saw earlier
+ cursor2.set_key(1)
+ cursor2.search()
+ self.assertEquals(cursor2.get_value(), value1 + 'A')
+ cursor2.set_key(2)
+ cursor2.search()
+ self.assertEquals(cursor2.get_value(), 'AB' + value1)
+
+if __name__ == '__main__':
+ wttest.run() \ No newline at end of file
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare_hs01.py b/src/third_party/wiredtiger/test/suite/test_prepare_hs01.py
index aa1a8e875ee..494193e3da4 100644
--- a/src/third_party/wiredtiger/test/suite/test_prepare_hs01.py
+++ b/src/third_party/wiredtiger/test/suite/test_prepare_hs01.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
from helper import copy_wiredtiger_home
-import unittest, wiredtiger, wttest
+import wiredtiger, wttest
from wtdataset import SimpleDataSet
def timestamp_str(t):
@@ -39,6 +39,19 @@ class test_prepare_hs01(wttest.WiredTigerTestCase):
# Force a small cache.
conn_config = 'cache_size=50MB'
+ def check(self, uri, ds, nrows, nsessions, nkeys, read_ts, expected_value, not_expected_value):
+ cursor = self.session.open_cursor(uri)
+ self.session.begin_transaction('read_timestamp=' + timestamp_str(read_ts))
+ for i in range(1, nsessions * nkeys):
+ cursor.set_key(ds.key(nrows + i))
+ self.assertEquals(cursor.search(), 0)
+ # Correctness Test - commit_value should be visible
+ self.assertEquals(cursor.get_value(), expected_value)
+ # Correctness Test - prepare_value should NOT be visible
+ self.assertNotEquals(cursor.get_value(), not_expected_value)
+ cursor.close()
+ self.session.commit_transaction()
+
def prepare_updates(self, uri, ds, nrows, nsessions, nkeys):
# Update a large number of records in their individual transactions.
# This will force eviction and start history store eviction of committed
@@ -84,16 +97,9 @@ class test_prepare_hs01(wttest.WiredTigerTestCase):
self.assertEquals(cursors[j].insert(), 0)
sessions[j].prepare_transaction('prepare_timestamp=' + timestamp_str(2))
- # Re-read the original versions of all the data. To do this, the pages
- # that were just evicted need to be read back. This ensures reading
- # prepared updates from the history store
- cursor = self.session.open_cursor(uri)
- self.session.begin_transaction('read_timestamp=' + timestamp_str(1))
- for i in range(1, nsessions * nkeys):
- cursor.set_key(ds.key(nrows + i))
- self.assertEquals(cursor.search(), 0)
- cursor.close()
- self.session.commit_transaction()
+ # Re-read the original versions of all the data. This ensures reading
+ # original versions from the history store
+ self.check(uri, ds, nrows, nsessions, nkeys, 1, bigvalue1, bigvalue2)
# Close all cursors and sessions, this will cause prepared updates to be
# rollback-ed
@@ -101,7 +107,11 @@ class test_prepare_hs01(wttest.WiredTigerTestCase):
cursors[j].close()
sessions[j].close()
- @unittest.skip("Temporarily disabled")
+ # Re-read the original versions of all the data. This ensures reading
+ # original versions from the data store as the prepared updates are
+ # aborted
+ self.check(uri, ds, nrows, nsessions, nkeys, 2, bigvalue1, bigvalue2)
+
def test_prepare_hs(self):
# Create a small table.
uri = "table:test_prepare_hs01"
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare_hs03.py b/src/third_party/wiredtiger/test/suite/test_prepare_hs03.py
index 847bc0977c8..946a6597447 100644
--- a/src/third_party/wiredtiger/test/suite/test_prepare_hs03.py
+++ b/src/third_party/wiredtiger/test/suite/test_prepare_hs03.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
from helper import copy_wiredtiger_home
-import unittest, wiredtiger, wttest
+import wiredtiger, wttest
from wtdataset import SimpleDataSet
import os, shutil
from wtscenario import make_scenarios
@@ -183,7 +183,6 @@ class test_prepare_hs03(wttest.WiredTigerTestCase):
# and call verify
self.corrupt_salvage_verify()
- @unittest.skip("Temporarily disabled")
def test_prepare_hs(self):
nrows = 100
ds = SimpleDataSet(self, self.uri, nrows, key_format="S", value_format='u')
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable05.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable05.py
index 36daf16573f..b16ee76a1f2 100755
--- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable05.py
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable05.py
@@ -150,7 +150,7 @@ class test_rollback_to_stable05(test_rollback_to_stable_base):
else:
self.assertEqual(pages_visited, 0)
self.assertEqual(upd_aborted, 0)
- self.assertEqual(hs_removed, nrows * 3 * 2)
+ self.assertEqual(hs_removed, 0)
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_txn19.py b/src/third_party/wiredtiger/test/suite/test_txn19.py
index cd68c0c9931..7ba1e60f68d 100755
--- a/src/third_party/wiredtiger/test/suite/test_txn19.py
+++ b/src/third_party/wiredtiger/test/suite/test_txn19.py
@@ -423,7 +423,6 @@ class test_txn19_meta(wttest.WiredTigerTestCase, suite_subprocess):
not_salvageable = [
"removal:WiredTiger.turtle",
"removal:WiredTiger.wt",
- "removal:WiredTigerHS.wt",
"truncate:WiredTiger.wt",
"truncate:WiredTigerHS.wt",
"zero:WiredTiger.wt",
@@ -482,13 +481,7 @@ class test_txn19_meta(wttest.WiredTigerTestCase, suite_subprocess):
closeconn=False)
if expect_fail:
- errmsg = 'WT_TRY_SALVAGE: database corruption detected'
- if self.filename == 'WiredTigerHS.wt':
- if self.kind == 'removal':
- errmsg = 'handle-open'
- elif self.kind == 'truncate':
- errmsg = 'file size=0, alloc size=4096'
- self.check_file_contains_one_of(errfile, [errmsg])
+ self.check_file_contains_one_of(errfile, ['WT_TRY_SALVAGE: database corruption detected'])
def test_corrupt_meta(self):
errfile = 'list.err'
@@ -546,13 +539,10 @@ class test_txn19_meta(wttest.WiredTigerTestCase, suite_subprocess):
# an error during the wiredtiger_open. But the nature of the
# messages produced during the error is variable by which case
# it is, and even variable from system to system.
- if self.filename == "WiredTigerHS.wt":
- self.run_wt_and_check(salvagedir, salvagedir + '_' + errfile, salvagedir + '_' + outfile, True)
- else:
- with self.expectedStdoutPattern('.'):
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda: self.reopen_conn(salvagedir, salvage_config),
- '/.*/')
+ with self.expectedStdoutPattern('.'):
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.reopen_conn(salvagedir, salvage_config),
+ '/.*/')
if __name__ == '__main__':
wttest.run()