summary | refs | log | tree | commit | diff
path: root/src/third_party/wiredtiger/src/btree
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2020-05-06 15:45:01 +1000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-05-06 06:00:50 +0000
commit: d2274bb6e1f8b21d73121a2fcb20b6628f652bbe (patch)
tree: 72c771934dab7adff1bbffdcb1af1ac6e1c36a0d /src/third_party/wiredtiger/src/btree
parent: e500238a9ea3d5498ebffeb74a1aceac42eb2c1f (diff)
download: mongo-d2274bb6e1f8b21d73121a2fcb20b6628f652bbe.tar.gz
Import wiredtiger: 18dfb9e58e39927696affcd8e362364e23e1aa59 from branch mongodb-4.4 (tag: r4.4.0-rc4)
ref: a707df12a2..18dfb9e58e
for: 4.4.0-rc4

WT-5242 Minimize checkpoints pinned during backup
WT-5470 Reduce copies and allocations in read path
WT-5673 Prepare support with durable history: modify verify and salvage as needed
WT-5677 Prepare support with durable history: add test/format stress tests
WT-5710 Review WT_PANIC usage
WT-5716 Create the history store file at the same time as creating the metadata file in wiredtiger open
WT-5839 Ignore non-globally visible tombstones for both data store and hs store in hs verification
WT-5841 Return WT_TRY_SALVAGE when the history file is removed or truncated
WT-5928 Cleanup stale FIXMEs from durable history
WT-5977 WT_SESSION_NO_RECONCILE flag set by history cursor prevents eviction
WT-5984 Allow prepared updates to be evicted in durable history
WT-6009 Prepare support with durable history: add statistic for prepared updates evicted
WT-6032 Turn on mongodb-4.4 branch upgrade/downgrade testing
WT-6051 Fix reconstructing full value from modifies for string format
WT-6068 Re-enable tests temporarily disabled during durable history development
WT-6069 Remove WT_UPDATE_RESTORED_FROM_DISK flag
WT-6070 Coverity : Copy paste error
WT-6071 Coverity : Change format specifier
WT-6086 Move time windows and aggregated time windows into structures
WT-6087 Add a C2S(cursor) macro to simplify translation from a cursor to a session
WT-6095 Verify on-disk page only for row store as part of rollback to stable
WT-6109 Cleanup usage of cursor->session
WT-6110 Cleanup cast from cbt to cursor
WT-6120 Remove use-after-free in __verify_history_store_id
WT-6130 Disable test_random_abort
Diffstat (limited to 'src/third_party/wiredtiger/src/btree')
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curnext.c 138
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curprev.c 134
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_cursor.c 169
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_debug.c 61
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_delete.c 8
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_io.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_ovfl.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_page.c 59
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_random.c 62
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_read.c 1
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_rebalance.c 8
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_ret.c 241
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_slvg.c 31
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c 69
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_sync.c 23
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy.c 236
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c 141
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_modify.c 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_srch.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_modify.c 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_srch.c 2
21 files changed, 621 insertions, 780 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index d6c89dacd33..9ea91c6f421 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -16,9 +16,8 @@ static inline int
__cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/* If restarting after a prepare conflict, jump to the right spot. */
if (restart)
@@ -58,28 +57,14 @@ __cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
cbt->iface.value.data = &cbt->v;
} else {
restart_read:
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
- if (upd == NULL) {
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID) {
cbt->v = 0;
cbt->iface.value.data = &cbt->v;
- } else {
- /*
- * If this update has been restored from the disk, it needs to be freed after copying it
- * to the user cursor.
- */
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK)) {
- switch (upd->type) {
- case WT_UPDATE_TOMBSTONE:
- cbt->iface.value.data = upd->data;
- __wt_free_update_list(session, &upd);
- break;
- default:
- return (__wt_value_return(cbt, upd));
- }
- }
- if (upd != NULL)
- cbt->iface.value.data = upd->data;
- }
+ } else if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
+ cbt->iface.value.data = cbt->upd_value->buf.data;
+ else
+ WT_RET(__wt_value_return(cbt, cbt->upd_value));
}
cbt->iface.value.size = 1;
return (0);
@@ -95,12 +80,10 @@ __cursor_fix_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
WT_BTREE *btree;
WT_PAGE *page;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
btree = S2BT(session);
page = cbt->ref->page;
- upd = NULL;
/* If restarting after a prepare conflict, jump to the right spot. */
if (restart)
@@ -127,33 +110,20 @@ new_page:
if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
cbt->ins = NULL;
/*
- * FIXME-PM-1523: Now we only do transaction read if we have an update chain and it doesn't work
+ * FIXME-WT-6127: Now we only do transaction read if we have an update chain and it doesn't work
* in durable history. Review this when we have a plan for fixed-length column store.
*/
+ __wt_upd_value_clear(cbt->upd_value);
if (cbt->ins != NULL)
restart_read:
- WT_RET(__wt_txn_read(session, cbt, NULL, cbt->recno, cbt->ins->upd, NULL, &upd));
- if (upd == NULL) {
+ WT_RET(__wt_txn_read(session, cbt, NULL, cbt->recno, cbt->ins->upd, NULL));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID) {
cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
cbt->iface.value.data = &cbt->v;
- } else {
- /*
- * If this update has been restored from the disk, it needs to be freed after copying it to
- * the user cursor.
- */
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK)) {
- switch (upd->type) {
- case WT_UPDATE_TOMBSTONE:
- cbt->iface.value.data = upd->data;
- __wt_free_update_list(session, &upd);
- break;
- default:
- return (__wt_value_return(cbt, upd));
- }
- }
- if (upd != NULL)
- cbt->iface.value.data = upd->data;
- }
+ } else if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
+ cbt->iface.value.data = cbt->upd_value->buf.data;
+ else
+ WT_RET(__wt_value_return(cbt, cbt->upd_value));
cbt->iface.value.size = 1;
return (0);
}
@@ -166,9 +136,8 @@ static inline int
__cursor_var_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/* If restarting after a prepare conflict, jump to the right spot. */
if (restart)
@@ -187,18 +156,17 @@ new_page:
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
restart_read:
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
- if (upd == NULL)
+ if (cbt->upd_value->type == WT_UPDATE_INVALID)
continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/* NOTREACHED */
}
@@ -216,10 +184,9 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
WT_INSERT *ins;
WT_PAGE *page;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
uint64_t rle, rle_start;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = cbt->ref->page;
rle_start = 0; /* -Werror=maybe-uninitialized */
@@ -258,18 +225,17 @@ restart_read:
/* Check any insert list for a matching record. */
cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
- upd = NULL;
+ __wt_upd_value_clear(cbt->upd_value);
if (cbt->ins != NULL)
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
+ if (cbt->upd_value->type != WT_UPDATE_INVALID) {
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/*
@@ -309,8 +275,9 @@ restart_read:
continue;
}
- WT_RET(__wt_bt_col_var_cursor_walk_txn_read(session, cbt, page, &unpack, cip, &upd));
- if (upd == NULL)
+ WT_RET(__wt_bt_col_var_cursor_walk_txn_read(session, cbt, page, &unpack, cip));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID ||
+ cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
continue;
return (0);
}
@@ -334,10 +301,9 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
WT_PAGE *page;
WT_ROW *rip;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
bool kpack_used;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = cbt->ref->page;
key = &cbt->iface.key;
@@ -386,17 +352,16 @@ restart_read_insert:
if ((ins = cbt->ins) != NULL) {
key->data = WT_INSERT_KEY(ins);
key->size = WT_INSERT_KEY_SIZE(ins);
- WT_RET(__wt_txn_read_upd_list(session, ins->upd, &upd));
- if (upd == NULL)
+ WT_RET(__wt_txn_read_upd_list(session, cbt, ins->upd));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID)
continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/* Check for the end of the page. */
@@ -422,17 +387,16 @@ restart_read_page:
rip = &page->pg_row[cbt->slot];
WT_RET(__cursor_row_slot_key_return(cbt, rip, &kpack, &kpack_used));
WT_RET(__wt_txn_read(
- session, cbt, &cbt->iface.key, WT_RECNO_OOB, WT_ROW_UPDATE(page, rip), NULL, &upd));
- if (upd == NULL)
+ session, cbt, &cbt->iface.key, WT_RECNO_OOB, WT_ROW_UPDATE(page, rip), NULL));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID)
continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/* NOTREACHED */
}
@@ -461,7 +425,7 @@ __cursor_key_order_check_col(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, boo
return (0);
}
- WT_PANIC_RET(session, EINVAL, "WT_CURSOR.%s out-of-order returns: returned key %" PRIu64
+ WT_RET_PANIC(session, EINVAL, "WT_CURSOR.%s out-of-order returns: returned key %" PRIu64
" then "
"key %" PRIu64,
next ? "next" : "prev", cbt->lastrecno, cbt->recno);
@@ -494,7 +458,7 @@ __cursor_key_order_check_row(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, boo
WT_ERR(__wt_scr_alloc(session, 512, &a));
WT_ERR(__wt_scr_alloc(session, 512, &b));
- WT_PANIC_ERR(session, EINVAL,
+ WT_ERR_PANIC(session, EINVAL,
"WT_CURSOR.%s out-of-order returns: returned key %.1024s then "
"key %.1024s",
next ? "next" : "prev", __wt_buf_set_printable_format(session, cbt->lastkey->data,
@@ -536,7 +500,7 @@ __wt_cursor_key_order_init(WT_CURSOR_BTREE *cbt)
{
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/*
* Cursor searches set the position for cursor movements, set the last-key value for diagnostic
@@ -648,7 +612,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
bool newpage, restart;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
WT_STAT_CONN_INCR(session, cursor_next);
WT_STAT_DATA_INCR(session, cursor_next);
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 0099d1ae594..f8db9cd6233 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -40,7 +40,7 @@ __cursor_skip_prev(WT_CURSOR_BTREE *cbt)
uint64_t recno;
int i;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
restart:
/*
@@ -123,9 +123,8 @@ static inline int
__cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/* If restarting after a prepare conflict, jump to the right spot. */
if (restart)
@@ -198,28 +197,14 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
cbt->iface.value.data = &cbt->v;
} else {
restart_read:
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
- if (upd == NULL) {
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID) {
cbt->v = 0;
cbt->iface.value.data = &cbt->v;
- } else {
- /*
- * If this update has been restored from the disk, it needs to be freed after copying it
- * to the user cursor.
- */
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK)) {
- switch (upd->type) {
- case WT_UPDATE_TOMBSTONE:
- cbt->iface.value.data = upd->data;
- __wt_free_update_list(session, &upd);
- break;
- default:
- return (__wt_value_return(cbt, upd));
- }
- }
- if (upd != NULL)
- cbt->iface.value.data = upd->data;
- }
+ } else if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
+ cbt->iface.value.data = cbt->upd_value->buf.data;
+ else
+ WT_RET(__wt_value_return(cbt, cbt->upd_value));
}
cbt->iface.value.size = 1;
return (0);
@@ -235,9 +220,8 @@ __cursor_fix_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
WT_BTREE *btree;
WT_PAGE *page;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = cbt->ref->page;
btree = S2BT(session);
@@ -265,35 +249,21 @@ new_page:
cbt->ins = __col_insert_search(cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno);
if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
cbt->ins = NULL;
- upd = NULL;
/*
- * FIXME-PM-1523: Now we only do transaction read if we have an update chain and it doesn't work
+ * FIXME-WT-6127: Now we only do transaction read if we have an update chain and it doesn't work
* in durable history. Review this when we have a plan for fixed-length column store.
*/
+ __wt_upd_value_clear(cbt->upd_value);
if (cbt->ins != NULL)
restart_read:
- WT_RET(__wt_txn_read(session, cbt, NULL, cbt->recno, cbt->ins->upd, NULL, &upd));
- if (upd == NULL) {
+ WT_RET(__wt_txn_read(session, cbt, NULL, cbt->recno, cbt->ins->upd, NULL));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID) {
cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
cbt->iface.value.data = &cbt->v;
- } else {
- /*
- * If this update has been restored from the disk, it needs to be freed after copying it to
- * the user cursor.
- */
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK)) {
- switch (upd->type) {
- case WT_UPDATE_TOMBSTONE:
- cbt->iface.value.data = upd->data;
- __wt_free_update_list(session, &upd);
- break;
- default:
- return (__wt_value_return(cbt, upd));
- }
- }
- if (upd != NULL)
- cbt->iface.value.data = upd->data;
- }
+ } else if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
+ cbt->iface.value.data = cbt->upd_value->buf.data;
+ else
+ WT_RET(__wt_value_return(cbt, cbt->upd_value));
cbt->iface.value.size = 1;
return (0);
}
@@ -306,9 +276,8 @@ static inline int
__cursor_var_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/* If restarting after a prepare conflict, jump to the right spot. */
if (restart)
@@ -327,17 +296,16 @@ new_page:
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
restart_read:
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
- if (upd == NULL)
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID)
continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK) && upd->type != WT_UPDATE_TOMBSTONE)
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/* NOTREACHED */
}
@@ -355,10 +323,9 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
WT_INSERT *ins;
WT_PAGE *page;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
uint64_t rle_start;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = cbt->ref->page;
rle_start = 0; /* -Werror=maybe-uninitialized */
@@ -398,18 +365,17 @@ restart_read:
/* Check any insert list for a matching record. */
cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
- upd = NULL;
+ __wt_upd_value_clear(cbt->upd_value);
if (cbt->ins != NULL)
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
+ if (cbt->upd_value->type != WT_UPDATE_INVALID) {
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/*
@@ -449,8 +415,9 @@ restart_read:
continue;
}
- WT_RET(__wt_bt_col_var_cursor_walk_txn_read(session, cbt, page, &unpack, cip, &upd));
- if (upd == NULL)
+ WT_RET(__wt_bt_col_var_cursor_walk_txn_read(session, cbt, page, &unpack, cip));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID ||
+ cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
continue;
return (0);
}
@@ -474,10 +441,9 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
WT_PAGE *page;
WT_ROW *rip;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
bool kpack_used;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = cbt->ref->page;
key = &cbt->iface.key;
@@ -536,17 +502,16 @@ restart_read_insert:
if ((ins = cbt->ins) != NULL) {
key->data = WT_INSERT_KEY(ins);
key->size = WT_INSERT_KEY_SIZE(ins);
- WT_RET(__wt_txn_read_upd_list(session, ins->upd, &upd));
- if (upd == NULL)
+ WT_RET(__wt_txn_read_upd_list(session, cbt, ins->upd));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID)
continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/* Check for the beginning of the page. */
@@ -574,17 +539,16 @@ restart_read_page:
rip = &page->pg_row[cbt->slot];
WT_RET(__cursor_row_slot_key_return(cbt, rip, &kpack, &kpack_used));
WT_RET(__wt_txn_read(
- session, cbt, &cbt->iface.key, WT_RECNO_OOB, WT_ROW_UPDATE(page, rip), NULL, &upd));
- if (upd == NULL)
+ session, cbt, &cbt->iface.key, WT_RECNO_OOB, WT_ROW_UPDATE(page, rip), NULL));
+ if (cbt->upd_value->type == WT_UPDATE_INVALID)
continue;
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->txnid != WT_TXN_NONE && __wt_txn_upd_visible_all(session, upd))
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE) {
+ if (cbt->upd_value->txnid != WT_TXN_NONE &&
+ __wt_txn_upd_value_visible_all(session, cbt->upd_value))
++cbt->page_deleted_count;
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
continue;
}
- return (__wt_value_return(cbt, upd));
+ return (__wt_value_return(cbt, cbt->upd_value));
}
/* NOTREACHED */
}
@@ -604,7 +568,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
bool newpage, restart;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
WT_STAT_CONN_INCR(session, cursor_prev);
WT_STAT_DATA_INCR(session, cursor_prev);
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 61a0a2653f6..ccec03700d0 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -60,7 +60,7 @@ __cursor_page_pinned(WT_CURSOR_BTREE *cbt, bool search_operation)
WT_SESSION_IMPL *session;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
/*
* Check the page active flag, asserting the page reference with any external key.
@@ -171,21 +171,18 @@ __cursor_fix_implicit(WT_BTREE *btree, WT_CURSOR_BTREE *cbt)
* Return if the cursor references an valid key/value pair.
*/
int
-__wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, WT_UPDATE **updp, bool *valid)
+__wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, bool *valid)
{
WT_BTREE *btree;
WT_CELL *cell;
WT_COL *cip;
WT_PAGE *page;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
- if (updp != NULL)
- *updp = NULL;
*valid = false;
btree = cbt->btree;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/*
* We may be pointing to an insert object, and we may have a page with
@@ -232,22 +229,22 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, WT_UPDATE
* update that's been deleted is not a valid key/value pair).
*/
if (cbt->ins != NULL) {
- WT_RET(__wt_txn_read_upd_list(session, cbt->ins->upd, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- WT_ASSERT(session, !F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK));
+ WT_RET(__wt_txn_read_upd_list(session, cbt, cbt->ins->upd));
+ if (cbt->upd_value->type != WT_UPDATE_INVALID) {
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
return (0);
- }
- if (updp != NULL)
- *updp = upd;
- else if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
*valid = true;
return (0);
}
}
/*
+ * Clean out any stale value here. Calling a transaction read helper automatically clears this
+ * but we have some code paths that don't do this (fixed length column store is one example).
+ */
+ __wt_upd_value_clear(cbt->upd_value);
+
+ /*
* If we don't have an insert object, or in the case of column-store, there's an insert object
* but no update was visible to us and the key on the page is the same as the insert object's
* key, and the slot as set by the search function is valid, we can use the original page
@@ -299,17 +296,10 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, WT_UPDATE
* Check for an update ondisk or in the history store. For column store, an insert object
* can have the same key as an on-page or history store object.
*/
- WT_RET(__wt_txn_read(session, cbt, key, recno, NULL, NULL, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
+ WT_RET(__wt_txn_read(session, cbt, key, recno, NULL, NULL));
+ if (cbt->upd_value->type != WT_UPDATE_INVALID) {
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
return (0);
- }
- if (updp != NULL)
- *updp = upd;
- else if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
*valid = true;
}
break;
@@ -335,17 +325,10 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, WT_UPDATE
(page->modify != NULL && page->modify->mod_row_update != NULL) ?
page->modify->mod_row_update[cbt->slot] :
NULL,
- NULL, &upd));
- if (upd != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
+ NULL));
+ if (cbt->upd_value->type != WT_UPDATE_INVALID) {
+ if (cbt->upd_value->type == WT_UPDATE_TOMBSTONE)
return (0);
- }
- if (updp != NULL)
- *updp = upd;
- else if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK))
- __wt_free_update_list(session, &upd);
*valid = true;
}
break;
@@ -363,7 +346,7 @@ __cursor_col_search(WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool *leaf_foundp)
WT_DECL_RET;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
WT_WITH_PAGE_INDEX(
session, ret = __wt_col_search(cbt, cbt->iface.recno, leaf, false, leaf_foundp));
return (ret);
@@ -379,7 +362,7 @@ __cursor_row_search(WT_CURSOR_BTREE *cbt, bool insert, WT_REF *leaf, bool *leaf_
WT_DECL_RET;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
WT_WITH_PAGE_INDEX(
session, ret = __wt_row_search(cbt, &cbt->iface.key, insert, leaf, false, leaf_foundp));
return (ret);
@@ -429,7 +412,7 @@ __wt_btcur_reset(WT_CURSOR_BTREE *cbt)
WT_SESSION_IMPL *session;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
WT_STAT_CONN_INCR(session, cursor_reset);
WT_STAT_DATA_INCR(session, cursor_reset);
@@ -440,11 +423,11 @@ __wt_btcur_reset(WT_CURSOR_BTREE *cbt)
}
/*
- * __wt_btcur_search_uncommitted --
- * Search and return exact matching records only, including uncommitted ones.
+ * __wt_btcur_search_prepared --
+ * Search and return exact matching records only.
*/
int
-__wt_btcur_search_uncommitted(WT_CURSOR *cursor, WT_UPDATE **updp)
+__wt_btcur_search_prepared(WT_CURSOR *cursor, WT_UPDATE **updp)
{
WT_BTREE *btree;
WT_CURSOR_BTREE *cbt;
@@ -500,12 +483,6 @@ __wt_btcur_search_uncommitted(WT_CURSOR *cursor, WT_UPDATE **updp)
break;
}
- /*
- * Like regular uncommitted updates, pages with prepared updates are pinned to the cache and can
- * never be written to the history store. Therefore, there is no need to do a search here for
- * uncommitted updates.
- */
-
*updp = upd;
return (0);
}
@@ -522,13 +499,11 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
- upd = NULL; /* -Wuninitialized */
+ session = CUR2S(cbt);
WT_STAT_CONN_INCR(session, cursor_search);
WT_STAT_DATA_INCR(session, cursor_search);
@@ -557,11 +532,11 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
if (btree->type == BTREE_ROW) {
WT_ERR(__cursor_row_search(cbt, false, cbt->ref, &leaf_found));
if (leaf_found && cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
} else {
WT_ERR(__cursor_col_search(cbt, cbt->ref, &leaf_found));
if (leaf_found && cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, &valid));
}
}
if (!valid) {
@@ -570,16 +545,16 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
if (btree->type == BTREE_ROW) {
WT_ERR(__cursor_row_search(cbt, false, NULL, NULL));
if (cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
} else {
WT_ERR(__cursor_col_search(cbt, NULL, NULL));
if (cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, &valid));
}
}
if (valid)
- ret = __cursor_kv_return(cbt, upd);
+ ret = __cursor_kv_return(cbt, cbt->upd_value);
else if (__cursor_fix_implicit(btree, cbt)) {
/*
* Creating a record past the end of the tree in a fixed-length column-store implicitly
@@ -619,14 +594,12 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
int exact;
bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
- upd = NULL; /* -Wuninitialized */
+ session = CUR2S(cbt);
exact = 0;
WT_STAT_CONN_INCR(session, cursor_search_near);
@@ -671,7 +644,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
*/
if (leaf_found &&
(cbt->compare == 0 || (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)))
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
}
if (!valid) {
WT_ERR(__cursor_func_init(cbt, true));
@@ -682,10 +655,10 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
*/
if (btree->type == BTREE_ROW) {
WT_ERR(__cursor_row_search(cbt, true, NULL, NULL));
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
} else {
WT_ERR(__cursor_col_search(cbt, NULL, NULL));
- WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, &upd, &valid));
+ WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, &valid));
}
}
@@ -706,7 +679,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
*/
if (valid) {
exact = cbt->compare;
- ret = __cursor_kv_return(cbt, upd);
+ ret = __cursor_kv_return(cbt, cbt->upd_value);
} else if (__cursor_fix_implicit(btree, cbt)) {
cbt->recno = cursor->recno;
cbt->v = 0;
@@ -781,9 +754,12 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
btree = cbt->btree;
cursor = &cbt->iface;
insert_bytes = cursor->key.size + cursor->value.size;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
yield_count = sleep_usecs = 0;
+ WT_RET_PANIC_ASSERT(
+ session, S2BT(session) == btree, WT_PANIC, "btree differs unexpectedly from session's btree");
+
WT_STAT_CONN_INCR(session, cursor_insert);
WT_STAT_DATA_INCR(session, cursor_insert);
WT_STAT_CONN_INCRV(session, cursor_insert_bytes, insert_bytes);
@@ -793,9 +769,6 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
WT_RET(__cursor_size_chk(session, &cursor->key));
WT_RET(__cursor_size_chk(session, &cursor->value));
- WT_RET_ASSERT(
- session, S2BT(session) == btree, WT_PANIC, "btree differs unexpectedly from session's btree");
-
/* It's no longer possible to bulk-load into the tree. */
__wt_cursor_disable_bulk(session);
@@ -859,7 +832,9 @@ retry:
* If not overwriting, fail if the key exists, else insert the key/value pair.
*/
if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && cbt->compare == 0) {
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, NULL, &valid));
+ WT_WITH_UPDATE_VALUE_SKIP_BUF(
+ ret = __wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
+ WT_ERR(ret);
if (valid)
WT_ERR(WT_DUPLICATE_KEY);
}
@@ -885,7 +860,9 @@ retry:
*/
if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
if (cbt->compare == 0) {
- WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, NULL, &valid));
+ WT_WITH_UPDATE_VALUE_SKIP_BUF(
+ ret = __wt_cursor_valid(cbt, NULL, cbt->recno, &valid));
+ WT_ERR(ret);
if (valid)
WT_ERR(WT_DUPLICATE_KEY);
} else if (__cursor_fix_implicit(btree, cbt))
@@ -932,7 +909,7 @@ __curfile_update_check(WT_CURSOR_BTREE *cbt)
btree = cbt->btree;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
upd = NULL;
if (cbt->compare != 0)
@@ -964,7 +941,7 @@ __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt)
uint64_t yield_count, sleep_usecs;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
yield_count = sleep_usecs = 0;
WT_ASSERT(session, cbt->btree->type == BTREE_ROW);
@@ -1015,7 +992,7 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned)
btree = cbt->btree;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
yield_count = sleep_usecs = 0;
iterating = F_ISSET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV);
searched = false;
@@ -1085,7 +1062,8 @@ retry:
if (cbt->compare != 0)
goto search_notfound;
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, NULL, &valid));
+ WT_WITH_UPDATE_VALUE_SKIP_BUF(ret = __wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
+ WT_ERR(ret);
if (!valid)
goto search_notfound;
@@ -1103,8 +1081,10 @@ retry:
/* Remove the record if it exists. */
valid = false;
- if (cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, NULL, &valid));
+ if (cbt->compare == 0) {
+ WT_WITH_UPDATE_VALUE_SKIP_BUF(ret = __wt_cursor_valid(cbt, NULL, cbt->recno, &valid));
+ WT_ERR(ret);
+ }
if (cbt->compare != 0 || !valid) {
if (!__cursor_fix_implicit(btree, cbt))
goto search_notfound;
@@ -1203,10 +1183,10 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
btree = cbt->btree;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
yield_count = sleep_usecs = 0;
- WT_RET_ASSERT(
+ WT_RET_PANIC_ASSERT(
session, S2BT(session) == btree, WT_PANIC, "btree differs unexpectedly from session's btree");
/* It's no longer possible to bulk-load into the tree. */
@@ -1287,7 +1267,9 @@ update_local:
WT_ERR(__curfile_update_check(cbt));
if (cbt->compare != 0)
WT_ERR(WT_NOTFOUND);
- WT_ERR(__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, NULL, &valid));
+ WT_WITH_UPDATE_VALUE_SKIP_BUF(
+ ret = __wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, &valid));
+ WT_ERR(ret);
if (!valid)
WT_ERR(WT_NOTFOUND);
}
@@ -1302,8 +1284,11 @@ update_local:
if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
WT_ERR(__curfile_update_check(cbt));
valid = false;
- if (cbt->compare == 0)
- WT_ERR(__wt_cursor_valid(cbt, NULL, cbt->recno, NULL, &valid));
+ if (cbt->compare == 0) {
+ WT_WITH_UPDATE_VALUE_SKIP_BUF(
+ ret = __wt_cursor_valid(cbt, NULL, cbt->recno, &valid));
+ WT_ERR(ret);
+ }
if ((cbt->compare != 0 || !valid) && !__cursor_fix_implicit(btree, cbt))
WT_ERR(WT_NOTFOUND);
}
@@ -1375,7 +1360,7 @@ __cursor_chain_exceeded(WT_CURSOR_BTREE *cbt)
cursor = &cbt->iface;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
upd = NULL;
if (cbt->ins != NULL)
@@ -1429,7 +1414,7 @@ __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries)
bool overwrite;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
/* Save the cursor state. */
__cursor_state_save(cursor, &state);
@@ -1465,7 +1450,7 @@ __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries)
WT_ERR(__wt_modify_pack(cursor, entries, nentries, &modify));
orig = cursor->value.size;
- WT_ERR(__wt_modify_apply(cursor, modify->data));
+ WT_ERR(__wt_modify_apply_item(session, cursor->value_format, &cursor->value, modify->data));
new = cursor->value.size;
WT_ERR(__cursor_size_chk(session, &cursor->value));
@@ -1515,7 +1500,7 @@ __wt_btcur_reserve(WT_CURSOR_BTREE *cbt)
bool overwrite;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
WT_STAT_CONN_INCR(session, cursor_reserve);
WT_STAT_DATA_INCR(session, cursor_reserve);
@@ -1542,7 +1527,7 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt)
btree = cbt->btree;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cbt);
WT_STAT_CONN_INCR(session, cursor_update);
WT_STAT_DATA_INCR(session, cursor_update);
@@ -1568,7 +1553,7 @@ __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp)
a = (WT_CURSOR *)a_arg;
b = (WT_CURSOR *)b_arg;
- session = (WT_SESSION_IMPL *)a->session;
+ session = CUR2S(a_arg);
/* Confirm both cursors reference the same object. */
if (a_arg->btree != b_arg->btree)
@@ -1640,8 +1625,8 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp)
a = (WT_CURSOR *)a_arg;
b = (WT_CURSOR *)b_arg;
+ session = CUR2S(a_arg);
cmp = 0;
- session = (WT_SESSION_IMPL *)a->session;
/* Confirm both cursors reference the same object. */
if (a_arg->btree != b_arg->btree)
@@ -1673,7 +1658,7 @@ __cursor_truncate(
WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
- session = (WT_SESSION_IMPL *)start->iface.session;
+ session = CUR2S(start);
yield_count = sleep_usecs = 0;
/*
@@ -1729,7 +1714,7 @@ __cursor_truncate_fix(
uint64_t yield_count, sleep_usecs;
const uint8_t *value;
- session = (WT_SESSION_IMPL *)start->iface.session;
+ session = CUR2S(start);
yield_count = sleep_usecs = 0;
/*
@@ -1786,8 +1771,8 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
WT_DECL_RET;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)start->iface.session;
btree = start->btree;
+ session = CUR2S(start);
WT_STAT_DATA_INCR(session, cursor_truncate);
WT_RET(__wt_txn_autocommit_check(session));
@@ -1852,6 +1837,8 @@ __wt_btcur_open(WT_CURSOR_BTREE *cbt)
{
cbt->row_key = &cbt->_row_key;
cbt->tmp = &cbt->_tmp;
+ cbt->modify_update = &cbt->_modify_update;
+ cbt->upd_value = &cbt->_upd_value;
#ifdef HAVE_DIAGNOSTIC
cbt->lastkey = &cbt->_lastkey;
@@ -1869,7 +1856,7 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel)
WT_DECL_RET;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/*
* The in-memory split and history store table code creates low-level btree cursors to
@@ -1879,6 +1866,8 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel)
if (!lowlevel)
ret = __cursor_reset(cbt);
+ __wt_buf_free(session, &cbt->_modify_update.buf);
+ __wt_buf_free(session, &cbt->_upd_value.buf);
__wt_buf_free(session, &cbt->_row_key);
__wt_buf_free(session, &cbt->_tmp);
#ifdef HAVE_DIAGNOSTIC
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index e3e89620fd5..a835e593022 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -156,18 +156,16 @@ __debug_item_value(WT_DBG *ds, const char *tag, const void *data_arg, size_t siz
}
/*
- * __debug_time_pairs --
+ * __debug_time_window --
* Dump a set of start and stop time pairs, with an optional tag.
*/
static inline int
-__debug_time_pairs(WT_DBG *ds, const char *tag, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t stop_ts, uint64_t stop_txn)
+__debug_time_window(WT_DBG *ds, const char *tag, WT_TIME_WINDOW *tw)
{
- char tp_string[2][WT_TP_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
- return (ds->f(ds, "\t%s%s%s,%s\n", tag == NULL ? "" : tag, tag == NULL ? "" : " ",
- __wt_time_pair_to_string(start_ts, start_txn, tp_string[0]),
- __wt_time_pair_to_string(stop_ts, stop_txn, tp_string[1])));
+ return (ds->f(ds, "\t%s%s%s\n", tag == NULL ? "" : tag, tag == NULL ? "" : " ",
+ __wt_time_window_to_string(tw, time_string)));
}
/*
@@ -711,15 +709,13 @@ int
__wt_debug_cursor_tree_hs(void *cursor_arg, const char *ofile)
WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- WT_CURSOR *cursor;
WT_CURSOR_BTREE *cbt;
WT_DECL_RET;
WT_SESSION_IMPL *session;
uint32_t session_flags;
bool is_owner;
- cursor = cursor_arg;
- session = (WT_SESSION_IMPL *)cursor->session;
+ session = CUR2S(cursor_arg);
session_flags = 0; /* [-Werror=maybe-uninitialized] */
WT_RET(__wt_hs_cursor(session, &session_flags, &is_owner));
@@ -741,26 +737,24 @@ __wt_debug_cursor_hs(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor)
WT_DECL_ITEM(hs_key);
WT_DECL_ITEM(hs_value);
WT_DECL_RET;
- WT_TIME_PAIR start, stop;
+ WT_TIME_WINDOW tw;
WT_UPDATE *upd;
- wt_timestamp_t hs_durable_ts;
- uint64_t hs_upd_type_full;
+ uint64_t hs_counter, hs_upd_type_full;
uint32_t hs_btree_id;
- uint8_t hs_prep_state, hs_upd_type;
+ uint8_t hs_upd_type;
ds = &_ds;
+ __wt_time_window_init(&tw);
WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
WT_ERR(__wt_scr_alloc(session, 0, &hs_value));
WT_ERR(__debug_config(session, ds, NULL));
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &start.timestamp, &start.txnid,
- &stop.timestamp, &stop.txnid));
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &tw.start_ts, &hs_counter));
+ WT_ERR(hs_cursor->get_value(
+ hs_cursor, &tw.stop_ts, &tw.durable_start_ts, &hs_upd_type_full, hs_value));
+ WT_ERR(__debug_time_window(ds, "T", &tw));
- WT_ERR(__debug_time_pairs(ds, "T", start.timestamp, start.txnid, stop.timestamp, stop.txnid));
-
- WT_ERR(
- hs_cursor->get_value(hs_cursor, &hs_durable_ts, &hs_prep_state, &hs_upd_type_full, hs_value));
hs_upd_type = (uint8_t)hs_upd_type_full;
switch (hs_upd_type) {
case WT_UPDATE_MODIFY:
@@ -806,8 +800,7 @@ __wt_debug_key_value(
WT_ERR(ds->f(ds, "\tK {%" PRIu64 " %" PRIu64 "}", recno, rle));
else
WT_ERR(__debug_item_key(ds, "K", key->data, key->size));
- WT_ERR(__debug_time_pairs(
- ds, "T", value->start_ts, value->start_txn, value->stop_ts, value->stop_txn));
+ WT_ERR(__debug_time_window(ds, "T", &value->tw));
WT_ERR(__debug_cell_data(ds, NULL, value != NULL ? value->type : 0, "V", value));
err:
@@ -1350,8 +1343,7 @@ __debug_ref(WT_DBG *ds, WT_REF *ref)
{
WT_ADDR_COPY addr;
WT_SESSION_IMPL *session;
- char tp_string[2][WT_TP_STRING_SIZE];
- char ts_string[2][WT_TS_INT_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
session = ds->session;
@@ -1365,13 +1357,7 @@ __debug_ref(WT_DBG *ds, WT_REF *ref)
WT_RET(ds->f(ds, ", %s", "reading"));
if (__wt_ref_addr_copy(session, ref, &addr))
- WT_RET(ds->f(ds,
- ", start/stop durable ts %s,%s, start/stop ts/txn %s,%s, prepared updates: %s, %s",
- __wt_timestamp_to_string(addr.newest_start_durable_ts, ts_string[0]),
- __wt_timestamp_to_string(addr.newest_stop_durable_ts, ts_string[1]),
- __wt_time_pair_to_string(addr.oldest_start_ts, addr.oldest_start_txn, tp_string[0]),
- __wt_time_pair_to_string(addr.newest_stop_ts, addr.newest_stop_txn, tp_string[1]),
- addr.prepare ? "true" : "false",
+ WT_RET(ds->f(ds, "%s, %s", __wt_time_aggregate_to_string(&addr.ta, time_string),
__wt_addr_string(session, addr.addr, addr.size, ds->t1)));
return (ds->f(ds, "\n"));
}
@@ -1386,8 +1372,7 @@ __debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
WT_DECL_ITEM(buf);
WT_DECL_RET;
WT_SESSION_IMPL *session;
- char tp_string[2][WT_TP_STRING_SIZE];
- char ts_string[2][WT_TS_INT_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
session = ds->session;
@@ -1429,11 +1414,7 @@ __debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
- WT_RET(ds->f(ds, ", ts/txn %s,%s,%s,%s",
- __wt_timestamp_to_string(unpack->newest_start_durable_ts, ts_string[0]),
- __wt_timestamp_to_string(unpack->newest_stop_durable_ts, ts_string[1]),
- __wt_time_pair_to_string(unpack->oldest_start_ts, unpack->oldest_start_txn, tp_string[0]),
- __wt_time_pair_to_string(unpack->newest_stop_ts, unpack->newest_stop_txn, tp_string[1])));
+ WT_RET(ds->f(ds, ", %s", __wt_time_aggregate_to_string(&unpack->ta, time_string)));
break;
case WT_CELL_DEL:
case WT_CELL_VALUE:
@@ -1441,9 +1422,7 @@ __debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_OVFL_RM:
case WT_CELL_VALUE_SHORT:
- WT_RET(ds->f(ds, ", ts/txn %s,%s",
- __wt_time_pair_to_string(unpack->start_ts, unpack->start_txn, tp_string[0]),
- __wt_time_pair_to_string(unpack->stop_ts, unpack->stop_txn, tp_string[1])));
+ WT_RET(ds->f(ds, ", %s", __wt_time_window_to_string(&unpack->tw, time_string)));
break;
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index 94b544f6bc2..b9a3eed1c93 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -115,7 +115,7 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
goto err;
if (addr.type != WT_ADDR_LEAF_NO)
goto err;
- if (!__wt_txn_visible(session, addr.oldest_start_txn, addr.oldest_start_ts))
+ if (!__wt_txn_visible(session, addr.ta.oldest_start_txn, addr.ta.oldest_start_ts))
goto err;
/*
@@ -292,7 +292,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
WT_PAGE *page;
WT_PAGE_DELETED *page_del;
WT_ROW *rip;
- WT_TIME_PAIR start, stop;
+ WT_TIME_WINDOW tw;
WT_UPDATE **upd_array, *upd;
size_t size;
uint32_t count, i;
@@ -382,8 +382,8 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
* Retrieve the stop time pair from the page's row. If we find an existing stop time pair we
* don't need to append a tombstone.
*/
- __wt_read_row_time_pairs(session, page, rip, &start, &stop);
- if (stop.timestamp == WT_TS_MAX && stop.txnid == WT_TXN_MAX) {
+ __wt_read_row_time_window(session, page, rip, &tw);
+ if (tw.stop_ts == WT_TS_MAX && tw.stop_txn == WT_TXN_MAX) {
WT_ERR(__tombstone_update_alloc(session, page_del, &upd, &size));
upd->next = upd_array[WT_ROW_SLOT(page, rip)];
upd_array[WT_ROW_SLOT(page, rip)] = upd;
diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c
index d4d83783a1b..7fac3deabd5 100644
--- a/src/third_party/wiredtiger/src/btree/bt_io.c
+++ b/src/third_party/wiredtiger/src/btree/bt_io.c
@@ -140,7 +140,7 @@ corrupt:
F_SET(S2C(session), WT_CONN_DATA_CORRUPTION);
if (!F_ISSET(btree, WT_BTREE_VERIFY) && !F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) {
WT_TRET(bm->corrupt(bm, session, addr, addr_size));
- WT_PANIC_ERR(session, ret, "%s: fatal read error: %s", btree->dhandle->name, fail_msg);
+ WT_ERR_PANIC(session, ret, "%s: fatal read error: %s", btree->dhandle->name, fail_msg);
}
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_ovfl.c b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
index 72523b695de..cccd2c628a3 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ovfl.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
@@ -65,7 +65,7 @@ __wt_ovfl_read(
*/
__wt_readlock(session, &S2BT(session)->ovfl_lock);
if (__wt_cell_type_raw(unpack->cell) == WT_CELL_VALUE_OVFL_RM) {
- WT_ASSERT(session, __wt_txn_visible_all(session, unpack->stop_txn, unpack->stop_ts));
+ WT_ASSERT(session, __wt_txn_visible_all(session, unpack->tw.stop_txn, unpack->tw.stop_ts));
ret = __wt_buf_setstr(session, store, "WT_CELL_VALUE_OVFL_RM");
*decoded = true;
} else
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index ac588bf901d..1a690b24804 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -544,13 +544,23 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_BTREE *btree;
WT_CELL_UNPACK unpack;
+ WT_ITEM buf;
WT_ROW *rip;
+ WT_UPDATE **upd_array, *upd;
+ size_t size, total_size;
+ uint32_t i;
+ bool instantiate_prepared, prepare;
btree = S2BT(session);
+ prepare = false;
+
+ instantiate_prepared = F_ISSET_ATOMIC(page, WT_PAGE_INSTANTIATE_PREPARE_UPDATE);
/* Walk the page, building indices. */
rip = page->pg_row;
WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ if (instantiate_prepared && !prepare && F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE))
+ prepare = true;
switch (unpack.type) {
case WT_CELL_KEY_OVFL:
__wt_row_leaf_key_set_cell(page, rip, unpack.cell);
@@ -575,9 +585,9 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
* The visibility information is not referenced on the page so we need to ensure that
* the value is globally visible at the point in time where we read the page into cache.
*/
- if (!btree->huffman_value && unpack.stop_txn == WT_TXN_MAX &&
- unpack.stop_ts == WT_TS_MAX &&
- __wt_txn_visible_all(session, unpack.start_txn, unpack.start_ts))
+ if (!btree->huffman_value && unpack.tw.stop_txn == WT_TXN_MAX &&
+ unpack.tw.stop_ts == WT_TS_MAX && !F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE) &&
+ __wt_txn_visible_all(session, unpack.tw.start_txn, unpack.tw.start_ts))
__wt_row_leaf_value_set(page, rip - 1, &unpack);
break;
case WT_CELL_VALUE_OVFL:
@@ -589,8 +599,47 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_CELL_FOREACH_END;
/*
- * We do not currently instantiate keys on leaf pages when the page is loaded, they're
- * instantiated on demand.
+ * Instantiate prepared updates on leaf pages when the page is loaded. For in-memory databases,
+ * all non-obsolete updates are retained on the page as part of the __split_multi_inmem function.
*/
+ if (prepare && !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) {
+ WT_RET(__wt_page_modify_init(session, page));
+ if (!F_ISSET(btree, WT_BTREE_READONLY))
+ __wt_page_modify_set(session, page);
+
+ /* Allocate the per-page update array if one doesn't already exist. */
+ if (page->entries != 0 && page->modify->mod_row_update == NULL)
+ WT_RET(__wt_calloc_def(session, page->entries, &page->modify->mod_row_update));
+
+ /* For each entry in the page */
+ size = total_size = 0;
+ upd_array = page->modify->mod_row_update;
+ WT_ROW_FOREACH (page, rip, i) {
+ /* Unpack the on-page value cell. */
+ __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
+ if (F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE)) {
+ if (unpack.tw.stop_ts == WT_TS_MAX && unpack.tw.stop_txn == WT_TXN_MAX) {
+ /* Take the value from the original page cell. */
+ WT_RET(__wt_page_cell_data_ref(session, page, &unpack, &buf));
+
+ WT_RET(__wt_upd_alloc(session, &buf, WT_UPDATE_STANDARD, &upd, &size));
+ upd->durable_ts = WT_TS_NONE;
+ upd->start_ts = unpack.tw.start_ts;
+ upd->txnid = unpack.tw.start_txn;
+ } else {
+ WT_RET(__wt_upd_alloc_tombstone(session, &upd, &size));
+ upd->durable_ts = WT_TS_NONE;
+ upd->start_ts = unpack.tw.stop_ts;
+ upd->txnid = unpack.tw.stop_txn;
+ }
+ upd->prepare_state = WT_PREPARE_INPROGRESS;
+ upd_array[WT_ROW_SLOT(page, rip)] = upd;
+ total_size += size;
+ }
+ }
+
+ __wt_cache_page_inmem_incr(session, page, total_size);
+ }
+
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index b3a8985fbe4..3f113e4b2dc 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -13,10 +13,8 @@
* Check if the inserted key/value pair is valid.
*/
static int
-__random_insert_valid(
- WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_INSERT *ins, WT_UPDATE **updp, bool *validp)
+__random_insert_valid(WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_INSERT *ins, bool *validp)
{
- *updp = NULL;
*validp = false;
__cursor_pos_clear(cbt);
@@ -27,7 +25,7 @@ __random_insert_valid(
cbt->tmp->data = WT_INSERT_KEY(ins);
cbt->tmp->size = WT_INSERT_KEY_SIZE(ins);
- return (__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, updp, validp));
+ return (__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, validp));
}
/*
@@ -35,16 +33,15 @@ __random_insert_valid(
* Check if the slot key/value pair is valid.
*/
static int
-__random_slot_valid(WT_CURSOR_BTREE *cbt, uint32_t slot, WT_UPDATE **updp, bool *validp)
+__random_slot_valid(WT_CURSOR_BTREE *cbt, uint32_t slot, bool *validp)
{
- *updp = NULL;
*validp = false;
__cursor_pos_clear(cbt);
cbt->slot = slot;
cbt->compare = 0;
- return (__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, updp, validp));
+ return (__wt_cursor_valid(cbt, cbt->tmp, WT_RECNO_OOB, validp));
}
/* Magic constant: 5000 entries in a skip list is enough to forcibly evict. */
@@ -64,7 +61,7 @@ __random_skip_entries(WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head)
uint32_t entries;
int level;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
entries = 0; /* [-Wconditional-uninitialized] */
if (ins_head == NULL)
@@ -106,18 +103,16 @@ __random_skip_entries(WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head)
* Return a random key/value from a skip list.
*/
static int
-__random_leaf_skip(
- WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, uint32_t entries, WT_UPDATE **updp, bool *validp)
+__random_leaf_skip(WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, uint32_t entries, bool *validp)
{
WT_INSERT *ins, *saved_ins;
WT_SESSION_IMPL *session;
uint32_t i;
int retry;
- *updp = NULL;
*validp = false;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/* This is a relatively expensive test, try a few times then quit. */
for (retry = 0; retry < WT_RANDOM_SKIP_RETRY; ++retry) {
@@ -136,7 +131,7 @@ __random_leaf_skip(
/* Try and return our selected record. */
if (ins != NULL) {
- WT_RET(__random_insert_valid(cbt, ins_head, ins, updp, validp));
+ WT_RET(__random_insert_valid(cbt, ins_head, ins, validp));
if (*validp)
return (0);
}
@@ -148,7 +143,7 @@ __random_leaf_skip(
ins = saved_ins;
}
for (; --i > 0 && ins != NULL; ins = WT_SKIP_NEXT(ins)) {
- WT_RET(__random_insert_valid(cbt, ins_head, ins, updp, validp));
+ WT_RET(__random_insert_valid(cbt, ins_head, ins, validp));
if (*validp)
return (0);
}
@@ -166,24 +161,23 @@ __random_leaf_skip(
* Look for a large insert list from which we can select a random item.
*/
static int
-__random_leaf_insert(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *validp)
+__random_leaf_insert(WT_CURSOR_BTREE *cbt, bool *validp)
{
WT_INSERT_HEAD *ins_head;
WT_PAGE *page;
WT_SESSION_IMPL *session;
uint32_t entries, slot, start;
- *updp = NULL;
*validp = false;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
/* Check for a large insert list with no items, that's common when tables are newly created. */
ins_head = WT_ROW_INSERT_SMALLEST(page);
entries = __random_skip_entries(cbt, ins_head);
if (entries >= WT_RANDOM_SKIP_INSERT_SMALLEST_ENOUGH) {
- WT_RET(__random_leaf_skip(cbt, ins_head, entries, updp, validp));
+ WT_RET(__random_leaf_skip(cbt, ins_head, entries, validp));
if (*validp)
return (0);
}
@@ -199,7 +193,7 @@ __random_leaf_insert(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *validp)
ins_head = WT_ROW_INSERT(page, &page->pg_row[slot]);
entries = __random_skip_entries(cbt, ins_head);
if (entries >= WT_RANDOM_SKIP_INSERT_ENOUGH) {
- WT_RET(__random_leaf_skip(cbt, ins_head, entries, updp, validp));
+ WT_RET(__random_leaf_skip(cbt, ins_head, entries, validp));
if (*validp)
return (0);
}
@@ -208,7 +202,7 @@ __random_leaf_insert(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *validp)
ins_head = WT_ROW_INSERT(page, &page->pg_row[slot]);
entries = __random_skip_entries(cbt, ins_head);
if (entries >= WT_RANDOM_SKIP_INSERT_ENOUGH) {
- WT_RET(__random_leaf_skip(cbt, ins_head, entries, updp, validp));
+ WT_RET(__random_leaf_skip(cbt, ins_head, entries, validp));
if (*validp)
return (0);
}
@@ -219,7 +213,7 @@ __random_leaf_insert(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *validp)
ins_head = WT_ROW_INSERT_SMALLEST(page);
entries = __random_skip_entries(cbt, ins_head);
if (entries >= WT_RANDOM_SKIP_INSERT_ENOUGH) {
- WT_RET(__random_leaf_skip(cbt, ins_head, entries, updp, validp));
+ WT_RET(__random_leaf_skip(cbt, ins_head, entries, validp));
if (*validp)
return (0);
}
@@ -234,25 +228,24 @@ __random_leaf_insert(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *validp)
* Return a random key/value from a page's on-disk entries.
*/
static int
-__random_leaf_disk(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *validp)
+__random_leaf_disk(WT_CURSOR_BTREE *cbt, bool *validp)
{
WT_PAGE *page;
WT_SESSION_IMPL *session;
uint32_t entries, slot;
int retry;
- *updp = NULL;
*validp = false;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
entries = cbt->ref->page->entries;
/* This is a relatively cheap test, so try several times. */
for (retry = 0; retry < WT_RANDOM_DISK_RETRY; ++retry) {
slot = __wt_random(&session->rnd) % entries;
WT_RET(__wt_row_leaf_key(session, page, page->pg_row + slot, cbt->tmp, false));
- WT_RET(__random_slot_valid(cbt, slot, updp, validp));
+ WT_RET(__random_slot_valid(cbt, slot, validp));
if (*validp)
break;
}
@@ -274,12 +267,11 @@ __random_leaf(WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- WT_UPDATE *upd;
uint32_t i;
bool next, valid;
- cursor = (WT_CURSOR *)cbt;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ cursor = &cbt->iface;
+ session = CUR2S(cbt);
/*
* If the page has a sufficiently large number of disk-based entries, randomly select from them.
@@ -287,24 +279,24 @@ __random_leaf(WT_CURSOR_BTREE *cbt)
* a reasonable chunk of the name space.
*/
if (cbt->ref->page->entries > WT_RANDOM_DISK_ENOUGH) {
- WT_RET(__random_leaf_disk(cbt, &upd, &valid));
+ WT_RET(__random_leaf_disk(cbt, &valid));
if (valid)
- return (__cursor_kv_return(cbt, upd));
+ return (__cursor_kv_return(cbt, cbt->upd_value));
}
/* Look for any large insert list and select from it. */
- WT_RET(__random_leaf_insert(cbt, &upd, &valid));
+ WT_RET(__random_leaf_insert(cbt, &valid));
if (valid)
- return (__cursor_kv_return(cbt, upd));
+ return (__cursor_kv_return(cbt, cbt->upd_value));
/*
* Try again if there are at least a few hundred disk-based entries: this may be a normal leaf
* page with big items.
*/
if (cbt->ref->page->entries > WT_RANDOM_DISK_ENOUGH / 2) {
- WT_RET(__random_leaf_disk(cbt, &upd, &valid));
+ WT_RET(__random_leaf_disk(cbt, &valid));
if (valid)
- return (__cursor_kv_return(cbt, upd));
+ return (__cursor_kv_return(cbt, cbt->upd_value));
}
/*
@@ -484,7 +476,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
btree = cbt->btree;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
read_flags = WT_READ_RESTART_OK;
if (F_ISSET(cbt, WT_CBT_READ_ONCE))
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 5c8c0ea871a..4d83914e1a3 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -157,6 +157,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
* evicting that page and deciding that is a sign that eviction is unstuck.
*/
page_flags = WT_DATA_IN_ITEM(&tmp) ? WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED;
+ FLD_SET(page_flags, WT_PAGE_INSTANTIATE_PREPARE_UPDATE);
if (LF_ISSET(WT_READ_IGNORE_CACHE_SIZE))
FLD_SET(page_flags, WT_PAGE_EVICT_NO_PROGRESS);
WT_ERR(__wt_page_inmem(session, ref, tmp.data, page_flags, &notused));
diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
index 353f159f6bb..5f29cf08691 100644
--- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c
+++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
@@ -76,13 +76,7 @@ __rebalance_leaf_append(WT_SESSION_IMPL *session, const uint8_t *key, size_t key
WT_RET(__wt_calloc_one(session, &copy_addr));
copy->addr = copy_addr;
- copy_addr->newest_start_durable_ts = unpack->newest_start_durable_ts;
- copy_addr->oldest_start_ts = unpack->oldest_start_ts;
- copy_addr->oldest_start_txn = unpack->oldest_start_txn;
- copy_addr->newest_stop_durable_ts = unpack->newest_stop_durable_ts;
- copy_addr->newest_stop_ts = unpack->newest_stop_ts;
- copy_addr->newest_stop_txn = unpack->newest_stop_txn;
- copy_addr->prepare = F_ISSET(unpack, WT_CELL_UNPACK_PREPARE);
+ __wt_time_aggregate_copy(&copy_addr->ta, &unpack->ta);
WT_RET(__wt_memdup(session, unpack->data, unpack->size, &copy_addr->addr));
copy_addr->size = (uint8_t)unpack->size;
copy_addr->type = unpack->type == WT_CELL_ADDR_LEAF ? WT_ADDR_LEAF : WT_ADDR_LEAF_NO;
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index 2061d561a7a..1a2360f6d09 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -23,7 +23,7 @@ __key_return(WT_CURSOR_BTREE *cbt)
page = cbt->ref->page;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
if (page->type == WT_PAGE_ROW_LEAF) {
rip = &page->pg_row[cbt->slot];
@@ -70,102 +70,71 @@ __key_return(WT_CURSOR_BTREE *cbt)
}
/*
- * __time_pairs_init --
- * Initialize the time pairs to globally visible.
+ * __read_col_time_window --
+ * Retrieve the time window from a column store cell.
*/
-static inline void
-__time_pairs_init(WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
+static void
+__read_col_time_window(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_TIME_WINDOW *tw)
{
- start->txnid = WT_TXN_NONE;
- start->timestamp = WT_TS_NONE;
- stop->txnid = WT_TXN_MAX;
- stop->timestamp = WT_TS_MAX;
+ WT_CELL_UNPACK unpack;
+
+ __wt_cell_unpack(session, page, cell, &unpack);
+ __wt_time_window_copy(tw, &unpack.tw);
}
/*
- * __time_pairs_set --
- * Set the time pairs.
+ * __wt_read_row_time_window --
+ * Retrieve the time window from a row.
*/
-static inline void
-__time_pairs_set(WT_TIME_PAIR *start, WT_TIME_PAIR *stop, WT_CELL_UNPACK *unpack)
+void
+__wt_read_row_time_window(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_TIME_WINDOW *tw)
{
- start->timestamp = unpack->start_ts;
- start->txnid = unpack->start_txn;
- stop->timestamp = unpack->stop_ts;
- stop->txnid = unpack->stop_txn;
+ WT_CELL_UNPACK unpack;
+
+ __wt_time_window_init(tw);
+ /*
+ * If a value is simple and is globally visible at the time of reading a page into cache, we set
+ * the time window as globally visible.
+ */
+ if (__wt_row_leaf_value_exists(rip))
+ return;
+
+ __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
+ __wt_time_window_copy(tw, &unpack.tw);
}
/*
- * __wt_read_cell_time_pairs --
+ * __wt_read_cell_time_window --
* Read the time pairs from the cell.
*/
void
-__wt_read_cell_time_pairs(
- WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
+__wt_read_cell_time_window(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_TIME_WINDOW *tw)
{
WT_PAGE *page;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = ref->page;
- WT_ASSERT(session, start != NULL && stop != NULL);
+ WT_ASSERT(session, tw != NULL);
/* Take the value from the original page cell. */
if (page->type == WT_PAGE_ROW_LEAF) {
- __wt_read_row_time_pairs(session, page, &page->pg_row[cbt->slot], start, stop);
+ __wt_read_row_time_window(session, page, &page->pg_row[cbt->slot], tw);
} else if (page->type == WT_PAGE_COL_VAR) {
- __wt_read_col_time_pairs(
- session, page, WT_COL_PTR(page, &page->pg_var[cbt->slot]), start, stop);
+ __read_col_time_window(session, page, WT_COL_PTR(page, &page->pg_var[cbt->slot]), tw);
} else {
/* WT_PAGE_COL_FIX: return the default time pairs. */
- __time_pairs_init(start, stop);
+ __wt_time_window_init(tw);
}
}
/*
- * __wt_read_col_time_pairs --
- * Retrieve the time pairs from a column store cell.
- */
-void
-__wt_read_col_time_pairs(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
-{
- WT_CELL_UNPACK unpack;
-
- __wt_cell_unpack(session, page, cell, &unpack);
- __time_pairs_set(start, stop, &unpack);
-}
-
-/*
- * __wt_read_row_time_pairs --
- * Retrieve the time pairs from a row.
- */
-void
-__wt_read_row_time_pairs(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
-{
- WT_CELL_UNPACK unpack;
-
- __time_pairs_init(start, stop);
- /*
- * If a value is simple and is globally visible at the time of reading a page into cache, we set
- * the time pairs as globally visible.
- */
- if (__wt_row_leaf_value_exists(rip))
- return;
-
- __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
- __time_pairs_set(start, stop, &unpack);
-}
-
-/*
* __wt_value_return_buf --
* Change a buffer to reference an internal original-page return value.
*/
int
-__wt_value_return_buf(
- WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_ITEM *buf, WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
+__wt_value_return_buf(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_ITEM *buf, WT_TIME_WINDOW *tw)
{
WT_BTREE *btree;
WT_CELL *cell;
@@ -176,18 +145,12 @@ __wt_value_return_buf(
WT_SESSION_IMPL *session;
uint8_t v;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
btree = S2BT(session);
page = ref->page;
cursor = &cbt->iface;
- if (start != NULL && stop != NULL)
- __time_pairs_init(start, stop);
-
- /* Must provide either both start and stop as output parameters or neither. */
- WT_ASSERT(session, (start != NULL && stop != NULL) || (start == NULL && stop == NULL));
-
if (page->type == WT_PAGE_ROW_LEAF) {
rip = &page->pg_row[cbt->slot];
@@ -195,14 +158,16 @@ __wt_value_return_buf(
* If a value is simple and is globally visible at the time of reading a page into cache, we
* encode its location into the WT_ROW.
*/
- if (__wt_row_leaf_value(page, rip, buf))
+ if (__wt_row_leaf_value(page, rip, buf)) {
+ if (tw != NULL)
+ __wt_time_window_init(tw);
return (0);
+ }
/* Take the value from the original page cell. */
__wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
- if (start != NULL && stop != NULL)
- __time_pairs_set(start, stop, &unpack);
-
+ if (tw != NULL)
+ __wt_time_window_copy(tw, &unpack.tw);
return (__wt_page_cell_data_ref(session, page, &unpack, buf));
}
@@ -210,17 +175,18 @@ __wt_value_return_buf(
/* Take the value from the original page cell. */
cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]);
__wt_cell_unpack(session, page, cell, &unpack);
- if (start != NULL && stop != NULL)
- __time_pairs_set(start, stop, &unpack);
-
+ if (tw != NULL)
+ __wt_time_window_copy(tw, &unpack.tw);
return (__wt_page_cell_data_ref(session, page, &unpack, buf));
}
/*
* WT_PAGE_COL_FIX: Take the value from the original page.
*
- * FIXME-PM-1523: Should also check visibility here
+ * FIXME-WT-6126: Should also check visibility here
*/
+ if (tw != NULL)
+ __wt_time_window_init(tw);
v = __bit_getv_recno(ref, cursor->recno, btree->bitcnt);
return (__wt_buf_set(session, buf, &v, 1));
}
@@ -232,95 +198,7 @@ __wt_value_return_buf(
static inline int
__value_return(WT_CURSOR_BTREE *cbt)
{
- return (__wt_value_return_buf(cbt, cbt->ref, &cbt->iface.value, NULL, NULL));
-}
-
-/*
- * __wt_value_return_upd --
- * Change the cursor to reference an internal update structure return value.
- */
-int
-__wt_value_return_upd(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
-{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_MODIFY_VECTOR modifies;
- WT_SESSION_IMPL *session;
- WT_TIME_PAIR start, stop;
-
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- __wt_modify_vector_init(session, &modifies);
-
- /*
- * We're passed a "standard" or "modified" update that's visible to us. Our caller should have
- * already checked for deleted items (we're too far down the call stack to return not-found).
- *
- * Fast path if it's a standard item, assert our caller's behavior.
- */
- if (upd->type == WT_UPDATE_STANDARD) {
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK)) {
- /* Copy an external update, and delete after using it */
- WT_RET(__wt_buf_set(session, &cursor->value, upd->data, upd->size));
- __wt_free_update_list(session, &upd);
- } else {
- cursor->value.data = upd->data;
- cursor->value.size = upd->size;
- }
- return (0);
- }
- WT_ASSERT(session, upd->type == WT_UPDATE_MODIFY);
-
- /*
- * Find a complete update.
- */
- for (; upd != NULL; upd = upd->next) {
- if (upd->txnid == WT_TXN_ABORTED)
- continue;
-
- if (WT_UPDATE_DATA_VALUE(upd))
- break;
-
- if (upd->type == WT_UPDATE_MODIFY)
- WT_ERR(__wt_modify_vector_push(&modifies, upd));
- }
-
- /*
- * If there's no full update, the base item is the on-page item. If the update is a tombstone,
- * the base item is an empty item.
- */
- if (upd == NULL) {
- /*
- * Callers of this function set the cursor slot to an impossible value to check we don't try
- * and return on-page values when the update list should have been sufficient (which
- * happens, for example, if an update list was truncated, deleting some standard update
- * required by a previous modify update). Assert the case.
- */
- WT_ASSERT(session, cbt->slot != UINT32_MAX);
-
- WT_ERR(__wt_value_return_buf(cbt, cbt->ref, &cbt->iface.value, &start, &stop));
- /*
- * Applying modifies on top of a tombstone is invalid. So if we're using the onpage value,
- * the stop time pair should be unset.
- */
- WT_ASSERT(session, stop.txnid == WT_TXN_MAX && stop.timestamp == WT_TS_MAX);
- } else {
- /* The base update must not be a tombstone. */
- WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD);
- WT_ERR(__wt_buf_set(session, &cursor->value, upd->data, upd->size));
- }
-
- /*
- * Once we have a base item, roll forward through any visible modify updates.
- */
- while (modifies.size > 0) {
- __wt_modify_vector_pop(&modifies, &upd);
- WT_ERR(__wt_modify_apply(cursor, upd->data));
- }
-
-err:
- __wt_modify_vector_free(&modifies);
- return (ret);
+ return (__wt_value_return_buf(cbt, cbt->ref, &cbt->iface.value, NULL));
}
/*
@@ -352,20 +230,37 @@ __wt_key_return(WT_CURSOR_BTREE *cbt)
/*
* __wt_value_return --
- * Change the cursor to reference an internal return value.
+ * Change the cursor to reference an update return value.
*/
int
-__wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+__wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE_VALUE *upd_value)
{
WT_CURSOR *cursor;
+ WT_SESSION_IMPL *session;
cursor = &cbt->iface;
+ session = CUR2S(cbt);
F_CLR(cursor, WT_CURSTD_VALUE_EXT);
- if (upd == NULL)
+ if (upd_value->type == WT_UPDATE_INVALID) {
+ /*
+ * FIXME-WT-6127: This is a holdover from the pre-durable history read logic where we used
+ * to fall back to the on-page value if we didn't find a visible update elsewhere. This is
+ * still required for fixed-length column store as we have issues with this table type in
+ * durable history which we're planning to address in PM-1814.
+ */
+ WT_ASSERT(session, cbt->btree->type == BTREE_COL_FIX);
WT_RET(__value_return(cbt));
- else
- WT_RET(__wt_value_return_upd(cbt, upd));
+ } else {
+ /*
+ * We're passed a "standard" update that's visible to us. Our caller should have already
+ * checked for deleted items (we're too far down the call stack to return not-found) and any
+ * modify updates should have been reconstructed into a full standard update.
+ */
+ WT_ASSERT(session, upd_value->type == WT_UPDATE_STANDARD);
+ cursor->value.data = upd_value->buf.data;
+ cursor->value.size = upd_value->buf.size;
+ }
F_SET(cursor, WT_CURSTD_VALUE_INT);
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index a42e11e1d8f..344c6a573d7 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -186,12 +186,7 @@ __slvg_checkpoint(WT_SESSION_IMPL *session, WT_REF *root)
__wt_seconds(session, &ckptbase->sec);
WT_ERR(__wt_metadata_search(session, dhandle->name, &config));
WT_ERR(__wt_meta_block_metadata(session, config, ckptbase));
- ckptbase->start_durable_ts = WT_TS_NONE;
- ckptbase->oldest_start_ts = WT_TS_NONE;
- ckptbase->oldest_start_txn = WT_TXN_NONE;
- ckptbase->stop_durable_ts = WT_TS_NONE;
- ckptbase->newest_stop_ts = WT_TS_MAX;
- ckptbase->newest_stop_txn = WT_TXN_MAX;
+ __wt_time_aggregate_init(&ckptbase->ta);
ckptbase->write_gen = btree->write_gen;
F_SET(ckptbase, WT_CKPT_ADD);
@@ -917,7 +912,7 @@ __slvg_col_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s
*/
/* Case #2/8, #10, #11 */
if (a_trk->col_start > b_trk->col_start)
- WT_PANIC_RET(session, EINVAL, "unexpected merge array sort order");
+ WT_RET_PANIC(session, EINVAL, "unexpected merge array sort order");
if (a_trk->col_start == b_trk->col_start) { /* Case #1, #4 and #9 */
/*
@@ -1174,12 +1169,7 @@ __slvg_col_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF
* regardless of a value's timestamps or transaction IDs.
*/
WT_ERR(__wt_calloc_one(session, &addr));
- addr->newest_start_durable_ts = addr->newest_stop_durable_ts = addr->oldest_start_ts =
- WT_TS_NONE;
- addr->oldest_start_txn = WT_TXN_NONE;
- addr->newest_stop_ts = WT_TS_MAX;
- addr->newest_stop_txn = WT_TXN_MAX;
- addr->prepare = false;
+ __wt_time_aggregate_init(&addr->ta);
WT_ERR(__wt_memdup(session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
addr->size = trk->trk_addr_size;
addr->type = trk->trk_ovfl_cnt == 0 ? WT_ADDR_LEAF_NO : WT_ADDR_LEAF;
@@ -1323,7 +1313,7 @@ __slvg_col_ovfl_single(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK *
return (__slvg_ovfl_ref(session, ovfl, false));
}
- WT_PANIC_RET(session, EINVAL, "overflow record at column-store page merge not found");
+ WT_RET_PANIC(session, EINVAL, "overflow record at column-store page merge not found");
}
/*
@@ -1512,7 +1502,7 @@ __slvg_row_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s
WT_RET(__wt_compare(session, btree->collator, A_TRK_STOP, B_TRK_STOP, &stop_cmp));
if (start_cmp > 0) /* Case #2/8, #10, #11 */
- WT_PANIC_RET(session, EINVAL, "unexpected merge array sort order");
+ WT_RET_PANIC(session, EINVAL, "unexpected merge array sort order");
if (start_cmp == 0) { /* Case #1, #4, #9 */
/*
@@ -1782,12 +1772,7 @@ __slvg_row_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF
* regardless of a value's timestamps or transaction IDs.
*/
WT_ERR(__wt_calloc_one(session, &addr));
- addr->newest_start_durable_ts = addr->newest_stop_durable_ts = addr->oldest_start_ts =
- WT_TS_NONE;
- addr->oldest_start_txn = WT_TXN_NONE;
- addr->newest_stop_ts = WT_TS_MAX;
- addr->newest_stop_txn = WT_TXN_MAX;
- addr->prepare = false;
+ __wt_time_aggregate_init(&addr->ta);
WT_ERR(__wt_memdup(session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
addr->size = trk->trk_addr_size;
addr->type = trk->trk_ovfl_cnt == 0 ? WT_ADDR_LEAF_NO : WT_ADDR_LEAF;
@@ -1992,7 +1977,7 @@ __slvg_row_ovfl_single(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK *
return (__slvg_ovfl_ref(session, ovfl, true));
}
- WT_PANIC_RET(session, EINVAL, "overflow record at row-store page merge not found");
+ WT_RET_PANIC(session, EINVAL, "overflow record at row-store page merge not found");
}
/*
@@ -2270,7 +2255,7 @@ __slvg_ovfl_ref(WT_SESSION_IMPL *session, WT_TRACK *trk, bool multi_panic)
if (F_ISSET(trk, WT_TRACK_OVFL_REFD)) {
if (!multi_panic)
return (__wt_set_return(session, EBUSY));
- WT_PANIC_RET(session, EINVAL,
+ WT_RET_PANIC(session, EINVAL,
"overflow record unexpectedly referenced multiple times "
"during leaf page merge");
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index a2d85f79db8..2a016d6d725 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -147,7 +147,7 @@ __split_verify_root(WT_SESSION_IMPL *session, WT_PAGE *page)
err:
/* Something really bad just happened. */
- WT_PANIC_RET(session, ret, "fatal error during page split");
+ WT_RET_PANIC(session, ret, "fatal error during page split");
}
#endif
@@ -249,13 +249,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, WT_REF **from_ref
if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) {
__wt_cell_unpack(session, from_home, (WT_CELL *)ref_addr, &unpack);
WT_RET(__wt_calloc_one(session, &addr));
- addr->oldest_start_ts = unpack.oldest_start_ts;
- addr->oldest_start_txn = unpack.oldest_start_txn;
- addr->newest_start_durable_ts = unpack.newest_start_durable_ts;
- addr->newest_stop_ts = unpack.newest_stop_ts;
- addr->newest_stop_txn = unpack.newest_stop_txn;
- addr->newest_stop_durable_ts = unpack.newest_stop_durable_ts;
- addr->prepare = F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE);
+ __wt_time_aggregate_copy(&addr->ta, &unpack.ta);
WT_ERR(__wt_memdup(session, unpack.data, unpack.size, &addr->addr));
addr->size = (uint8_t)unpack.size;
switch (unpack.raw) {
@@ -574,17 +568,17 @@ err:
case WT_ERR_RETURN:
__wt_free_ref_index(session, root, alloc_index, true);
break;
- case WT_ERR_PANIC:
- __wt_err(session, ret, "fatal error during root page split to deepen the tree");
- ret = WT_PANIC;
- break;
case WT_ERR_IGNORE:
- if (ret != 0 && ret != WT_PANIC) {
- __wt_err(session, ret,
- "ignoring not-fatal error during root page split "
- "to deepen the tree");
+ if (ret != WT_PANIC) {
+ if (ret != 0)
+ __wt_err(session, ret,
+ "ignoring not-fatal error during root page split to deepen the tree");
ret = 0;
+ break;
}
+ /* FALLTHROUGH */
+ case WT_ERR_PANIC:
+ ret = __wt_panic(session, ret, "fatal error during root page split to deepen the tree");
break;
}
return (ret);
@@ -877,17 +871,16 @@ err:
if (empty_parent)
ret = __wt_set_return(session, EBUSY);
break;
- case WT_ERR_PANIC:
- __wt_err(session, ret, "fatal error during parent page split");
- ret = WT_PANIC;
- break;
case WT_ERR_IGNORE:
- if (ret != 0 && ret != WT_PANIC) {
- __wt_err(session, ret,
- "ignoring not-fatal error during parent page "
- "split");
+ if (ret != WT_PANIC) {
+ if (ret != 0)
+ __wt_err(session, ret, "ignoring not-fatal error during parent page split");
ret = 0;
+ break;
}
+ /* FALLTHROUGH */
+ case WT_ERR_PANIC:
+ ret = __wt_panic(session, ret, "fatal error during parent page split");
break;
}
__wt_scr_free(session, &scr);
@@ -1154,17 +1147,16 @@ err:
}
__wt_free_ref_index(session, page, alloc_index, true);
break;
- case WT_ERR_PANIC:
- __wt_err(session, ret, "fatal error during internal page split");
- ret = WT_PANIC;
- break;
case WT_ERR_IGNORE:
- if (ret != 0 && ret != WT_PANIC) {
- __wt_err(session, ret,
- "ignoring not-fatal error during internal page "
- "split");
+ if (ret != WT_PANIC) {
+ if (ret != 0)
+ __wt_err(session, ret, "ignoring not-fatal error during internal page split");
ret = 0;
+ break;
}
+ /* FALLTHROUGH */
+ case WT_ERR_PANIC:
+ ret = __wt_panic(session, ret, "fatal error during internal page split");
break;
}
return (ret);
@@ -1391,7 +1383,7 @@ __split_multi_inmem(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT
WT_SAVE_UPD *supd;
WT_UPDATE *prev_onpage, *upd;
uint64_t recno;
- uint32_t i, slot;
+ uint32_t i, page_flags, slot;
/*
* In 04/2016, we removed column-store record numbers from the WT_PAGE structure, leading to
@@ -1413,7 +1405,8 @@ __split_multi_inmem(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT
* our caller will not discard the disk image when discarding the original page, and our caller
* will discard the allocated page on error, when discarding the allocated WT_REF.
*/
- WT_RET(__wt_page_inmem(session, ref, multi->disk_image, WT_PAGE_DISK_ALLOC, &page));
+ page_flags = WT_PAGE_DISK_ALLOC | WT_PAGE_INSTANTIATE_PREPARE_UPDATE;
+ WT_RET(__wt_page_inmem(session, ref, multi->disk_image, page_flags, &page));
multi->disk_image = NULL;
/*
@@ -1704,13 +1697,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_R
if (multi->addr.addr != NULL) {
WT_RET(__wt_calloc_one(session, &addr));
ref->addr = addr;
- addr->oldest_start_ts = multi->addr.oldest_start_ts;
- addr->oldest_start_txn = multi->addr.oldest_start_txn;
- addr->newest_start_durable_ts = multi->addr.newest_start_durable_ts;
- addr->newest_stop_ts = multi->addr.newest_stop_ts;
- addr->newest_stop_txn = multi->addr.newest_stop_txn;
- addr->newest_stop_durable_ts = multi->addr.newest_stop_durable_ts;
- addr->prepare = multi->addr.prepare;
+ __wt_time_aggregate_copy(&addr->ta, &multi->addr.ta);
WT_RET(__wt_memdup(session, multi->addr.addr, multi->addr.size, &addr->addr));
addr->size = multi->addr.size;
addr->type = multi->addr.type;
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 851a407f165..fd36f6b24f9 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -220,8 +220,8 @@ __sync_ref_obsolete_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF_LIST *rl
if (previous_state == WT_REF_DISK) {
/* There should be an address, but simply skip any page where we don't find one. */
if (__wt_ref_addr_copy(session, ref, &addr)) {
- newest_stop_ts = addr.newest_stop_ts;
- newest_stop_txn = addr.newest_stop_txn;
+ newest_stop_ts = addr.ta.newest_stop_ts;
+ newest_stop_txn = addr.ta.newest_stop_txn;
obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_ts);
}
@@ -274,21 +274,21 @@ __sync_ref_obsolete_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF_LIST *rl
/* Calculate the max stop time pair by traversing all multi addresses. */
for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
- newest_stop_txn = WT_MAX(newest_stop_txn, multi->addr.newest_stop_txn);
- newest_stop_ts = WT_MAX(newest_stop_ts, multi->addr.newest_stop_ts);
+ newest_stop_txn = WT_MAX(newest_stop_txn, multi->addr.ta.newest_stop_txn);
+ newest_stop_ts = WT_MAX(newest_stop_ts, multi->addr.ta.newest_stop_ts);
}
obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_ts);
} else if (mod != NULL && mod->rec_result == WT_PM_REC_REPLACE) {
tag = "reconciled replacement block";
- newest_stop_txn = mod->mod_replace.newest_stop_txn;
- newest_stop_ts = mod->mod_replace.newest_stop_ts;
+ newest_stop_txn = mod->mod_replace.ta.newest_stop_txn;
+ newest_stop_ts = mod->mod_replace.ta.newest_stop_ts;
obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_ts);
} else if (__wt_ref_addr_copy(session, ref, &addr)) {
tag = "WT_REF address";
- newest_stop_txn = addr.newest_stop_txn;
- newest_stop_ts = addr.newest_stop_ts;
+ newest_stop_txn = addr.ta.newest_stop_txn;
+ newest_stop_ts = addr.ta.newest_stop_ts;
obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_ts);
} else
tag = "unexpected page state";
@@ -469,12 +469,7 @@ __wt_sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
btree->syncing = WT_BTREE_SYNC_RUNNING;
is_hs = WT_IS_HS(btree);
- /*
- * Add in history store reconciliation for standard files.
- *
- * FIXME-PM-1521: Remove the history store check, and assert that no updates from the
- * history store are copied to the history store recursively.
- */
+ /* Add in history store reconciliation for standard files. */
rec_flags = WT_REC_CHECKPOINT;
if (!is_hs && !WT_IS_METADATA(btree->dhandle))
rec_flags |= WT_REC_HS;
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index c9708e9511b..0b3d4da2459 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -255,17 +255,13 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
* Create a fake, unpacked parent cell for the tree based on the checkpoint information.
*/
memset(&addr_unpack, 0, sizeof(addr_unpack));
- addr_unpack.newest_start_durable_ts = ckpt->start_durable_ts;
- addr_unpack.newest_stop_durable_ts = ckpt->stop_durable_ts;
- addr_unpack.oldest_start_ts = ckpt->oldest_start_ts;
- addr_unpack.newest_stop_ts = ckpt->newest_stop_ts;
- if (ckpt->write_gen > S2C(session)->base_write_gen) {
- addr_unpack.oldest_start_txn = ckpt->oldest_start_txn;
- addr_unpack.newest_stop_txn = ckpt->newest_stop_txn;
- } else {
- addr_unpack.oldest_start_txn = WT_TXN_NONE;
- addr_unpack.newest_stop_txn = WT_TXN_MAX;
+ __wt_time_aggregate_copy(&addr_unpack.ta, &ckpt->ta);
+ if (ckpt->write_gen <= S2C(session)->base_write_gen) {
+ addr_unpack.ta.oldest_start_txn = WT_TXN_NONE;
+ addr_unpack.ta.newest_stop_txn = WT_TXN_MAX;
}
+ if (ckpt->ta.prepare)
+ F_SET(&addr_unpack, WT_CELL_UNPACK_PREPARE);
addr_unpack.raw = WT_CELL_ADDR_INT;
/* Verify the tree. */
@@ -367,15 +363,14 @@ __verify_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf)
WT_ADDR_COPY addr;
WT_DECL_ITEM(tmp);
WT_DECL_RET;
- char tp_string[2][WT_TP_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
WT_ERR(__wt_scr_alloc(session, 0, &tmp));
if (__wt_ref_addr_copy(session, ref, &addr)) {
- WT_ERR(__wt_buf_fmt(session, buf, "%s %s,%s",
- __wt_addr_string(session, addr.addr, addr.size, tmp),
- __wt_time_pair_to_string(addr.oldest_start_ts, addr.oldest_start_txn, tp_string[0]),
- __wt_time_pair_to_string(addr.newest_stop_ts, addr.newest_stop_txn, tp_string[1])));
+ WT_ERR(
+ __wt_buf_fmt(session, buf, "%s %s", __wt_addr_string(session, addr.addr, addr.size, tmp),
+ __wt_time_aggregate_to_string(&addr.ta, time_string)));
} else
WT_ERR(__wt_buf_fmt(session, buf, "%s -/-,-/-", __wt_addr_string(session, NULL, 0, tmp)));
@@ -391,28 +386,41 @@ err:
static int
__verify_addr_ts(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *unpack, WT_VSTUFF *vs)
{
- char ts_string[2][WT_TS_INT_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
- if (unpack->oldest_start_ts != WT_TS_NONE && unpack->newest_stop_ts == WT_TS_NONE)
+ if (unpack->ta.oldest_start_ts != WT_TS_NONE && unpack->ta.newest_stop_ts == WT_TS_NONE)
WT_RET_MSG(session, WT_ERROR,
"internal page reference at %s has a newest stop "
"timestamp of 0",
__verify_addr_string(session, ref, vs->tmp1));
- if (unpack->oldest_start_ts > unpack->newest_stop_ts)
+ if (unpack->ta.oldest_start_ts > unpack->ta.newest_stop_ts)
WT_RET_MSG(session, WT_ERROR,
"internal page reference at %s has an oldest start "
- "timestamp %s newer than its newest stop timestamp %s",
+ "timestamp newer than its newest stop timestamp; time aggregate %s",
__verify_addr_string(session, ref, vs->tmp1),
- __wt_timestamp_to_string(unpack->oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(unpack->newest_stop_ts, ts_string[1]));
- if (unpack->oldest_start_txn > unpack->newest_stop_txn)
+ __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.oldest_start_txn > unpack->ta.newest_stop_txn)
WT_RET_MSG(session, WT_ERROR,
"internal page reference at %s has an oldest start "
- "transaction (%" PRIu64
- ") newer than its newest stop "
- "transaction (%" PRIu64 ")",
- __verify_addr_string(session, ref, vs->tmp1), unpack->oldest_start_txn,
- unpack->newest_stop_txn);
+ "transaction newer than its newest stop "
+ "transaction; time aggregate %s",
+ __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.oldest_start_ts > unpack->ta.newest_start_durable_ts)
+ WT_RET_MSG(session, WT_ERROR,
+ "internal page reference at %s has an oldest start "
+ "timestamp newer than its newest start durable "
+ "timestamp; time aggregate %s",
+ __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.newest_stop_ts != WT_TS_MAX &&
+ unpack->ta.newest_stop_ts > unpack->ta.newest_stop_durable_ts)
+ WT_RET_MSG(session, WT_ERROR,
+ "internal page reference at %s has a newest stop "
+ "timestamp newer than its newest stop durable "
+ "timestamp; time aggregate %s",
+ __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(&unpack->ta, time_string));
return (0);
}
@@ -781,7 +789,7 @@ __verify_ts_stable_cmp(WT_SESSION_IMPL *session, WT_ITEM *key, WT_REF *ref, uint
{
WT_BTREE *btree;
WT_DECL_RET;
- char tp_string[2][WT_TP_STRING_SIZE];
+ char tp_string[2][WT_TS_INT_STRING_SIZE];
bool start;
btree = S2BT(session);
@@ -949,7 +957,7 @@ __verify_page_content(
uint64_t recno, rle;
uint32_t cell_num;
uint8_t *p;
- char ts_string[2][WT_TS_INT_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
bool found_ovfl;
btree = S2BT(session);
@@ -992,108 +1000,126 @@ __verify_page_content(
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
- if (unpack.oldest_start_ts != WT_TS_NONE && unpack.newest_stop_ts == WT_TS_NONE)
+ if (unpack.ta.oldest_start_ts != WT_TS_NONE && unpack.ta.newest_stop_ts == WT_TS_NONE)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a "
- "newest stop timestamp of 0",
- cell_num - 1, __verify_addr_string(session, ref, vs->tmp1));
- if (unpack.oldest_start_ts > unpack.newest_stop_ts)
+ "newest stop timestamp of 0; time window %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(&unpack.ta, time_string));
+ if (unpack.ta.oldest_start_ts > unpack.ta.newest_stop_ts)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has an "
- "oldest start timestamp %s newer than "
- "its newest stop timestamp %s",
+ "oldest start timestamp newer than "
+ "its newest stop timestamp; time window %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_timestamp_to_string(unpack.oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(unpack.newest_stop_ts, ts_string[1]));
- if (unpack.oldest_start_txn > unpack.newest_stop_txn) {
+ __wt_time_aggregate_to_string(&unpack.ta, time_string));
+ if (unpack.ta.oldest_start_txn > unpack.ta.newest_stop_txn) {
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s has an "
- "oldest start transaction (%" PRIu64
- ") "
- "newer than its newest stop transaction "
- "(%" PRIu64 ")",
+ " on page "
+ "at %s has an oldest start transaction newer than "
+ "its newest stop transaction; time aggregate %s ",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- unpack.oldest_start_txn, unpack.newest_stop_txn);
+ __wt_time_aggregate_to_string(&unpack.ta, time_string));
}
+ if (unpack.ta.oldest_start_ts > unpack.ta.newest_start_durable_ts)
+ WT_RET_MSG(session, WT_ERROR,
+ "cell %" PRIu32
+ " on page at %s has an "
+ "oldest start timestamp newer than "
+ "its newest start durable timestamp; time aggregate %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(&unpack.ta, time_string));
+ if (unpack.ta.newest_stop_ts != WT_TS_MAX &&
+ unpack.ta.newest_stop_ts > unpack.ta.newest_stop_durable_ts)
+ WT_RET_MSG(session, WT_ERROR,
+ "cell %" PRIu32
+ " on page at %s has a "
+ "newest stop timestamp newer than "
+ "its newest stop durable timestamp; time aggregate %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(&unpack.ta, time_string));
- /*
- * FIXME-prepare-support: Enable verification once all durable is finished.
- *
- * WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start durable",
- * unpack.newest_start_durable_ts, "start durable",
- * addr_unpack->newest_start_durable_ts, false, vs));
- */
+ if (addr_unpack->ta.newest_start_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start durable",
+ unpack.ta.newest_start_durable_ts, "start durable",
+ addr_unpack->ta.newest_start_durable_ts, false, vs));
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "oldest start",
- unpack.oldest_start_ts, "oldest start", addr_unpack->oldest_start_ts, true, vs));
- WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "oldest start",
- unpack.oldest_start_txn, "oldest start", addr_unpack->oldest_start_txn, true, dsk,
+ unpack.ta.oldest_start_ts, "oldest start", addr_unpack->ta.oldest_start_ts, true,
vs));
+ WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "oldest start",
+ unpack.ta.oldest_start_txn, "oldest start", addr_unpack->ta.oldest_start_txn, true,
+ dsk, vs));
- /*
- * FIXME-prepare-support: Enable verification once all durable is finished.
- *
- * WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop durable",
- * unpack.newest_stop_durable_ts, "stop durable", addr_unpack->newest_stop_durable_ts,
- * false, vs));
- */
+ if (addr_unpack->ta.newest_stop_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop durable",
+ unpack.ta.newest_stop_durable_ts, "stop durable",
+ addr_unpack->ta.newest_stop_durable_ts, false, vs));
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "newest stop",
- unpack.newest_stop_ts, "newest stop", addr_unpack->newest_stop_ts, false, vs));
+ unpack.ta.newest_stop_ts, "newest stop", addr_unpack->ta.newest_stop_ts, false, vs));
WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "newest stop",
- unpack.newest_stop_txn, "newest stop", addr_unpack->newest_stop_txn, false, dsk, vs));
- WT_RET(__verify_ts_stable_cmp(
- session, NULL, ref, cell_num - 1, addr_unpack->start_ts, addr_unpack->stop_ts, vs));
+ unpack.ta.newest_stop_txn, "newest stop", addr_unpack->ta.newest_stop_txn, false, dsk,
+ vs));
+ WT_RET(__verify_ts_stable_cmp(session, NULL, ref, cell_num - 1,
+ addr_unpack->ta.oldest_start_ts, addr_unpack->ta.newest_stop_ts, vs));
break;
case WT_CELL_DEL:
case WT_CELL_VALUE:
case WT_CELL_VALUE_COPY:
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_SHORT:
- if (unpack.start_ts != WT_TS_NONE && unpack.stop_ts == WT_TS_NONE)
+ if (unpack.tw.start_ts != WT_TS_NONE && unpack.tw.stop_ts == WT_TS_NONE)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a stop "
- "timestamp of 0",
- cell_num - 1, __verify_addr_string(session, ref, vs->tmp1));
- if (unpack.start_ts > unpack.stop_ts)
+ "timestamp of 0; time window %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_window_to_string(&unpack.tw, time_string));
+ if (unpack.tw.start_ts > unpack.tw.stop_ts)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a "
- "start timestamp %s newer than its stop "
- "timestamp %s",
+ "start timestamp newer than its stop "
+ "timestamp; time window %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_timestamp_to_string(unpack.start_ts, ts_string[0]),
- __wt_timestamp_to_string(unpack.stop_ts, ts_string[1]));
- if (unpack.start_txn > unpack.stop_txn)
+ __wt_time_window_to_string(&unpack.tw, time_string));
+ if (unpack.tw.start_txn > unpack.tw.stop_txn)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
" on page at %s has a "
- "start transaction %" PRIu64
- "newer than "
- "its stop transaction %" PRIu64,
- cell_num - 1, __verify_addr_string(session, ref, vs->tmp1), unpack.start_txn,
- unpack.stop_txn);
-
- /*
- * FIXME-prepare-support: Enable verification once all durable is finished.
- *
- * WT_RET(
- * __verify_ts_addr_cmp(session, ref, cell_num - 1, "start", unpack.durable_start_ts,
- * "durable start", addr_unpack->newest_start_durable_ts, true, vs));
- */
- WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start", unpack.start_ts,
- "oldest start", addr_unpack->oldest_start_ts, true, vs));
- WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "start", unpack.start_txn,
- "oldest start", addr_unpack->oldest_start_txn, true, dsk, vs));
- /*
- * FIXME-prepare-support: Enable verification once all durable is finished.
- *
- * WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start",
- * unpack.durable_stop_ts,
- * "durable stop", addr_unpack->newest_stop_durable_ts, true, vs));
- */
- WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop", unpack.stop_ts,
- "newest stop", addr_unpack->newest_stop_ts, false, vs));
- WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "stop", unpack.stop_txn,
- "newest stop", addr_unpack->newest_stop_txn, false, dsk, vs));
+ "start transaction newer than "
+ "its stop transaction; time window %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_window_to_string(&unpack.tw, time_string));
+ if (unpack.tw.start_ts > unpack.tw.durable_start_ts)
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s has a "
+ "start timestamp newer than its start durable "
+ "timestamp; time window %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_window_to_string(&unpack.tw, time_string));
+ if (unpack.tw.stop_ts != WT_TS_MAX && unpack.tw.stop_ts > unpack.tw.durable_stop_ts)
+ WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
+ " on page at %s has a "
+ "stop timestamp newer than its stop durable "
+ "timestamp; time window %s",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_window_to_string(&unpack.tw, time_string));
+
+ if (addr_unpack->ta.newest_start_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start durable",
+ unpack.tw.durable_start_ts, "newest durable start",
+ addr_unpack->ta.newest_start_durable_ts, false, vs));
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start", unpack.tw.start_ts,
+ "oldest start", addr_unpack->ta.oldest_start_ts, true, vs));
+ WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "start", unpack.tw.start_txn,
+ "oldest start", addr_unpack->ta.oldest_start_txn, true, dsk, vs));
+ if (addr_unpack->ta.newest_stop_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop durable",
+ unpack.tw.durable_stop_ts, "newest durable stop",
+ addr_unpack->ta.newest_stop_durable_ts, false, vs));
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop", unpack.tw.stop_ts,
+ "newest stop", addr_unpack->ta.newest_stop_ts, false, vs));
+ WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "stop", unpack.tw.stop_txn,
+ "newest stop", addr_unpack->ta.newest_stop_txn, false, dsk, vs));
WT_RET(__verify_ts_stable_cmp(
- session, NULL, ref, cell_num - 1, unpack.start_ts, unpack.stop_ts, vs));
+ session, NULL, ref, cell_num - 1, unpack.tw.start_ts, unpack.tw.stop_ts, vs));
break;
}
@@ -1106,7 +1132,7 @@ __verify_page_content(
continue;
WT_RET(__wt_row_leaf_key(session, page, rip++, vs->tmp1, false));
- WT_RET(__verify_key_hs(session, vs->tmp1, unpack.start_ts, vs));
+ WT_RET(__verify_key_hs(session, vs->tmp1, unpack.tw.start_ts, vs));
#ifdef HAVE_DIAGNOSTIC
if (vs->dump_history)
@@ -1117,7 +1143,7 @@ __verify_page_content(
p = vs->tmp1->mem;
WT_RET(__wt_vpack_uint(&p, 0, recno));
vs->tmp1->size = WT_PTRDIFF(p, vs->tmp1->mem);
- WT_RET(__verify_key_hs(session, vs->tmp1, unpack.start_ts, vs));
+ WT_RET(__verify_key_hs(session, vs->tmp1, unpack.tw.start_ts, vs));
#ifdef HAVE_DIAGNOSTIC
if (vs->dump_history)
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
index 7b80327a22c..a1e96d41dc9 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
@@ -268,7 +268,7 @@ static int
__verify_dsk_validity(WT_SESSION_IMPL *session, WT_CELL_UNPACK *unpack, uint32_t cell_num,
WT_ADDR *addr, const char *tag, const WT_PAGE_HEADER *dsk)
{
- char ts_string[2][WT_TS_INT_STRING_SIZE];
+ char time_string[WT_TIME_STRING_SIZE];
/*
* Check timestamp and transaction order, and optionally against parent values. Timestamps and
@@ -284,43 +284,57 @@ __verify_dsk_validity(WT_SESSION_IMPL *session, WT_CELL_UNPACK *unpack, uint32_t
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
- if (unpack->oldest_start_ts != WT_TS_NONE && unpack->newest_stop_ts == WT_TS_NONE)
+ if (unpack->ta.oldest_start_ts != WT_TS_NONE && unpack->ta.newest_stop_ts == WT_TS_NONE)
WT_RET_VRFY(session, "cell %" PRIu32
" on page at %s has a newest stop "
- "timestamp of 0",
- cell_num - 1, tag);
- if (unpack->oldest_start_ts > unpack->newest_stop_ts)
+ "timestamp of 0; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.oldest_start_ts > unpack->ta.newest_stop_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has an oldest "
+ "start timestamp newer than its newest stop "
+ "timestamp; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.oldest_start_txn > unpack->ta.newest_stop_txn)
WT_RET_VRFY(session, "cell %" PRIu32
" on page at %s has an oldest "
- "start timestamp %s newer than its newest stop "
- "timestamp %s",
- cell_num - 1, tag, __wt_timestamp_to_string(unpack->oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(unpack->newest_stop_ts, ts_string[1]));
- if (unpack->oldest_start_txn > unpack->newest_stop_txn)
+ "start transaction newer than its "
+ "newest stop transaction; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.oldest_start_ts > unpack->ta.newest_start_durable_ts)
WT_RET_VRFY(session, "cell %" PRIu32
" on page at %s has an oldest "
- "start transaction %" PRIu64
- " newer than its "
- "newest stop transaction %" PRIu64,
- cell_num - 1, tag, unpack->oldest_start_txn, unpack->newest_stop_txn);
+ "start timestamp newer than its newest start durable "
+ "timestamp; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ if (unpack->ta.newest_stop_ts != WT_TS_MAX &&
+ unpack->ta.newest_stop_ts > unpack->ta.newest_stop_durable_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a newest "
+ "stop timestamp newer than its newest stop durable "
+ "timestamp; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
if (addr == NULL)
break;
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start durable",
- unpack->newest_start_durable_ts, "start durable", addr->newest_start_durable_ts, false,
- tag));
+ if (addr->ta.newest_start_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start durable",
+ unpack->ta.newest_start_durable_ts, "start durable", addr->ta.newest_start_durable_ts,
+ false, tag));
WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "oldest start",
- unpack->oldest_start_ts, "oldest start", addr->oldest_start_ts, true, tag));
+ unpack->ta.oldest_start_ts, "oldest start", addr->ta.oldest_start_ts, true, tag));
WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "oldest start",
- unpack->oldest_start_txn, "oldest start", addr->oldest_start_txn, true, tag, dsk));
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop durable",
- unpack->newest_stop_durable_ts, "stop durable", addr->newest_stop_durable_ts, false,
- tag));
+ unpack->ta.oldest_start_txn, "oldest start", addr->ta.oldest_start_txn, true, tag, dsk));
+
+ if (addr->ta.newest_stop_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop durable",
+ unpack->ta.newest_stop_durable_ts, "stop durable", addr->ta.newest_stop_durable_ts,
+ false, tag));
WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "newest stop",
- unpack->newest_stop_ts, "newest stop", addr->newest_stop_ts, false, tag));
+ unpack->ta.newest_stop_ts, "newest stop", addr->ta.newest_stop_ts, false, tag));
WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "newest stop",
- unpack->newest_stop_txn, "newest stop", addr->newest_stop_txn, false, tag, dsk));
+ unpack->ta.newest_stop_txn, "newest stop", addr->ta.newest_stop_txn, false, tag, dsk));
break;
case WT_CELL_DEL:
case WT_CELL_VALUE:
@@ -328,36 +342,52 @@ __verify_dsk_validity(WT_SESSION_IMPL *session, WT_CELL_UNPACK *unpack, uint32_t
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_OVFL_RM:
case WT_CELL_VALUE_SHORT:
- if (unpack->start_ts != WT_TS_NONE && unpack->stop_ts == WT_TS_NONE)
+ if (unpack->tw.start_ts != WT_TS_NONE && unpack->tw.stop_ts == WT_TS_NONE)
WT_RET_VRFY(session, "cell %" PRIu32
" on page at %s has a stop "
- "timestamp of 0",
- cell_num - 1, tag);
- if (unpack->start_ts > unpack->stop_ts)
+ "timestamp of 0; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
+ if (unpack->tw.start_ts > unpack->tw.stop_ts)
WT_RET_VRFY(session, "cell %" PRIu32
" on page at %s has a start "
- "timestamp %s newer than its stop timestamp %s",
- cell_num - 1, tag, __wt_timestamp_to_string(unpack->start_ts, ts_string[0]),
- __wt_timestamp_to_string(unpack->stop_ts, ts_string[1]));
- if (unpack->start_txn > unpack->stop_txn)
+ "timestamp newer than its stop timestamp; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
+ if (unpack->tw.start_txn > unpack->tw.stop_txn)
WT_RET_VRFY(session, "cell %" PRIu32
" on page at %s has a start "
- "transaction %" PRIu64
- " newer than its stop "
- "transaction %" PRIu64,
- cell_num - 1, tag, unpack->start_txn, unpack->stop_txn);
+ "transaction newer than its stop "
+ "transaction; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
+ if (unpack->tw.start_ts > unpack->tw.durable_start_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a start "
+ "timestamp newer than its durable start timestamp; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
+ if (unpack->tw.stop_ts != WT_TS_MAX && unpack->tw.stop_ts > unpack->tw.durable_stop_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a stop "
+ "timestamp newer than its durable stop timestamp; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
if (addr == NULL)
break;
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start", unpack->start_ts,
- "oldest start", addr->oldest_start_ts, true, tag));
- WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "start", unpack->start_txn,
- "oldest start", addr->oldest_start_txn, true, tag, dsk));
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop", unpack->stop_ts,
- "newest stop", addr->newest_stop_ts, false, tag));
- WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "stop", unpack->stop_txn,
- "newest stop", addr->newest_stop_txn, false, tag, dsk));
+ if (addr->ta.newest_start_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start durable",
+ unpack->tw.durable_start_ts, "newest start durable", addr->ta.newest_start_durable_ts,
+ false, tag));
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start", unpack->tw.start_ts,
+ "oldest start", addr->ta.oldest_start_ts, true, tag));
+ WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "start", unpack->tw.start_txn,
+ "oldest start", addr->ta.oldest_start_txn, true, tag, dsk));
+ if (addr->ta.newest_stop_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop durable",
+ unpack->tw.durable_stop_ts, "newest stop durable", addr->ta.newest_stop_durable_ts,
+ false, tag));
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop", unpack->tw.stop_ts,
+ "newest stop", addr->ta.newest_stop_ts, false, tag));
+ WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "stop", unpack->tw.stop_txn,
+ "newest stop", addr->ta.newest_stop_txn, false, tag, dsk));
break;
}
@@ -707,10 +737,7 @@ __verify_dsk_col_var(
struct {
const void *data;
size_t size;
- wt_timestamp_t start_ts;
- uint64_t start_txn;
- wt_timestamp_t stop_ts;
- uint64_t stop_txn;
+ WT_TIME_WINDOW tw;
bool deleted;
} last;
WT_BM *bm;
@@ -728,10 +755,7 @@ __verify_dsk_col_var(
last.data = NULL;
last.size = 0;
- last.start_ts = WT_TS_NONE;
- last.start_txn = WT_TXN_NONE;
- last.stop_ts = WT_TS_NONE;
- last.stop_txn = WT_TXN_NONE;
+ __wt_time_window_init(&last.tw);
last.deleted = false;
cell_num = 0;
@@ -760,11 +784,11 @@ __verify_dsk_col_var(
}
/*
- * Compare the last two items and see if reconciliation missed a chance for RLE encoding. We
- * don't have to care about data encoding or anything else, a byte comparison is enough.
+ * Compare the last two items and see if reconciliation missed a chance for RLE encoding.
+ * The time windows must match and we otherwise don't have to care about data encoding, a
+ * byte comparison is enough.
*/
- if (unpack->start_ts != last.start_ts || unpack->start_txn != last.start_txn ||
- unpack->stop_ts != last.stop_ts || unpack->stop_txn != last.stop_txn)
+ if (!__wt_time_windows_equal(&unpack->tw, &last.tw))
;
else if (last.deleted) {
if (cell_type == WT_CELL_DEL)
@@ -777,10 +801,7 @@ match_err:
"have been run-length encoded",
cell_num - 1, cell_num, tag);
- last.start_ts = unpack->start_ts;
- last.start_txn = unpack->start_txn;
- last.stop_ts = unpack->stop_ts;
- last.stop_txn = unpack->stop_txn;
+ __wt_time_window_copy(&last.tw, &unpack->tw);
switch (cell_type) {
case WT_CELL_DEL:
last.data = NULL;
diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c
index bfd3ecb9f5c..a4a4f8b662d 100644
--- a/src/third_party/wiredtiger/src/btree/col_modify.c
+++ b/src/third_party/wiredtiger/src/btree/col_modify.c
@@ -34,7 +34,7 @@ __wt_col_modify(WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_U
btree = cbt->btree;
ins = NULL;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
upd = upd_arg;
append = logged = false;
@@ -137,7 +137,7 @@ __wt_col_modify(WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_U
}
/* Avoid a data copy in WT_CURSOR.update. */
- cbt->modify_update = upd;
+ __wt_upd_value_assign(cbt->modify_update, upd);
/*
* Point the new WT_UPDATE item to the next element in the list. If we get it right, the
@@ -188,7 +188,7 @@ __wt_col_modify(WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_U
logged = true;
/* Avoid a data copy in WT_CURSOR.update. */
- cbt->modify_update = upd;
+ __wt_upd_value_assign(cbt->modify_update, upd);
} else
upd_size = __wt_update_list_memsize(upd);
ins->upd = upd;
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index a6d56c9499d..e98cf094421 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -74,7 +74,7 @@ __wt_col_search(
uint32_t base, indx, limit, read_flags;
int depth;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
btree = S2BT(session);
current = NULL;
diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c
index b7b1c5edff8..6aa44046cb8 100644
--- a/src/third_party/wiredtiger/src/btree/row_modify.c
+++ b/src/third_party/wiredtiger/src/btree/row_modify.c
@@ -58,7 +58,7 @@ __wt_row_modify(WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value,
ins = NULL;
page = cbt->ref->page;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
upd = upd_arg;
logged = false;
@@ -109,7 +109,7 @@ __wt_row_modify(WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value,
logged = true;
/* Avoid WT_CURSOR.update data copy. */
- cbt->modify_update = upd;
+ __wt_upd_value_assign(cbt->modify_update, upd);
} else {
upd_size = __wt_update_list_memsize(upd);
@@ -169,7 +169,7 @@ __wt_row_modify(WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value,
logged = true;
/* Avoid WT_CURSOR.update data copy. */
- cbt->modify_update = upd;
+ __wt_upd_value_assign(cbt->modify_update, upd);
} else
upd_size = __wt_update_list_memsize(upd);
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 98ae6f66daf..917705f6f9c 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -224,7 +224,7 @@ __wt_row_search(WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key, bool insert, WT_REF *le
int cmp, depth;
bool append_check, descend_right, done;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
btree = S2BT(session);
collator = btree->collator;
item = cbt->tmp;