summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2020-05-12 15:56:27 +1000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-05-12 06:19:54 +0000
commit0f9e013c5f3cf3a8fa91b7b5e757d00bb2deae5b (patch)
treec1bc15ecb2a89ecccab6f73bdbc8b9b7f7b90bca
parentbe4fd3cca7fc4ea6883ceb1e262b0ac5cc7ecf69 (diff)
downloadmongo-0f9e013c5f3cf3a8fa91b7b5e757d00bb2deae5b.tar.gz
Import wiredtiger: bdff12c2331ab0478a22309a6d35519d2e2ca441 from branch mongodb-4.4
ref: 404b4a70af..bdff12c233 for: 4.5.1 WT-5864 Append globally visible tombstone with WT_TS_NONE to the update chain WT-6063 Re-enable checkpoint-filetypes-test in Evergreen WT-6065 Re-enable spinlock-gcc-test in Evergreen WT-6092 Use durable timestamp for global visibility check instead of commit timestamp WT-6111 Rework cell structures and unpacking WT-6157 Disable table logging in workgen stress test while running prepare transactions WT-6159 Tag verbose messages to make them easier to distinguish WT-6160 Fix format failure caused by stack overwrite WT-6161 Fix format hang when WiredTiger internal checkpoints are configured WT-6162 Fix incorrectly counts failures in format.sh WT-6166 KEY/VALUE short cells have to handle copy cells
-rw-r--r--src/third_party/wiredtiger/.clang-format3
-rwxr-xr-xsrc/third_party/wiredtiger/bench/workgen/runner/prepare_stress.py2
-rw-r--r--src/third_party/wiredtiger/dist/s_define.list1
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok1
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curnext.c6
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curprev.c6
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c449
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_ovfl.c11
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_page.c38
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_rebalance.c15
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_ret.c10
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_slvg.c27
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c8
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_stat.c16
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_sync.c34
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy.c348
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c654
-rw-r--r--src/third_party/wiredtiger/src/btree/row_key.c8
-rw-r--r--src/third_party/wiredtiger/src/history/hs.c2
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h2
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i20
-rw-r--r--src/third_party/wiredtiger/src/include/cell.h79
-rw-r--r--src/third_party/wiredtiger/src/include/cell.i260
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.i4
-rw-r--r--src/third_party/wiredtiger/src/include/error.h11
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h46
-rw-r--r--src/third_party/wiredtiger/src/include/misc.h3
-rw-r--r--src/third_party/wiredtiger/src/include/reconcile.h1
-rw-r--r--src/third_party/wiredtiger/src/include/reconcile.i2
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i20
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h8
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_col.c8
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_row.c20
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_track.c4
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_visibility.c70
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c2
-rw-r--r--src/third_party/wiredtiger/src/support/modify.c2
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c10
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen.yml8
-rw-r--r--src/third_party/wiredtiger/test/format/backup.c87
-rw-r--r--src/third_party/wiredtiger/test/format/checkpoint.c11
-rwxr-xr-xsrc/third_party/wiredtiger/test/format/format.sh16
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c10
-rw-r--r--src/third_party/wiredtiger/test/format/t.c2
45 files changed, 1365 insertions, 982 deletions
diff --git a/src/third_party/wiredtiger/.clang-format b/src/third_party/wiredtiger/.clang-format
index 573572df901..79eb168c0c0 100644
--- a/src/third_party/wiredtiger/.clang-format
+++ b/src/third_party/wiredtiger/.clang-format
@@ -49,7 +49,8 @@ ForEachMacros:
- Q_FOREACH
- BOOST_FOREACH
- TAILQ_FOREACH
- - WT_CELL_FOREACH_BEGIN
+ - WT_CELL_FOREACH_ADDR
+ - WT_CELL_FOREACH_KV
- WT_CELL_FOREACH_VRFY
- WT_CKPT_FOREACH
- WT_COL_FOREACH
diff --git a/src/third_party/wiredtiger/bench/workgen/runner/prepare_stress.py b/src/third_party/wiredtiger/bench/workgen/runner/prepare_stress.py
index a79edf71af8..18751888a03 100755
--- a/src/third_party/wiredtiger/bench/workgen/runner/prepare_stress.py
+++ b/src/third_party/wiredtiger/bench/workgen/runner/prepare_stress.py
@@ -96,7 +96,7 @@ for i in range(0, table_count):
tname = "table:test" + str(i)
table = Table(tname)
s.create(tname, wtperf_table_config +\
- compress_table_config + table_config)
+ compress_table_config + table_config + ",log=(enabled=false)")
table.options.key_size = 200
table.options.value_size = 5000
tables.append(table)
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list
index 6c67f53a7ee..71760c1b966 100644
--- a/src/third_party/wiredtiger/dist/s_define.list
+++ b/src/third_party/wiredtiger/dist/s_define.list
@@ -26,6 +26,7 @@ WT_BLOCK_HEADER_SIZE
WT_CACHE_LINE_ALIGNMENT
WT_CACHE_LINE_PAD_BEGIN
WT_CACHE_LINE_PAD_END
+WT_CELL_COMMON_FIELDS
WT_CKPT_BLOCK_MODS
WT_CLOCKDIFF_NS
WT_CONN_CHECK_PANIC
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index e9422174821..d8fc8e08f4a 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -298,6 +298,7 @@ Obama
Opcode
Outfmt
PARAM
+PFX
POSIX
PPC
PREDEFINE
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index a08ef7d90d9..e64af3c37d8 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "404b4a70af14e7d3aecf7f206380884af5d06786"
+ "commit": "bdff12c2331ab0478a22309a6d35519d2e2ca441"
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 9ea91c6f421..47d0907a0d7 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -179,7 +179,7 @@ static inline int
__cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
WT_CELL *cell;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
WT_COL *cip;
WT_INSERT *ins;
WT_PAGE *page;
@@ -245,7 +245,7 @@ restart_read:
*/
if (cbt->cip_saved != cip) {
cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(session, page, cell, &unpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, &unpack);
if (unpack.type == WT_CELL_DEL) {
if ((rle = __wt_cell_rle(&unpack)) == 1)
continue;
@@ -295,7 +295,7 @@ restart_read:
static inline int
__cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
- WT_CELL_UNPACK kpack;
+ WT_CELL_UNPACK_KV kpack;
WT_INSERT *ins;
WT_ITEM *key;
WT_PAGE *page;
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index f8db9cd6233..4d6f62a10b7 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -318,7 +318,7 @@ static inline int
__cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
WT_CELL *cell;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
WT_COL *cip;
WT_INSERT *ins;
WT_PAGE *page;
@@ -385,7 +385,7 @@ restart_read:
*/
if (cbt->cip_saved != cip) {
cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(session, page, cell, &unpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, &unpack);
if (unpack.type == WT_CELL_DEL) {
if (__wt_cell_rle(&unpack) == 1)
continue;
@@ -435,7 +435,7 @@ restart_read:
static inline int
__cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
{
- WT_CELL_UNPACK kpack;
+ WT_CELL_UNPACK_KV kpack;
WT_INSERT *ins;
WT_ITEM *key;
WT_PAGE *page;
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index a835e593022..d0fb68ecb03 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -35,12 +35,8 @@ struct __wt_dbg {
static const /* Output separator */
char *const sep = "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n";
-static int __debug_cell(WT_DBG *, const WT_PAGE_HEADER *, WT_CELL_UNPACK *);
-static int __debug_cell_data(WT_DBG *, WT_PAGE *, int, const char *, WT_CELL_UNPACK *);
static int __debug_col_skip(WT_DBG *, WT_INSERT_HEAD *, const char *, bool);
static int __debug_config(WT_SESSION_IMPL *, WT_DBG *, const char *);
-static int __debug_dsk_cell(WT_DBG *, const WT_PAGE_HEADER *);
-static int __debug_dsk_col_fix(WT_DBG *, const WT_PAGE_HEADER *);
static int __debug_modify(WT_DBG *, WT_UPDATE *, const char *);
static int __debug_page(WT_DBG *, WT_REF *, uint32_t);
static int __debug_page_col_fix(WT_DBG *, WT_REF *);
@@ -410,6 +406,237 @@ err:
}
/*
+ * __debug_cell_int_data --
+ * Dump a single WT_COL_INT or WT_ROW_INT disk image cell's data in debugging mode.
+ */
+static int
+__debug_cell_int_data(WT_DBG *ds, WT_CELL_UNPACK_ADDR *unpack)
+{
+ const char *p;
+
+ switch (unpack->raw) {
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ p = __wt_cell_type_string(unpack->raw);
+ return (__debug_item(ds, NULL, p, strlen(p)));
+ }
+ return (0);
+}
+
+/*
+ * __debug_cell_int --
+ * Dump a single unpacked WT_COL_INT or WT_ROW_INT disk image WT_CELL.
+ */
+static int
+__debug_cell_int(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK_ADDR *unpack)
+{
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ char time_string[WT_TIME_STRING_SIZE];
+
+ session = ds->session;
+
+ WT_RET(ds->f(ds, "\t%s: len %" PRIu32, __wt_cell_type_string(unpack->raw), unpack->size));
+
+ /* Dump the cell's per-disk page type information. */
+ switch (dsk->type) {
+ case WT_PAGE_COL_INT:
+ WT_RET(ds->f(ds, ", recno: %" PRIu64, unpack->v));
+ break;
+ }
+
+ /* Dump timestamps and addresses. */
+ switch (unpack->raw) {
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ WT_RET(ds->f(ds, ", %s", __wt_time_aggregate_to_string(&unpack->ta, time_string)));
+
+ WT_RET(__wt_scr_alloc(session, 128, &buf));
+ ret = ds->f(ds, ", %s", __wt_addr_string(session, unpack->data, unpack->size, buf));
+ __wt_scr_free(session, &buf);
+ WT_RET(ret);
+ break;
+ }
+ WT_RET(ds->f(ds, "\n"));
+
+ return (__debug_cell_int_data(ds, unpack));
+}
+
+/*
+ * __debug_dsk_int --
+ * Dump a WT_COL_INT or WT_ROW_INT disk image.
+ */
+static int
+__debug_dsk_int(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
+{
+ WT_CELL_UNPACK_ADDR unpack;
+
+ WT_CELL_FOREACH_ADDR (ds->session, dsk, unpack) {
+ WT_RET(__debug_cell_int(ds, dsk, &unpack));
+ }
+ WT_CELL_FOREACH_END;
+ return (0);
+}
+
+/*
+ * __debug_cell_kv_data --
+ * Dump a single WT_COL_VAR or WT_ROW_LEAF disk image cell's data in debugging mode.
+ */
+static int
+__debug_cell_kv_data(
+ WT_DBG *ds, WT_PAGE *page, int page_type, const char *tag, WT_CELL_UNPACK_KV *unpack)
+{
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ const char *p;
+
+ session = ds->session;
+
+ /*
+ * Column-store references to deleted cells return a NULL cell reference.
+ */
+ if (unpack == NULL)
+ return (__debug_item(ds, tag, "deleted", strlen("deleted")));
+
+ /*
+ * Row-store references to empty cells return a NULL on-page reference.
+ */
+ if (unpack->cell == NULL)
+ return (__debug_item(ds, tag, "", 0));
+
+ switch (unpack->raw) {
+ case WT_CELL_DEL:
+ p = __wt_cell_type_string(unpack->raw);
+ return (__debug_item(ds, tag, p, strlen(p)));
+ }
+
+ WT_RET(__wt_scr_alloc(session, 256, &buf));
+ WT_ERR(page == NULL ? __wt_dsk_cell_data_ref(session, page_type, unpack, buf) :
+ __wt_page_cell_data_ref(session, page, unpack, buf));
+
+ switch (unpack->raw) {
+ case WT_CELL_KEY:
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_KEY_PFX:
+ case WT_CELL_KEY_SHORT:
+ case WT_CELL_KEY_SHORT_PFX:
+ WT_ERR(__debug_item_key(ds, tag, buf->data, buf->size));
+ break;
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_COPY:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_SHORT:
+ WT_ERR(__debug_item_value(ds, tag, buf->data, buf->size));
+ break;
+ }
+
+err:
+ __wt_scr_free(session, &buf);
+ return (ret);
+}
+
+/*
+ * __debug_cell_kv --
+ * Dump a single unpacked WT_COL_VAR or WT_ROW_LEAF disk image WT_CELL.
+ */
+static int
+__debug_cell_kv(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK_KV *unpack)
+{
+ WT_DECL_ITEM(buf);
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ char time_string[WT_TIME_STRING_SIZE];
+
+ session = ds->session;
+
+ WT_RET(ds->f(ds, "\t%s: len %" PRIu32, __wt_cell_type_string(unpack->raw), unpack->size));
+
+ /* Dump cell's per-disk page type information. */
+ switch (dsk->type) {
+ case WT_PAGE_COL_VAR:
+ WT_RET(ds->f(ds, ", rle: %" PRIu64, __wt_cell_rle(unpack)));
+ break;
+ case WT_PAGE_ROW_LEAF:
+ switch (unpack->raw) {
+ case WT_CELL_KEY_PFX:
+ case WT_CELL_KEY_SHORT_PFX:
+ WT_RET(ds->f(ds, ", pfx: %" PRIu8, unpack->prefix));
+ break;
+ }
+ break;
+ }
+
+ /* Dump timestamps. */
+ switch (unpack->raw) {
+ case WT_CELL_DEL:
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_COPY:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_OVFL_RM:
+ case WT_CELL_VALUE_SHORT:
+ WT_RET(ds->f(ds, ", %s", __wt_time_window_to_string(&unpack->tw, time_string)));
+ break;
+ }
+
+ /* Dump overflow addresses. */
+ switch (unpack->raw) {
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_VALUE_OVFL:
+ WT_RET(__wt_scr_alloc(session, 128, &buf));
+ ret = ds->f(ds, ", %s", __wt_addr_string(session, unpack->data, unpack->size, buf));
+ __wt_scr_free(session, &buf);
+ WT_RET(ret);
+ break;
+ }
+ WT_RET(ds->f(ds, "\n"));
+
+ return (__debug_cell_kv_data(ds, NULL, dsk->type, NULL, unpack));
+}
+
+/*
+ * __debug_dsk_kv --
+ * Dump a WT_COL_VAR or WT_ROW_LEAF disk image.
+ */
+static int
+__debug_dsk_kv(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
+{
+ WT_CELL_UNPACK_KV unpack;
+
+ WT_CELL_FOREACH_KV (ds->session, dsk, unpack) {
+ WT_RET(__debug_cell_kv(ds, dsk, &unpack));
+ }
+ WT_CELL_FOREACH_END;
+ return (0);
+}
+
+/*
+ * __debug_dsk_col_fix --
+ * Dump a WT_PAGE_COL_FIX disk image.
+ */
+static int
+__debug_dsk_col_fix(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
+{
+ WT_BTREE *btree;
+ uint32_t i;
+ uint8_t v;
+
+ btree = S2BT(ds->session);
+
+ WT_FIX_FOREACH (btree, dsk, v, i) {
+ WT_RET(ds->f(ds, "\t{"));
+ WT_RET(__debug_hex_byte(ds, v));
+ WT_RET(ds->f(ds, "}\n"));
+ }
+ return (0);
+}
+
+/*
* __wt_debug_disk --
* Dump a disk page in debugging mode.
*/
@@ -460,10 +687,12 @@ __wt_debug_disk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char
WT_ERR(__debug_dsk_col_fix(ds, dsk));
break;
case WT_PAGE_COL_INT:
- case WT_PAGE_COL_VAR:
case WT_PAGE_ROW_INT:
+ WT_ERR(__debug_dsk_int(ds, dsk));
+ break;
+ case WT_PAGE_COL_VAR:
case WT_PAGE_ROW_LEAF:
- WT_ERR(__debug_dsk_cell(ds, dsk));
+ WT_ERR(__debug_dsk_kv(ds, dsk));
break;
default:
break;
@@ -475,50 +704,6 @@ err:
}
/*
- * __debug_dsk_col_fix --
- * Dump a WT_PAGE_COL_FIX page.
- */
-static int
-__debug_dsk_col_fix(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
-{
- WT_BTREE *btree;
- uint32_t i;
- uint8_t v;
-
- WT_ASSERT(ds->session, S2BT_SAFE(ds->session) != NULL);
-
- btree = S2BT(ds->session);
-
- WT_FIX_FOREACH (btree, dsk, v, i) {
- WT_RET(ds->f(ds, "\t{"));
- WT_RET(__debug_hex_byte(ds, v));
- WT_RET(ds->f(ds, "}\n"));
- }
- return (0);
-}
-
-/*
- * __debug_dsk_cell --
- * Dump a page of WT_CELL's.
- */
-static int
-__debug_dsk_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
-{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
-
- WT_ASSERT(ds->session, S2BT_SAFE(ds->session) != NULL);
-
- btree = S2BT(ds->session);
-
- WT_CELL_FOREACH_BEGIN (ds->session, btree, dsk, unpack) {
- WT_RET(__debug_cell(ds, dsk, &unpack));
- }
- WT_CELL_FOREACH_END;
- return (0);
-}
-
-/*
* __debug_tree_shape_info --
* Pretty-print information about a page.
*/
@@ -787,7 +972,7 @@ err:
*/
int
__wt_debug_key_value(
- WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, uint64_t rle, WT_CELL_UNPACK *value)
+ WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, uint64_t rle, WT_CELL_UNPACK_KV *value)
{
WT_DBG *ds, _ds;
WT_DECL_RET;
@@ -801,7 +986,7 @@ __wt_debug_key_value(
else
WT_ERR(__debug_item_key(ds, "K", key->data, key->size));
WT_ERR(__debug_time_window(ds, "T", &value->tw));
- WT_ERR(__debug_cell_data(ds, NULL, value != NULL ? value->type : 0, "V", value));
+ WT_ERR(__debug_cell_kv_data(ds, NULL, value != NULL ? value->type : 0, "V", value));
err:
return (__debug_wrapup(ds));
@@ -1065,7 +1250,7 @@ static int
__debug_page_col_var(WT_DBG *ds, WT_REF *ref)
{
WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK_KV *unpack, _unpack;
WT_COL *cip;
WT_INSERT_HEAD *update;
WT_PAGE *page;
@@ -1079,10 +1264,10 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref)
WT_COL_FOREACH (page, cip, i) {
cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(ds->session, page, cell, unpack);
+ __wt_cell_unpack_kv(ds->session, page->dsk, cell, unpack);
rle = __wt_cell_rle(unpack);
WT_RET(__wt_snprintf(tag, sizeof(tag), "%" PRIu64 " %" PRIu64, recno, rle));
- WT_RET(__debug_cell_data(ds, page, WT_PAGE_COL_VAR, tag, unpack));
+ WT_RET(__debug_cell_kv_data(ds, page, WT_PAGE_COL_VAR, tag, unpack));
if ((update = WT_COL_UPDATE(page, cip)) != NULL)
WT_RET(__debug_col_skip(ds, update, "update", false));
@@ -1137,7 +1322,7 @@ __debug_page_row_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
static int
__debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
{
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK_KV *unpack, _unpack;
WT_DECL_ITEM(key);
WT_DECL_RET;
WT_INSERT_HEAD *insert;
@@ -1162,7 +1347,7 @@ __debug_page_row_leaf(WT_DBG *ds, WT_PAGE *page)
WT_ERR(__debug_item_key(ds, "K", key->data, key->size));
__wt_row_leaf_value_cell(session, page, rip, NULL, unpack);
- WT_ERR(__debug_cell_data(ds, page, WT_PAGE_ROW_LEAF, "V", unpack));
+ WT_ERR(__debug_cell_kv_data(ds, page, WT_PAGE_ROW_LEAF, "V", unpack));
if ((upd = WT_ROW_UPDATE(page, rip)) != NULL)
WT_ERR(__debug_update(ds, upd, false));
@@ -1361,154 +1546,4 @@ __debug_ref(WT_DBG *ds, WT_REF *ref)
__wt_addr_string(session, addr.addr, addr.size, ds->t1)));
return (ds->f(ds, "\n"));
}
-
-/*
- * __debug_cell --
- * Dump a single unpacked WT_CELL.
- */
-static int
-__debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
-{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- char time_string[WT_TIME_STRING_SIZE];
-
- session = ds->session;
-
- WT_RET(ds->f(ds, "\t%s: len %" PRIu32, __wt_cell_type_string(unpack->raw), unpack->size));
-
- /* Dump cell's per-disk page type information. */
- switch (dsk->type) {
- case WT_PAGE_COL_INT:
- switch (unpack->type) {
- case WT_CELL_VALUE:
- WT_RET(ds->f(ds, ", recno: %" PRIu64, unpack->v));
- break;
- }
- break;
- case WT_PAGE_COL_VAR:
- switch (unpack->type) {
- case WT_CELL_DEL:
- case WT_CELL_KEY_OVFL_RM:
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_OVFL_RM:
- WT_RET(ds->f(ds, ", rle: %" PRIu64, __wt_cell_rle(unpack)));
- break;
- }
- break;
- case WT_PAGE_ROW_INT:
- case WT_PAGE_ROW_LEAF:
- switch (unpack->type) {
- case WT_CELL_KEY:
- WT_RET(ds->f(ds, ", pfx: %" PRIu8, unpack->prefix));
- break;
- }
- break;
- }
-
- /* Dump timestamps. */
- switch (unpack->raw) {
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- WT_RET(ds->f(ds, ", %s", __wt_time_aggregate_to_string(&unpack->ta, time_string)));
- break;
- case WT_CELL_DEL:
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_COPY:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_OVFL_RM:
- case WT_CELL_VALUE_SHORT:
- WT_RET(ds->f(ds, ", %s", __wt_time_window_to_string(&unpack->tw, time_string)));
- break;
- }
-
- /* Dump addresses. */
- switch (unpack->raw) {
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- case WT_CELL_KEY_OVFL:
- case WT_CELL_KEY_OVFL_RM:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_OVFL_RM:
- WT_RET(__wt_scr_alloc(session, 128, &buf));
- ret = ds->f(ds, ", %s", __wt_addr_string(session, unpack->data, unpack->size, buf));
- __wt_scr_free(session, &buf);
- WT_RET(ret);
- break;
- }
- WT_RET(ds->f(ds, "\n"));
-
- return (__debug_cell_data(ds, NULL, dsk->type, NULL, unpack));
-}
-
-/*
- * __debug_cell_data --
- * Dump a single cell's data in debugging mode.
- */
-static int
-__debug_cell_data(WT_DBG *ds, WT_PAGE *page, int page_type, const char *tag, WT_CELL_UNPACK *unpack)
-{
- WT_DECL_ITEM(buf);
- WT_DECL_RET;
- WT_SESSION_IMPL *session;
- const char *p;
-
- session = ds->session;
-
- /*
- * Column-store references to deleted cells return a NULL cell reference.
- */
- if (unpack == NULL)
- return (__debug_item(ds, tag, "deleted", strlen("deleted")));
-
- /*
- * Row-store references to empty cells return a NULL on-page reference.
- */
- if (unpack->cell == NULL)
- return (__debug_item(ds, tag, "", 0));
-
- switch (unpack->raw) {
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- case WT_CELL_DEL:
- case WT_CELL_KEY_OVFL_RM:
- case WT_CELL_VALUE_OVFL_RM:
- p = __wt_cell_type_string(unpack->raw);
- return (__debug_item(ds, tag, p, strlen(p)));
- }
-
- WT_RET(__wt_scr_alloc(session, 256, &buf));
- WT_ERR(page == NULL ? __wt_dsk_cell_data_ref(session, page_type, unpack, buf) :
- __wt_page_cell_data_ref(session, page, unpack, buf));
-
- switch (unpack->raw) {
- case WT_CELL_KEY:
- case WT_CELL_KEY_OVFL:
- case WT_CELL_KEY_PFX:
- case WT_CELL_KEY_SHORT:
- case WT_CELL_KEY_SHORT_PFX:
- WT_ERR(__debug_item_key(ds, tag, buf->data, buf->size));
- break;
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_COPY:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_SHORT:
- WT_ERR(__debug_item_value(ds, tag, buf->data, buf->size));
- break;
- default:
- WT_ERR(__wt_illegal_value(session, unpack->raw));
- }
-
-err:
- __wt_scr_free(session, &buf);
- return (ret);
-}
#endif
diff --git a/src/third_party/wiredtiger/src/btree/bt_ovfl.c b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
index cccd2c628a3..855272ce1d9 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ovfl.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
@@ -43,8 +43,8 @@ __ovfl_read(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_
* Bring an overflow item into memory.
*/
int
-__wt_ovfl_read(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store, bool *decoded)
+__wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK_COMMON *unpack,
+ WT_ITEM *store, bool *decoded)
{
WT_DECL_RET;
@@ -65,7 +65,6 @@ __wt_ovfl_read(
*/
__wt_readlock(session, &S2BT(session)->ovfl_lock);
if (__wt_cell_type_raw(unpack->cell) == WT_CELL_VALUE_OVFL_RM) {
- WT_ASSERT(session, __wt_txn_visible_all(session, unpack->tw.stop_txn, unpack->tw.stop_ts));
ret = __wt_buf_setstr(session, store, "WT_CELL_VALUE_OVFL_RM");
*decoded = true;
} else
@@ -80,7 +79,7 @@ __wt_ovfl_read(
* Remove an overflow value.
*/
int
-__wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack)
+__wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK_KV *unpack)
{
/*
* This function solves two problems in reconciliation.
@@ -121,13 +120,13 @@ __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell)
{
WT_BM *bm;
WT_BTREE *btree;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK_KV *unpack, _unpack;
btree = S2BT(session);
bm = btree->bm;
unpack = &_unpack;
- __wt_cell_unpack(session, page, cell, unpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, unpack);
/*
* Remove overflow key/value objects, called when reconciliation finishes after successfully
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index 1a690b24804..6d1c377a3f0 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -265,14 +265,11 @@ __inmem_col_fix(WT_SESSION_IMPL *session, WT_PAGE *page)
static void
__inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_ADDR unpack;
WT_PAGE_INDEX *pindex;
WT_REF **refp, *ref;
uint32_t hint;
- btree = S2BT(session);
-
/*
* Walk the page, building references: the page contains value items. The value items are
* on-page items (WT_CELL_VALUE).
@@ -280,7 +277,7 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page)
pindex = WT_INTL_INDEX_GET_SAFE(page);
refp = pindex->index;
hint = 0;
- WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ WT_CELL_FOREACH_ADDR (session, page->dsk, unpack) {
ref = *refp++;
ref->home = page;
ref->pindex_hint = hint++;
@@ -299,15 +296,12 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page)
static void
__inmem_col_var_repeats(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t *np)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
*np = 0;
- btree = S2BT(session);
-
/* Walk the page, counting entries for the repeats array. */
- WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ WT_CELL_FOREACH_KV (session, page->dsk, unpack) {
if (__wt_cell_rle(&unpack) > 1)
++*np;
}
@@ -321,8 +315,7 @@ __inmem_col_var_repeats(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t *np)
static int
__inmem_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, uint64_t recno, size_t *sizep)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
WT_COL *cip;
WT_COL_RLE *repeats;
size_t size;
@@ -330,8 +323,6 @@ __inmem_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, uint64_t recno, size_t
uint32_t indx, n, repeat_off;
void *p;
- btree = S2BT(session);
-
repeats = NULL;
repeat_off = 0;
@@ -342,7 +333,7 @@ __inmem_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, uint64_t recno, size_t
*/
indx = 0;
cip = page->pg_var;
- WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ WT_CELL_FOREACH_KV (session, page->dsk, unpack) {
WT_COL_PTR_SET(cip, WT_PAGE_DISK_OFFSET(page, unpack.cell));
cip++;
@@ -383,7 +374,7 @@ static int
__inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
{
WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_ADDR unpack;
WT_DECL_ITEM(current);
WT_DECL_RET;
WT_PAGE_INDEX *pindex;
@@ -403,7 +394,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
refp = pindex->index;
overflow_keys = false;
hint = 0;
- WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ WT_CELL_FOREACH_ADDR (session, page->dsk, unpack) {
ref = *refp;
ref->home = page;
ref->pindex_hint = hint++;
@@ -499,12 +490,9 @@ err:
static int
__inmem_row_leaf_entries(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint32_t *nindxp)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
uint32_t nindx;
- btree = S2BT(session);
-
/*
* Leaf row-store page entries map to a maximum of one-to-one to the number of physical entries
* on the page (each physical entry might be a key without a subsequent data item). To avoid
@@ -516,7 +504,7 @@ __inmem_row_leaf_entries(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, ui
* overflow (WT_CELL_VALUE_OVFL) item.
*/
nindx = 0;
- WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ WT_CELL_FOREACH_KV (session, dsk, unpack) {
switch (unpack.type) {
case WT_CELL_KEY:
case WT_CELL_KEY_OVFL:
@@ -543,7 +531,7 @@ static int
__inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
WT_ITEM buf;
WT_ROW *rip;
WT_UPDATE **upd_array, *upd;
@@ -558,7 +546,7 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
/* Walk the page, building indices. */
rip = page->pg_row;
- WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ WT_CELL_FOREACH_KV (session, page->dsk, unpack) {
if (instantiate_prepared && !prepare && F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE))
prepare = true;
switch (unpack.type) {
@@ -587,7 +575,7 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
*/
if (!btree->huffman_value && unpack.tw.stop_txn == WT_TXN_MAX &&
unpack.tw.stop_ts == WT_TS_MAX && !F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE) &&
- __wt_txn_visible_all(session, unpack.tw.start_txn, unpack.tw.start_ts))
+ __wt_txn_visible_all(session, unpack.tw.start_txn, unpack.tw.durable_start_ts))
__wt_row_leaf_value_set(page, rip - 1, &unpack);
break;
case WT_CELL_VALUE_OVFL:
diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
index 5f29cf08691..039a4b5c833 100644
--- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c
+++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
@@ -57,7 +57,7 @@ __rebalance_discard(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs)
*/
static int
__rebalance_leaf_append(WT_SESSION_IMPL *session, const uint8_t *key, size_t key_len,
- WT_CELL_UNPACK *unpack, WT_REBALANCE_STUFF *rs)
+ WT_CELL_UNPACK_ADDR *unpack, WT_REBALANCE_STUFF *rs)
{
WT_ADDR *copy_addr;
WT_REF *copy;
@@ -185,13 +185,10 @@ __rebalance_free_original(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs)
static int
__rebalance_col_walk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_ADDR unpack;
WT_DECL_ITEM(buf);
WT_DECL_RET;
- btree = S2BT(session);
-
WT_ERR(__wt_scr_alloc(session, 0, &buf));
/* Report progress periodically. */
@@ -202,7 +199,7 @@ __rebalance_col_walk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REB
* Walk the page, instantiating keys: the page contains sorted key and location cookie pairs.
* Keys are on-page/overflow items and location cookies are WT_CELL_ADDR_XXX items.
*/
- WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ WT_CELL_FOREACH_ADDR (session, dsk, unpack) {
switch (unpack.type) {
case WT_CELL_ADDR_INT:
/* An internal page: read it and recursively walk it. */
@@ -260,8 +257,7 @@ __rebalance_row_leaf_key(WT_SESSION_IMPL *session, const uint8_t *addr, size_t a
static int
__rebalance_row_walk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK key, unpack;
+ WT_CELL_UNPACK_ADDR key, unpack;
WT_DECL_ITEM(buf);
WT_DECL_ITEM(leafkey);
WT_DECL_RET;
@@ -269,7 +265,6 @@ __rebalance_row_walk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REB
bool first_cell;
const void *p;
- btree = S2BT(session);
WT_CLEAR(key); /* [-Werror=maybe-uninitialized] */
WT_ERR(__wt_scr_alloc(session, 0, &buf));
@@ -284,7 +279,7 @@ __rebalance_row_walk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REB
* Keys are on-page/overflow items and location cookies are WT_CELL_ADDR_XXX items.
*/
first_cell = true;
- WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ WT_CELL_FOREACH_ADDR (session, dsk, unpack) {
switch (unpack.type) {
case WT_CELL_KEY:
key = unpack;
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index 1a2360f6d09..abffa19cf56 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -76,9 +76,9 @@ __key_return(WT_CURSOR_BTREE *cbt)
static void
__read_col_time_window(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_TIME_WINDOW *tw)
{
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
- __wt_cell_unpack(session, page, cell, &unpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, &unpack);
__wt_time_window_copy(tw, &unpack.tw);
}
@@ -89,7 +89,7 @@ __read_col_time_window(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, W
void
__wt_read_row_time_window(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_TIME_WINDOW *tw)
{
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
__wt_time_window_init(tw);
/*
@@ -138,7 +138,7 @@ __wt_value_return_buf(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_ITEM *buf, WT_TIME_W
{
WT_BTREE *btree;
WT_CELL *cell;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
WT_CURSOR *cursor;
WT_PAGE *page;
WT_ROW *rip;
@@ -174,7 +174,7 @@ __wt_value_return_buf(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_ITEM *buf, WT_TIME_W
if (page->type == WT_PAGE_COL_VAR) {
/* Take the value from the original page cell. */
cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]);
- __wt_cell_unpack(session, page, cell, &unpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, &unpack);
if (tw != NULL)
__wt_time_window_copy(tw, &unpack.tw);
return (__wt_page_cell_data_ref(session, page, &unpack, buf));
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index 344c6a573d7..0571404cf89 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -567,14 +567,12 @@ static int
__slvg_trk_leaf(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint8_t *addr,
size_t addr_size, WT_STUFF *ss)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
WT_DECL_RET;
WT_PAGE *page;
WT_TRACK *trk;
uint64_t stop_recno;
- btree = S2BT(session);
page = NULL;
trk = NULL;
@@ -603,7 +601,7 @@ __slvg_trk_leaf(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint8_t *ad
* stop key requires walking the page.
*/
stop_recno = dsk->recno;
- WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ WT_CELL_FOREACH_KV (session, dsk, unpack) {
stop_recno += __wt_cell_rle(&unpack);
}
WT_CELL_FOREACH_END;
@@ -683,15 +681,12 @@ __slvg_trk_ovfl(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint8_t *ad
static int
__slvg_trk_leaf_ovfl(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_TRACK *trk)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
uint32_t ovfl_cnt;
- btree = S2BT(session);
-
/* Count page overflow items. */
ovfl_cnt = 0;
- WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ WT_CELL_FOREACH_KV (session, dsk, unpack) {
if (FLD_ISSET(unpack.flags, WT_CELL_UNPACK_OVERFLOW))
++ovfl_cnt;
}
@@ -706,7 +701,7 @@ __slvg_trk_leaf_ovfl(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_TRA
trk->trk_ovfl_cnt = ovfl_cnt;
ovfl_cnt = 0;
- WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ WT_CELL_FOREACH_KV (session, dsk, unpack) {
if (FLD_ISSET(unpack.flags, WT_CELL_UNPACK_OVERFLOW)) {
WT_RET(
__wt_memdup(session, unpack.data, unpack.size, &trk->trk_ovfl_addr[ovfl_cnt].addr));
@@ -1297,7 +1292,7 @@ err:
* Find a single overflow record in the merge page's list, and mark it as referenced.
*/
static int
-__slvg_col_ovfl_single(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK *unpack)
+__slvg_col_ovfl_single(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK_KV *unpack)
{
WT_TRACK *ovfl;
uint32_t i;
@@ -1325,7 +1320,7 @@ __slvg_col_ovfl(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_PAGE *page, uint64_t
uint64_t skip, uint64_t take)
{
WT_CELL *cell;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
WT_COL *cip;
WT_DECL_RET;
uint64_t start, stop;
@@ -1340,7 +1335,7 @@ __slvg_col_ovfl(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_PAGE *page, uint64_t
WT_COL_FOREACH (page, cip, i) {
cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(session, page, cell, &unpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, &unpack);
recno += __wt_cell_rle(&unpack);
/*
@@ -1957,7 +1952,7 @@ err:
* Find a single overflow record in the merge page's list, and mark it as referenced.
*/
static int
-__slvg_row_ovfl_single(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK *unpack)
+__slvg_row_ovfl_single(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK_KV *unpack)
{
WT_TRACK *ovfl;
uint32_t i;
@@ -1989,7 +1984,7 @@ __slvg_row_ovfl(
WT_SESSION_IMPL *session, WT_TRACK *trk, WT_PAGE *page, uint32_t start, uint32_t stop)
{
WT_CELL *cell;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
WT_ROW *rip;
void *copy;
@@ -2001,7 +1996,7 @@ __slvg_row_ovfl(
copy = WT_ROW_KEY_COPY(rip);
WT_IGNORE_RET_BOOL(__wt_row_leaf_key_info(page, copy, NULL, &cell, NULL, NULL));
if (cell != NULL) {
- __wt_cell_unpack(session, page, cell, &unpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, &unpack);
WT_RET(__slvg_row_ovfl_single(session, trk, &unpack));
}
__wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 2a016d6d725..b4083c16e12 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -159,7 +159,7 @@ static int
__split_ovfl_key_cleanup(WT_SESSION_IMPL *session, WT_PAGE *page, WT_REF *ref)
{
WT_CELL *cell;
- WT_CELL_UNPACK kpack;
+ WT_CELL_UNPACK_KV kpack;
WT_IKEY *ikey;
uint32_t cell_offset;
@@ -181,7 +181,7 @@ __split_ovfl_key_cleanup(WT_SESSION_IMPL *session, WT_PAGE *page, WT_REF *ref)
ikey->cell_offset = 0;
cell = WT_PAGE_REF_OFFSET(page, cell_offset);
- __wt_cell_unpack(session, page, cell, &kpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, &kpack);
if (FLD_ISSET(kpack.flags, WT_CELL_UNPACK_OVERFLOW) && kpack.raw != WT_CELL_KEY_OVFL_RM)
WT_RET(__wt_ovfl_discard(session, page, cell));
@@ -197,7 +197,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, WT_REF **from_ref
WT_REF **to_refp, size_t *incrp)
{
WT_ADDR *addr, *ref_addr;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_ADDR unpack;
WT_DECL_RET;
WT_IKEY *ikey;
WT_REF *ref;
@@ -247,7 +247,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, WT_REF **from_ref
*/
WT_ORDERED_READ(ref_addr, ref->addr);
if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) {
- __wt_cell_unpack(session, from_home, (WT_CELL *)ref_addr, &unpack);
+ __wt_cell_unpack_addr(session, from_home->dsk, (WT_CELL *)ref_addr, &unpack);
WT_RET(__wt_calloc_one(session, &addr));
__wt_time_aggregate_copy(&addr->ta, &unpack.ta);
WT_ERR(__wt_memdup(session, unpack.data, unpack.size, &addr->addr));
diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c
index 2005c279771..83143b60682 100644
--- a/src/third_party/wiredtiger/src/btree/bt_stat.c
+++ b/src/third_party/wiredtiger/src/btree/bt_stat.c
@@ -134,7 +134,7 @@ static void
__stat_page_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
{
WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK_KV *unpack, _unpack;
WT_COL *cip;
WT_INSERT *ins;
uint64_t deleted_cnt, entry_cnt, ovfl_cnt, rle_cnt;
@@ -154,7 +154,7 @@ __stat_page_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **sta
*/
WT_COL_FOREACH (page, cip, i) {
cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(session, page, cell, unpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, unpack);
if (unpack->type == WT_CELL_DEL) {
orig_deleted = true;
deleted_cnt += __wt_cell_rle(unpack);
@@ -218,11 +218,9 @@ __stat_page_col_var(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **sta
static void
__stat_page_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_ADDR unpack;
uint32_t ovfl_cnt;
- btree = S2BT(session);
ovfl_cnt = 0;
WT_STAT_INCR(session, stats, btree_row_internal);
@@ -232,7 +230,7 @@ __stat_page_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **sta
* representation of the page doesn't necessarily contain a reference to the original cell.
*/
if (page->dsk != NULL) {
- WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ WT_CELL_FOREACH_ADDR (session, page->dsk, unpack) {
if (__wt_cell_type(unpack.cell) == WT_CELL_KEY_OVFL)
++ovfl_cnt;
}
@@ -249,15 +247,13 @@ __stat_page_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **sta
static void
__stat_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
WT_INSERT *ins;
WT_ROW *rip;
WT_UPDATE *upd;
uint32_t empty_values, entry_cnt, i, ovfl_cnt;
bool key;
- btree = S2BT(session);
empty_values = entry_cnt = ovfl_cnt = 0;
WT_STAT_INCR(session, stats, btree_row_leaf);
@@ -298,7 +294,7 @@ __stat_page_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **st
*/
if (page->dsk != NULL) {
key = false;
- WT_CELL_FOREACH_BEGIN (session, btree, page->dsk, unpack) {
+ WT_CELL_FOREACH_KV (session, page->dsk, unpack) {
switch (__wt_cell_type(unpack.cell)) {
case WT_CELL_KEY_OVFL:
++ovfl_cnt;
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 100aeed5105..71885a341d0 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -179,7 +179,7 @@ __sync_ref_obsolete_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF_LIST *rl
WT_DECL_RET;
WT_MULTI *multi;
WT_PAGE_MODIFY *mod;
- wt_timestamp_t newest_stop_ts;
+ wt_timestamp_t newest_stop_durable_ts;
uint64_t newest_stop_txn;
uint32_t i;
uint8_t previous_state;
@@ -214,15 +214,20 @@ __sync_ref_obsolete_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF_LIST *rl
* dirty. This is to ensure the parent is written during the checkpoint and the child page
* discarded.
*/
- newest_stop_ts = WT_TS_NONE;
+ newest_stop_durable_ts = WT_TS_NONE;
newest_stop_txn = WT_TXN_NONE;
obsolete = false;
if (previous_state == WT_REF_DISK) {
/* There should be an address, but simply skip any page where we don't find one. */
if (__wt_ref_addr_copy(session, ref, &addr)) {
- newest_stop_ts = addr.ta.newest_stop_ts;
+ /*
+ * Max stop timestamp is possible only when the prepared update is written to the data
+ * store.
+ */
+ newest_stop_durable_ts =
+ addr.ta.newest_stop_ts == WT_TS_MAX ? WT_TS_MAX : addr.ta.newest_stop_durable_ts;
newest_stop_txn = addr.ta.newest_stop_txn;
- obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_ts);
+ obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_durable_ts);
}
if (obsolete) {
@@ -237,7 +242,7 @@ __sync_ref_obsolete_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF_LIST *rl
"%p on-disk page obsolete check: %s"
"obsolete, stop ts/txn %s",
(void *)ref, obsolete ? "" : "not ",
- __wt_time_pair_to_string(newest_stop_ts, newest_stop_txn, tp_string));
+ __wt_time_pair_to_string(newest_stop_durable_ts, newest_stop_txn, tp_string));
return (0);
}
WT_REF_UNLOCK(ref, previous_state);
@@ -275,21 +280,26 @@ __sync_ref_obsolete_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF_LIST *rl
/* Calculate the max stop time pair by traversing all multi addresses. */
for (multi = mod->mod_multi, i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
newest_stop_txn = WT_MAX(newest_stop_txn, multi->addr.ta.newest_stop_txn);
- newest_stop_ts = WT_MAX(newest_stop_ts, multi->addr.ta.newest_stop_ts);
+ newest_stop_durable_ts = WT_MAX(newest_stop_durable_ts,
+ multi->addr.ta.newest_stop_ts == WT_TS_MAX ? WT_TS_MAX :
+ multi->addr.ta.newest_stop_durable_ts);
}
- obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_ts);
+ obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_durable_ts);
} else if (mod != NULL && mod->rec_result == WT_PM_REC_REPLACE) {
tag = "reconciled replacement block";
newest_stop_txn = mod->mod_replace.ta.newest_stop_txn;
- newest_stop_ts = mod->mod_replace.ta.newest_stop_ts;
- obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_ts);
+ newest_stop_durable_ts = mod->mod_replace.ta.newest_stop_ts == WT_TS_MAX ?
+ WT_TS_MAX :
+ mod->mod_replace.ta.newest_stop_durable_ts;
+ obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_durable_ts);
} else if (__wt_ref_addr_copy(session, ref, &addr)) {
tag = "WT_REF address";
newest_stop_txn = addr.ta.newest_stop_txn;
- newest_stop_ts = addr.ta.newest_stop_ts;
- obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_ts);
+ newest_stop_durable_ts =
+ addr.ta.newest_stop_ts == WT_TS_MAX ? WT_TS_MAX : addr.ta.newest_stop_durable_ts;
+ obsolete = __wt_txn_visible_all(session, newest_stop_txn, newest_stop_durable_ts);
} else
tag = "unexpected page state";
@@ -303,7 +313,7 @@ __sync_ref_obsolete_check(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF_LIST *rl
"%p in-memory page obsolete check: %s %s"
"obsolete, stop ts/txn %s",
(void *)ref, tag, obsolete ? "" : "not ",
- __wt_time_pair_to_string(newest_stop_ts, newest_stop_txn, tp_string));
+ __wt_time_pair_to_string(newest_stop_durable_ts, newest_stop_txn, tp_string));
err:
if (hazard)
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 0b3d4da2459..c2f0ec0c3ce 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -38,11 +38,14 @@ typedef struct {
} WT_VSTUFF;
static void __verify_checkpoint_reset(WT_VSTUFF *);
-static int __verify_page_content(WT_SESSION_IMPL *, WT_REF *, WT_CELL_UNPACK *, WT_VSTUFF *);
+static int __verify_page_content_int(
+ WT_SESSION_IMPL *, WT_REF *, WT_CELL_UNPACK_ADDR *, WT_VSTUFF *);
+static int __verify_page_content_leaf(
+ WT_SESSION_IMPL *, WT_REF *, WT_CELL_UNPACK_ADDR *, WT_VSTUFF *);
static int __verify_row_int_key_order(
WT_SESSION_IMPL *, WT_PAGE *, WT_REF *, uint32_t, WT_VSTUFF *);
static int __verify_row_leaf_key_order(WT_SESSION_IMPL *, WT_REF *, WT_VSTUFF *);
-static int __verify_tree(WT_SESSION_IMPL *, WT_REF *, WT_CELL_UNPACK *, WT_VSTUFF *);
+static int __verify_tree(WT_SESSION_IMPL *, WT_REF *, WT_CELL_UNPACK_ADDR *, WT_VSTUFF *);
static int __verify_ts_stable_cmp(
WT_SESSION_IMPL *, WT_ITEM *, WT_REF *, uint32_t, wt_timestamp_t, wt_timestamp_t, WT_VSTUFF *);
@@ -164,7 +167,7 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_BM *bm;
WT_BTREE *btree;
- WT_CELL_UNPACK addr_unpack;
+ WT_CELL_UNPACK_ADDR addr_unpack;
WT_CKPT *ckptbase, *ckpt;
WT_DECL_RET;
WT_VSTUFF *vs, _vstuff;
@@ -384,43 +387,42 @@ err:
* Check an address block's timestamps.
*/
static int
-__verify_addr_ts(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *unpack, WT_VSTUFF *vs)
+__verify_addr_ts(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK_ADDR *unpack, WT_VSTUFF *vs)
{
+ WT_TIME_AGGREGATE *ta;
char time_string[WT_TIME_STRING_SIZE];
- if (unpack->ta.oldest_start_ts != WT_TS_NONE && unpack->ta.newest_stop_ts == WT_TS_NONE)
+ ta = &unpack->ta;
+
+ if (ta->oldest_start_ts != WT_TS_NONE && ta->newest_stop_ts == WT_TS_NONE)
WT_RET_MSG(session, WT_ERROR,
- "internal page reference at %s has a newest stop "
- "timestamp of 0",
- __verify_addr_string(session, ref, vs->tmp1));
- if (unpack->ta.oldest_start_ts > unpack->ta.newest_stop_ts)
+ "internal page reference at %s has a newest stop timestamp of 0; time aggregate %s",
+ __verify_addr_string(session, ref, vs->tmp1),
+ __wt_time_aggregate_to_string(ta, time_string));
+ if (ta->oldest_start_ts > ta->newest_stop_ts)
WT_RET_MSG(session, WT_ERROR,
- "internal page reference at %s has an oldest start "
- "timestamp newer than its newest stop timestamp; time window %s",
+ "internal page reference at %s has an oldest start timestamp newer than its newest stop "
+ "timestamp; time window %s",
__verify_addr_string(session, ref, vs->tmp1),
- __wt_time_window_to_string(&unpack->tw, time_string));
- if (unpack->ta.oldest_start_txn > unpack->ta.newest_stop_txn)
+ __wt_time_aggregate_to_string(ta, time_string));
+ if (ta->oldest_start_txn > ta->newest_stop_txn)
WT_RET_MSG(session, WT_ERROR,
- "internal page reference at %s has an oldest start "
- "transaction newer than its newest stop "
- "transaction; time aggregate %s",
+ "internal page reference at %s has an oldest start transaction newer than its newest "
+ "stop transaction; time aggregate %s",
__verify_addr_string(session, ref, vs->tmp1),
- __wt_time_aggregate_to_string(&unpack->ta, time_string));
- if (unpack->ta.oldest_start_ts > unpack->ta.newest_start_durable_ts)
+ __wt_time_aggregate_to_string(ta, time_string));
+ if (ta->oldest_start_ts > ta->newest_start_durable_ts)
WT_RET_MSG(session, WT_ERROR,
- "internal page reference at %s has an oldest start "
- "timestamp newer than its newest start durable "
- "timestamp; time aggregate %s",
+ "internal page reference at %s has an oldest start timestamp newer than its newest start "
+ "durable timestamp; time aggregate %s",
__verify_addr_string(session, ref, vs->tmp1),
- __wt_time_aggregate_to_string(&unpack->ta, time_string));
- if (unpack->ta.newest_stop_ts != WT_TS_MAX &&
- unpack->ta.newest_stop_ts > unpack->ta.newest_stop_durable_ts)
+ __wt_time_aggregate_to_string(ta, time_string));
+ if (ta->newest_stop_ts != WT_TS_MAX && ta->newest_stop_ts > ta->newest_stop_durable_ts)
WT_RET_MSG(session, WT_ERROR,
- "internal page reference at %s has a newest stop "
- "timestamp newer than its newest stop durable "
- "timestamp; time aggregate %s",
+ "internal page reference at %s has a newest stop timestamp newer than its newest stop "
+ "durable timestamp; time aggregate %s",
__verify_addr_string(session, ref, vs->tmp1),
- __wt_time_aggregate_to_string(&unpack->ta, time_string));
+ __wt_time_aggregate_to_string(ta, time_string));
return (0);
}
@@ -431,10 +433,11 @@ __verify_addr_ts(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *unpack,
* Our job is to check logical relationships in the page and in the tree.
*/
static int
-__verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *addr_unpack, WT_VSTUFF *vs)
+__verify_tree(
+ WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK_ADDR *addr_unpack, WT_VSTUFF *vs)
{
WT_BM *bm;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK_ADDR *unpack, _unpack;
WT_DECL_RET;
WT_PAGE *page;
WT_REF *child_ref;
@@ -515,10 +518,12 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *addr_unpack
vs->records_so_far += page->entries;
break;
case WT_PAGE_COL_INT:
- case WT_PAGE_COL_VAR:
case WT_PAGE_ROW_INT:
+ WT_RET(__verify_page_content_int(session, ref, addr_unpack, vs));
+ break;
+ case WT_PAGE_COL_VAR:
case WT_PAGE_ROW_LEAF:
- WT_RET(__verify_page_content(session, ref, addr_unpack, vs));
+ WT_RET(__verify_page_content_leaf(session, ref, addr_unpack, vs));
break;
}
@@ -542,8 +547,7 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *addr_unpack
if (addr_unpack->raw != WT_CELL_ADDR_INT)
celltype_err:
WT_RET_MSG(session, WT_ERROR,
- "page at %s, of type %s, is referenced in "
- "its parent by a cell of type %s",
+ "page at %s, of type %s, is referenced in its parent by a cell of type %s",
__verify_addr_string(session, ref, vs->tmp1), __wt_page_type_string(page->type),
__wt_cell_type_string(addr_unpack->raw));
break;
@@ -561,17 +565,16 @@ celltype_err:
*/
++entry;
if (child_ref->ref_recno != vs->records_so_far + 1) {
- WT_RET_MSG(session, WT_ERROR, "the starting record number in entry %" PRIu32
- " of the column internal page at "
- "%s is %" PRIu64
- " and the expected "
- "starting record number is %" PRIu64,
+ WT_RET_MSG(session, WT_ERROR,
+ "the starting record number in entry %" PRIu32
+ " of the column internal page at %s is %" PRIu64
+ " and the expected starting record number is %" PRIu64,
entry, __verify_addr_string(session, child_ref, vs->tmp1), child_ref->ref_recno,
vs->records_so_far + 1);
}
/* Unpack the address block and check timestamps */
- __wt_cell_unpack(session, child_ref->home, child_ref->addr, unpack);
+ __wt_cell_unpack_addr(session, child_ref->home->dsk, child_ref->addr, unpack);
WT_RET(__verify_addr_ts(session, child_ref, unpack, vs));
/* Verify the subtree. */
@@ -601,7 +604,7 @@ celltype_err:
WT_RET(__verify_row_int_key_order(session, page, child_ref, entry, vs));
/* Unpack the address block and check timestamps */
- __wt_cell_unpack(session, child_ref->home, child_ref->addr, unpack);
+ __wt_cell_unpack_addr(session, child_ref->home->dsk, child_ref->addr, unpack);
WT_RET(__verify_addr_ts(session, child_ref, unpack, vs));
/* Verify the subtree. */
@@ -645,9 +648,8 @@ __verify_row_int_key_order(
WT_RET(__wt_compare(session, btree->collator, &item, vs->max_key, &cmp));
if (cmp <= 0)
WT_RET_MSG(session, WT_ERROR, "the internal key in entry %" PRIu32
- " on the page at %s "
- "sorts before the last key appearing on page %s, earlier "
- "in the tree: %s, %s",
+ " on the page at %s sorts before the last key appearing on "
+ "page %s, earlier in the tree: %s, %s",
entry, __verify_addr_string(session, ref, vs->tmp1), (char *)vs->max_addr->data,
__wt_buf_set_printable(session, item.data, item.size, vs->tmp2),
__wt_buf_set_printable(session, vs->max_key->data, vs->max_key->size, vs->tmp3));
@@ -698,9 +700,8 @@ __verify_row_leaf_key_order(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs
WT_RET(__wt_compare(session, btree->collator, vs->tmp1, (WT_ITEM *)vs->max_key, &cmp));
if (cmp < 0)
WT_RET_MSG(session, WT_ERROR,
- "the first key on the page at %s sorts equal to "
- "or less than the last key appearing on the page "
- "at %s, earlier in the tree: %s, %s",
+ "the first key on the page at %s sorts equal to or less than the last key appearing "
+ "on the page at %s, earlier in the tree: %s, %s",
__verify_addr_string(session, ref, vs->tmp2), (char *)vs->max_addr->data,
__wt_buf_set_printable(session, vs->tmp1->data, vs->tmp1->size, vs->tmp3),
__wt_buf_set_printable(session, vs->max_key->data, vs->max_key->size, vs->tmp4));
@@ -771,8 +772,8 @@ __verify_ts_addr_cmp(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t cell_num, c
return (0);
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s failed verification with %s "
- "timestamp of %s, %s the parent's %s timestamp of %s",
+ " on page at %s failed verification with %s timestamp of %s, %s "
+ "the parent's %s timestamp of %s",
cell_num, __verify_addr_string(session, ref, vs->tmp1), ts1_name,
__verify_timestamp_to_pretty_string(ts1, ts_string[0]), gt ? "less than" : "greater than",
ts2_name, __verify_timestamp_to_pretty_string(ts2, ts_string[1]));
@@ -819,8 +820,8 @@ msg:
__wt_key_string(session, key->data, key->size, btree->key_format, vs->tmp2)));
WT_RET_MSG(session, WT_ERROR,
- "%s has failed verification with a %s"
- " timestamp of %s greater than the stable_timestamp of %s",
+ "%s has failed verification with a %s timestamp of %s greater than the stable_timestamp of "
+ "%s",
(char *)vs->tmp1->data, start ? "start" : "stop",
__wt_timestamp_to_string(start ? start_ts : stop_ts, tp_string[0]),
__wt_timestamp_to_string(vs->stable_timestamp, tp_string[1]));
@@ -846,11 +847,9 @@ __verify_txn_addr_cmp(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t cell_num,
if (dsk->write_gen <= S2C(session)->base_write_gen)
return (0);
- WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s failed verification with %s "
- "transaction of %" PRIu64
- ", %s the parent's %s transaction of "
- "%" PRIu64,
+ WT_RET_MSG(session, WT_ERROR,
+ "cell %" PRIu32 " on page at %s failed verification with %s transaction of %" PRIu64
+ ", %s the parent's %s transaction of %" PRIu64,
cell_num, __verify_addr_string(session, ref, vs->tmp1), txn1_name, txn1,
gt ? "less than" : "greater than", txn2_name, txn2);
}
@@ -941,30 +940,23 @@ __verify_key_hs(
}
/*
- * __verify_page_content --
- * Verify the page's content.
+ * __verify_page_content_int --
+ * Verify an internal page's content.
*/
static int
-__verify_page_content(
- WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK *addr_unpack, WT_VSTUFF *vs)
+__verify_page_content_int(
+ WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK_ADDR *addr_unpack, WT_VSTUFF *vs)
{
- WT_BTREE *btree;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_ADDR unpack;
WT_DECL_RET;
WT_PAGE *page;
const WT_PAGE_HEADER *dsk;
- WT_ROW *rip;
- uint64_t recno, rle;
+ WT_TIME_AGGREGATE *ta;
uint32_t cell_num;
- uint8_t *p;
char time_string[WT_TIME_STRING_SIZE];
- bool found_ovfl;
- btree = S2BT(session);
page = ref->page;
- rip = page->pg_row;
- recno = ref->ref_recno;
- found_ovfl = false;
+ ta = &unpack.ta;
/*
* If a tree is empty (just created), it won't have a disk image; if there is no disk image,
@@ -975,17 +967,21 @@ __verify_page_content(
/* Walk the page, tracking timestamps and verifying overflow pages. */
cell_num = 0;
- WT_CELL_FOREACH_BEGIN (session, btree, dsk, unpack) {
+ WT_CELL_FOREACH_ADDR (session, dsk, unpack) {
++cell_num;
+
+ if (!__wt_cell_type_check(unpack.type, dsk->type))
+ WT_RET_MSG(session, WT_ERROR, "illegal cell and page type combination: cell %" PRIu32
+ " on page at %s is a %s cell on a %s page",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_cell_type_string(unpack.type), __wt_page_type_string(dsk->type));
+
switch (unpack.type) {
case WT_CELL_KEY_OVFL:
- case WT_CELL_VALUE_OVFL:
- found_ovfl = true;
if ((ret = __verify_overflow(session, unpack.data, unpack.size, vs)) != 0)
- WT_RET_MSG(session, ret, "cell %" PRIu32
- " on page at %s references "
- "an overflow item at %s that failed "
- "verification",
+ WT_RET_MSG(session, ret,
+ "cell %" PRIu32
+ " on page at %s references an overflow item at %s that failed verification",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
__wt_addr_string(session, unpack.data, unpack.size, vs->tmp2));
break;
@@ -1000,126 +996,179 @@ __verify_page_content(
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
- if (unpack.ta.oldest_start_ts != WT_TS_NONE && unpack.ta.newest_stop_ts == WT_TS_NONE)
- WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s has a "
- "newest stop timestamp of 0; time window %s",
+ if (ta->oldest_start_ts != WT_TS_NONE && ta->newest_stop_ts == WT_TS_NONE)
+ WT_RET_MSG(session, WT_ERROR,
+ "cell %" PRIu32 " on page at %s has a newest stop timestamp of 0; time window %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_time_aggregate_to_string(&unpack.ta, time_string));
- if (unpack.ta.oldest_start_ts > unpack.ta.newest_stop_ts)
+ __wt_time_aggregate_to_string(ta, time_string));
+ if (ta->oldest_start_ts > ta->newest_stop_ts)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s has an "
- "oldest start timestamp newer than "
- "its newest stop timestamp; time window %s",
+ " on page at %s has an oldest start timestamp newer "
+ "than its newest stop timestamp; time window %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_time_aggregate_to_string(&unpack.ta, time_string));
- if (unpack.ta.oldest_start_txn > unpack.ta.newest_stop_txn) {
- WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page "
- "at %s has an oldest start transaction newer than "
- "its newest stop transaction; time aggregate %s ",
+ __wt_time_aggregate_to_string(ta, time_string));
+ if (ta->oldest_start_txn > ta->newest_stop_txn) {
+ WT_RET_MSG(session, WT_ERROR,
+ "cell %" PRIu32
+ " on page at %s has an oldest start transaction newer than its newest stop "
+ "transaction; time aggregate %s ",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_time_aggregate_to_string(&unpack.ta, time_string));
+ __wt_time_aggregate_to_string(ta, time_string));
}
- if (unpack.ta.oldest_start_ts > unpack.ta.newest_start_durable_ts)
+ if (ta->oldest_start_ts > ta->newest_start_durable_ts)
WT_RET_MSG(session, WT_ERROR,
"cell %" PRIu32
- " on page at %s has an "
- "oldest start timestamp newer than "
- "its newest start durable timestamp; time aggregate %s",
+ " on page at %s has an oldest start timestamp newer than its newest start "
+ "durable timestamp; time aggregate %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_time_aggregate_to_string(&unpack.ta, time_string));
- if (unpack.ta.newest_stop_ts != WT_TS_MAX &&
- unpack.ta.newest_stop_ts > unpack.ta.newest_stop_durable_ts)
+ __wt_time_aggregate_to_string(ta, time_string));
+ if (ta->newest_stop_ts != WT_TS_MAX && ta->newest_stop_ts > ta->newest_stop_durable_ts)
WT_RET_MSG(session, WT_ERROR,
"cell %" PRIu32
- " on page at %s has a "
- "newest stop timestamp newer than "
- "its newest stop durable timestamp; time aggregate %s",
+ " on page at %s has a newest stop timestamp newer than its newest stop durable "
+ "timestamp; time aggregate %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_time_aggregate_to_string(&unpack.ta, time_string));
+ __wt_time_aggregate_to_string(ta, time_string));
if (addr_unpack->ta.newest_start_durable_ts != WT_TS_NONE)
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start durable",
- unpack.ta.newest_start_durable_ts, "start durable",
+ ta->newest_start_durable_ts, "start durable",
addr_unpack->ta.newest_start_durable_ts, false, vs));
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "oldest start",
- unpack.ta.oldest_start_ts, "oldest start", addr_unpack->ta.oldest_start_ts, true,
- vs));
+ ta->oldest_start_ts, "oldest start", addr_unpack->ta.oldest_start_ts, true, vs));
WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "oldest start",
- unpack.ta.oldest_start_txn, "oldest start", addr_unpack->ta.oldest_start_txn, true,
- dsk, vs));
+ ta->oldest_start_txn, "oldest start", addr_unpack->ta.oldest_start_txn, true, dsk,
+ vs));
if (addr_unpack->ta.newest_stop_durable_ts != WT_TS_NONE)
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop durable",
- unpack.ta.newest_stop_durable_ts, "stop durable",
+ ta->newest_stop_durable_ts, "stop durable",
addr_unpack->ta.newest_stop_durable_ts, false, vs));
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "newest stop",
- unpack.ta.newest_stop_ts, "newest stop", addr_unpack->ta.newest_stop_ts, false, vs));
+ ta->newest_stop_ts, "newest stop", addr_unpack->ta.newest_stop_ts, false, vs));
WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "newest stop",
- unpack.ta.newest_stop_txn, "newest stop", addr_unpack->ta.newest_stop_txn, false, dsk,
- vs));
+ ta->newest_stop_txn, "newest stop", addr_unpack->ta.newest_stop_txn, false, dsk, vs));
WT_RET(__verify_ts_stable_cmp(session, NULL, ref, cell_num - 1,
addr_unpack->ta.oldest_start_ts, addr_unpack->ta.newest_stop_ts, vs));
break;
+ }
+ }
+ WT_CELL_FOREACH_END;
+
+ return (0);
+}
+
+/*
+ * __verify_page_content_leaf --
+ * Verify the page's content.
+ */
+static int
+__verify_page_content_leaf(
+ WT_SESSION_IMPL *session, WT_REF *ref, WT_CELL_UNPACK_ADDR *addr_unpack, WT_VSTUFF *vs)
+{
+ WT_CELL_UNPACK_KV unpack;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ const WT_PAGE_HEADER *dsk;
+ WT_ROW *rip;
+ WT_TIME_WINDOW *tw;
+ uint64_t recno, rle;
+ uint32_t cell_num;
+ uint8_t *p;
+ char time_string[WT_TIME_STRING_SIZE];
+ bool found_ovfl;
+
+ page = ref->page;
+ rip = page->pg_row;
+ tw = &unpack.tw;
+ recno = ref->ref_recno;
+ found_ovfl = false;
+
+ /*
+ * If a tree is empty (just created), it won't have a disk image; if there is no disk image,
+ * we're done.
+ */
+ if ((dsk = page->dsk) == NULL)
+ return (0);
+
+ /* Walk the page, tracking timestamps and verifying overflow pages. */
+ cell_num = 0;
+ WT_CELL_FOREACH_KV (session, dsk, unpack) {
+ ++cell_num;
+
+ if (!__wt_cell_type_check(unpack.type, dsk->type))
+ WT_RET_MSG(session, WT_ERROR, "illegal cell and page type combination: cell %" PRIu32
+ " on page at %s is a %s cell on a %s page",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_cell_type_string(unpack.type), __wt_page_type_string(dsk->type));
+
+ switch (unpack.type) {
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_VALUE_OVFL:
+ found_ovfl = true;
+ if ((ret = __verify_overflow(session, unpack.data, unpack.size, vs)) != 0)
+ WT_RET_MSG(session, ret,
+ "cell %" PRIu32
+ " on page at %s references an overflow item at %s that failed verification",
+ cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
+ __wt_addr_string(session, unpack.data, unpack.size, vs->tmp2));
+ break;
+ }
+
+ switch (unpack.type) {
case WT_CELL_DEL:
case WT_CELL_VALUE:
case WT_CELL_VALUE_COPY:
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_SHORT:
- if (unpack.tw.start_ts != WT_TS_NONE && unpack.tw.stop_ts == WT_TS_NONE)
- WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s has a stop "
- "timestamp of 0; time window %s",
+ if (tw->start_ts != WT_TS_NONE && tw->stop_ts == WT_TS_NONE)
+ WT_RET_MSG(session, WT_ERROR,
+ "cell %" PRIu32 " on page at %s has a stop timestamp of 0; time window %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_time_window_to_string(&unpack.tw, time_string));
- if (unpack.tw.start_ts > unpack.tw.stop_ts)
+ __wt_time_window_to_string(tw, time_string));
+ if (tw->start_ts > tw->stop_ts)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s has a "
- "start timestamp newer than its stop "
- "timestamp; time window %s",
+ " on page at %s has a start timestamp newer than its "
+ "stop timestamp; time window %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_time_window_to_string(&unpack.tw, time_string));
- if (unpack.tw.start_txn > unpack.tw.stop_txn)
+ __wt_time_window_to_string(tw, time_string));
+ if (tw->start_txn > tw->stop_txn)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s has a "
- "start transaction newer than "
+ " on page at %s has a start transaction newer than "
"its stop transaction; time window %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_time_window_to_string(&unpack.tw, time_string));
- if (unpack.tw.start_ts > unpack.tw.durable_start_ts)
+ __wt_time_window_to_string(tw, time_string));
+ if (tw->start_ts > tw->durable_start_ts)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s has a "
- "start timestamp newer than its start durable "
- "timestamp; time window %s",
+ " on page at %s has a start timestamp newer than its "
+ "start durable timestamp; time window %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_time_window_to_string(&unpack.tw, time_string));
- if (unpack.tw.stop_ts != WT_TS_MAX && unpack.tw.stop_ts > unpack.tw.durable_stop_ts)
+ __wt_time_window_to_string(tw, time_string));
+ if (tw->stop_ts != WT_TS_MAX && tw->stop_ts > tw->durable_stop_ts)
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s has a "
- "stop timestamp newer than its stop durable "
- "timestamp; time window %s",
+ " on page at %s has a stop timestamp newer than its "
+ "stop durable timestamp; time window %s",
cell_num - 1, __verify_addr_string(session, ref, vs->tmp1),
- __wt_time_window_to_string(&unpack.tw, time_string));
+ __wt_time_window_to_string(tw, time_string));
if (addr_unpack->ta.newest_start_durable_ts != WT_TS_NONE)
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start durable",
- unpack.tw.durable_start_ts, "newest durable start",
+ tw->durable_start_ts, "newest durable start",
addr_unpack->ta.newest_start_durable_ts, false, vs));
- WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start", unpack.tw.start_ts,
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "start", tw->start_ts,
"oldest start", addr_unpack->ta.oldest_start_ts, true, vs));
- WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "start", unpack.tw.start_txn,
+ WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "start", tw->start_txn,
"oldest start", addr_unpack->ta.oldest_start_txn, true, dsk, vs));
if (addr_unpack->ta.newest_stop_durable_ts != WT_TS_NONE)
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop durable",
- unpack.tw.durable_stop_ts, "newest durable stop",
+ tw->durable_stop_ts, "newest durable stop",
addr_unpack->ta.newest_stop_durable_ts, false, vs));
- WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop", unpack.tw.stop_ts,
+ WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1, "stop", tw->stop_ts,
"newest stop", addr_unpack->ta.newest_stop_ts, false, vs));
- WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "stop", unpack.tw.stop_txn,
+ WT_RET(__verify_txn_addr_cmp(session, ref, cell_num - 1, "stop", tw->stop_txn,
"newest stop", addr_unpack->ta.newest_stop_txn, false, dsk, vs));
WT_RET(__verify_ts_stable_cmp(
- session, NULL, ref, cell_num - 1, unpack.tw.start_ts, unpack.tw.stop_ts, vs));
+ session, NULL, ref, cell_num - 1, tw->start_ts, tw->stop_ts, vs));
break;
}
@@ -1128,11 +1177,12 @@ __verify_page_content(
* values in debug mode.
*/
if (page->type == WT_PAGE_ROW_LEAF) {
- if (unpack.type != WT_CELL_KEY && unpack.type != WT_CELL_KEY_OVFL)
+ if (unpack.type != WT_CELL_VALUE && unpack.type != WT_CELL_VALUE_COPY &&
+ unpack.type != WT_CELL_VALUE_OVFL && unpack.type != WT_CELL_VALUE_SHORT)
continue;
WT_RET(__wt_row_leaf_key(session, page, rip++, vs->tmp1, false));
- WT_RET(__verify_key_hs(session, vs->tmp1, unpack.tw.start_ts, vs));
+ WT_RET(__verify_key_hs(session, vs->tmp1, tw->start_ts, vs));
#ifdef HAVE_DIAGNOSTIC
if (vs->dump_history)
@@ -1143,7 +1193,7 @@ __verify_page_content(
p = vs->tmp1->mem;
WT_RET(__wt_vpack_uint(&p, 0, recno));
vs->tmp1->size = WT_PTRDIFF(p, vs->tmp1->mem);
- WT_RET(__verify_key_hs(session, vs->tmp1, unpack.tw.start_ts, vs));
+ WT_RET(__verify_key_hs(session, vs->tmp1, tw->start_ts, vs));
#ifdef HAVE_DIAGNOSTIC
if (vs->dump_history)
@@ -1162,8 +1212,8 @@ __verify_page_content(
*/
if (found_ovfl && addr_unpack->raw == WT_CELL_ADDR_LEAF_NO)
WT_RET_MSG(session, WT_ERROR,
- "page at %s, of type %s and referenced in its parent by a "
- "cell of type %s, contains overflow items",
+ "page at %s, of type %s and referenced in its parent by a cell of type %s, contains "
+ "overflow items",
__verify_addr_string(session, ref, vs->tmp1), __wt_page_type_string(ref->page->type),
__wt_cell_type_string(addr_unpack->raw));
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
index a1e96d41dc9..0b29c3ee526 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
@@ -16,7 +16,9 @@ static int __verify_dsk_col_fix(WT_SESSION_IMPL *, const char *, const WT_PAGE_H
static int __verify_dsk_col_int(WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_ADDR *);
static int __verify_dsk_col_var(WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_ADDR *);
static int __verify_dsk_memsize(WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_CELL *);
-static int __verify_dsk_row(WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_ADDR *);
+static int __verify_dsk_row_int(WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_ADDR *);
+static int __verify_dsk_row_leaf(
+ WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADER *, WT_ADDR *);
#define WT_ERR_VRFY(session, ...) \
do { \
@@ -44,8 +46,8 @@ static int __verify_dsk_row(WT_SESSION_IMPL *, const char *, const WT_PAGE_HEADE
* WT_CELL_FOREACH macro, created because the loop can't simply unpack cells,
* verify has to do additional work to ensure that unpack is safe.
*/
-#define WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) \
- for ((cell) = WT_PAGE_HEADER_BYTE(btree, dsk), (i) = (dsk)->u.entries; (i) > 0; \
+#define WT_CELL_FOREACH_VRFY(session, dsk, cell, unpack, i) \
+ for ((cell) = WT_PAGE_HEADER_BYTE(S2BT(session), dsk), (i) = (dsk)->u.entries; (i) > 0; \
(cell) = (WT_CELL *)((uint8_t *)(cell) + (unpack)->__len), --(i))
/*
@@ -90,8 +92,7 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_H
if (dsk->recno == WT_RECNO_OOB)
break;
WT_RET_VRFY(session,
- "%s page at %s has a record number, which is illegal for "
- "this page type",
+ "%s page at %s has a record number, which is illegal for this page type",
__wt_page_type_string(dsk->type), tag);
}
@@ -167,8 +168,9 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_H
case WT_PAGE_COL_VAR:
return (__verify_dsk_col_var(session, tag, dsk, addr));
case WT_PAGE_ROW_INT:
+ return (__verify_dsk_row_int(session, tag, dsk, addr));
case WT_PAGE_ROW_LEAF:
- return (__verify_dsk_row(session, tag, dsk, addr));
+ return (__verify_dsk_row_leaf(session, tag, dsk, addr));
case WT_PAGE_BLOCK_MANAGER:
case WT_PAGE_OVFL:
return (__verify_dsk_chunk(session, tag, dsk, dsk->u.datalen));
@@ -227,8 +229,8 @@ __verify_dsk_ts_addr_cmp(WT_SESSION_IMPL *session, uint32_t cell_num, const char
break;
}
WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s failed verification with %s "
- "timestamp of %s, %s the parent's %s timestamp of %s",
+ " on page at %s failed verification with %s timestamp of %s, %s "
+ "the parent's %s timestamp of %s",
cell_num, tag, ts1_name, ts1_bp, gt ? "less than" : "greater than", ts2_name, ts2_bp);
}
@@ -252,24 +254,25 @@ __verify_dsk_txn_addr_cmp(WT_SESSION_IMPL *session, uint32_t cell_num, const cha
if (dsk->write_gen <= S2C(session)->base_write_gen)
return (0);
- WT_RET_MSG(session, WT_ERROR, "cell %" PRIu32
- " on page at %s failed verification with %s "
- "transaction of %" PRIu64
- ", %s the parent's %s transaction of "
- "%" PRIu64,
+ WT_RET_MSG(session, WT_ERROR,
+ "cell %" PRIu32 " on page at %s failed verification with %s transaction of %" PRIu64
+ ", %s the parent's %s transaction of %" PRIu64,
cell_num, tag, txn1_name, txn1, gt ? "less than" : "greater than", txn2_name, txn2);
}
/*
- * __verify_dsk_validity --
- * Verify a cell's validity window.
+ * __verify_dsk_addr_validity --
+ * Verify an address cell's validity window.
*/
static int
-__verify_dsk_validity(WT_SESSION_IMPL *session, WT_CELL_UNPACK *unpack, uint32_t cell_num,
+__verify_dsk_addr_validity(WT_SESSION_IMPL *session, WT_CELL_UNPACK_ADDR *unpack, uint32_t cell_num,
WT_ADDR *addr, const char *tag, const WT_PAGE_HEADER *dsk)
{
+ WT_TIME_AGGREGATE *ta;
char time_string[WT_TIME_STRING_SIZE];
+ ta = &unpack->ta;
+
/*
* Check timestamp and transaction order, and optionally against parent values. Timestamps and
* transactions in the parent address aren't necessarily an exact match, but should be within
@@ -279,169 +282,192 @@ __verify_dsk_validity(WT_SESSION_IMPL *session, WT_CELL_UNPACK *unpack, uint32_t
* cell-unpacking code hides it by always returning durable values if they don't appear on the
* page.
*/
- switch (unpack->type) {
- case WT_CELL_ADDR_DEL:
- case WT_CELL_ADDR_INT:
- case WT_CELL_ADDR_LEAF:
- case WT_CELL_ADDR_LEAF_NO:
- if (unpack->ta.oldest_start_ts != WT_TS_NONE && unpack->ta.newest_stop_ts == WT_TS_NONE)
- WT_RET_VRFY(session, "cell %" PRIu32
- " on page at %s has a newest stop "
- "timestamp of 0; time aggregate %s",
- cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
- if (unpack->ta.oldest_start_ts > unpack->ta.newest_stop_ts)
- WT_RET_VRFY(session, "cell %" PRIu32
- " on page at %s has an oldest "
- "start timestamp newer than its newest stop "
- "timestamp; time aggregate %s",
- cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
- if (unpack->ta.oldest_start_txn > unpack->ta.newest_stop_txn)
- WT_RET_VRFY(session, "cell %" PRIu32
- " on page at %s has an oldest "
- "start transaction newer than its "
- "newest stop transaction; time aggregate %s",
- cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
- if (unpack->ta.oldest_start_ts > unpack->ta.newest_start_durable_ts)
- WT_RET_VRFY(session, "cell %" PRIu32
- " on page at %s has an oldest "
- "start timestamp newer than its newest start durable "
- "timestamp; time aggregate %s",
- cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
- if (unpack->ta.newest_stop_ts != WT_TS_MAX &&
- unpack->ta.newest_stop_ts > unpack->ta.newest_stop_durable_ts)
- WT_RET_VRFY(session, "cell %" PRIu32
- " on page at %s has a newest "
- "stop timestamp newer than its newest stop durable "
- "timestamp; time aggregate %s",
- cell_num - 1, tag, __wt_time_aggregate_to_string(&unpack->ta, time_string));
-
- if (addr == NULL)
- break;
+ if (ta->oldest_start_ts != WT_TS_NONE && ta->newest_stop_ts == WT_TS_NONE)
+ WT_RET_VRFY(session,
+ "cell %" PRIu32 " on page at %s has a newest stop timestamp of 0; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(ta, time_string));
+ if (ta->oldest_start_ts > ta->newest_stop_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has an oldest start timestamp newer than its newest "
+ "stop timestamp; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(ta, time_string));
+ if (ta->oldest_start_txn > ta->newest_stop_txn)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has an oldest start transaction newer than its newest "
+ "stop transaction; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(ta, time_string));
+ if (ta->oldest_start_ts > ta->newest_start_durable_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has an oldest start timestamp newer than its newest "
+ "start durable timestamp; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(ta, time_string));
+ if (ta->newest_stop_ts != WT_TS_MAX && ta->newest_stop_ts > ta->newest_stop_durable_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a newest stop timestamp newer than its newest "
+ "stop durable timestamp; time aggregate %s",
+ cell_num - 1, tag, __wt_time_aggregate_to_string(ta, time_string));
+
+ if (addr == NULL)
+ return (0);
- if (addr->ta.newest_start_durable_ts != WT_TS_NONE)
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start durable",
- unpack->ta.newest_start_durable_ts, "start durable", addr->ta.newest_start_durable_ts,
- false, tag));
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "oldest start",
- unpack->ta.oldest_start_ts, "oldest start", addr->ta.oldest_start_ts, true, tag));
- WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "oldest start",
- unpack->ta.oldest_start_txn, "oldest start", addr->ta.oldest_start_txn, true, tag, dsk));
-
- if (addr->ta.newest_stop_durable_ts != WT_TS_NONE)
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop durable",
- unpack->ta.newest_stop_durable_ts, "stop durable", addr->ta.newest_stop_durable_ts,
- false, tag));
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "newest stop",
- unpack->ta.newest_stop_ts, "newest stop", addr->ta.newest_stop_ts, false, tag));
- WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "newest stop",
- unpack->ta.newest_stop_txn, "newest stop", addr->ta.newest_stop_txn, false, tag, dsk));
- break;
- case WT_CELL_DEL:
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_COPY:
- case WT_CELL_VALUE_OVFL:
- case WT_CELL_VALUE_OVFL_RM:
- case WT_CELL_VALUE_SHORT:
- if (unpack->tw.start_ts != WT_TS_NONE && unpack->tw.stop_ts == WT_TS_NONE)
- WT_RET_VRFY(session, "cell %" PRIu32
- " on page at %s has a stop "
- "timestamp of 0; time window %s",
- cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
- if (unpack->tw.start_ts > unpack->tw.stop_ts)
- WT_RET_VRFY(session, "cell %" PRIu32
- " on page at %s has a start "
- "timestamp newer than its stop timestamp; time window %s",
- cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
- if (unpack->tw.start_txn > unpack->tw.stop_txn)
- WT_RET_VRFY(session, "cell %" PRIu32
- " on page at %s has a start "
- "transaction newer than its stop "
- "transaction; time window %s",
- cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
- if (unpack->tw.start_ts > unpack->tw.durable_start_ts)
- WT_RET_VRFY(session, "cell %" PRIu32
- " on page at %s has a start "
- "timestamp newer than its durable start timestamp; time window %s",
- cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
- if (unpack->tw.stop_ts != WT_TS_MAX && unpack->tw.stop_ts > unpack->tw.durable_stop_ts)
- WT_RET_VRFY(session, "cell %" PRIu32
- " on page at %s has a stop "
- "timestamp newer than its durable stop timestamp; time window %s",
- cell_num - 1, tag, __wt_time_window_to_string(&unpack->tw, time_string));
-
- if (addr == NULL)
- break;
+ if (addr->ta.newest_start_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start durable",
+ ta->newest_start_durable_ts, "start durable", addr->ta.newest_start_durable_ts, false,
+ tag));
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "oldest start", ta->oldest_start_ts,
+ "oldest start", addr->ta.oldest_start_ts, true, tag));
+ WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "oldest start", ta->oldest_start_txn,
+ "oldest start", addr->ta.oldest_start_txn, true, tag, dsk));
+
+ if (addr->ta.newest_stop_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop durable",
+ ta->newest_stop_durable_ts, "stop durable", addr->ta.newest_stop_durable_ts, false, tag));
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "newest stop", ta->newest_stop_ts,
+ "newest stop", addr->ta.newest_stop_ts, false, tag));
+ WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "newest stop", ta->newest_stop_txn,
+ "newest stop", addr->ta.newest_stop_txn, false, tag, dsk));
- if (addr->ta.newest_start_durable_ts != WT_TS_NONE)
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start durable",
- unpack->tw.durable_start_ts, "newest start durable", addr->ta.newest_start_durable_ts,
- false, tag));
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start", unpack->tw.start_ts,
- "oldest start", addr->ta.oldest_start_ts, true, tag));
- WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "start", unpack->tw.start_txn,
- "oldest start", addr->ta.oldest_start_txn, true, tag, dsk));
- if (addr->ta.newest_stop_durable_ts != WT_TS_NONE)
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop durable",
- unpack->tw.durable_stop_ts, "newest stop durable", addr->ta.newest_stop_durable_ts,
- false, tag));
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop", unpack->tw.stop_ts,
- "newest stop", addr->ta.newest_stop_ts, false, tag));
- WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "stop", unpack->tw.stop_txn,
- "newest stop", addr->ta.newest_stop_txn, false, tag, dsk));
- break;
- }
+ return (0);
+}
+
+/*
+ * __verify_dsk_value_validity --
+ * Verify a value cell's validity window.
+ */
+static int
+__verify_dsk_value_validity(WT_SESSION_IMPL *session, WT_CELL_UNPACK_KV *unpack, uint32_t cell_num,
+ WT_ADDR *addr, const char *tag, const WT_PAGE_HEADER *dsk)
+{
+ WT_TIME_WINDOW *tw;
+ char time_string[WT_TIME_STRING_SIZE];
+
+ tw = &unpack->tw;
+
+ /*
+ * Check timestamp and transaction order, and optionally against parent values. Timestamps and
+ * transactions in the parent address aren't necessarily an exact match, but should be within
+ * the boundaries of the parent's information.
+ *
+ * There's no checking if validity information should appear on a page because the
+ * cell-unpacking code hides it by always returning durable values if they don't appear on the
+ * page.
+ */
+ if (unpack->tw.start_ts != WT_TS_NONE && unpack->tw.stop_ts == WT_TS_NONE)
+ WT_RET_VRFY(session,
+ "cell %" PRIu32 " on page at %s has a stop timestamp of 0; time window %s", cell_num - 1,
+ tag, __wt_time_window_to_string(tw, time_string));
+ if (tw->start_ts > tw->stop_ts)
+ WT_RET_VRFY(session,
+ "cell %" PRIu32
+ " on page at %s has a start timestamp newer than its stop timestamp; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(tw, time_string));
+ if (tw->start_txn > tw->stop_txn)
+ WT_RET_VRFY(session,
+ "cell %" PRIu32
+ " on page at %s has a start transaction newer than its stop transaction; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(tw, time_string));
+ if (tw->start_ts > tw->durable_start_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a start timestamp newer than its durable start "
+ "timestamp; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(tw, time_string));
+ if (tw->stop_ts != WT_TS_MAX && tw->stop_ts > tw->durable_stop_ts)
+ WT_RET_VRFY(session, "cell %" PRIu32
+ " on page at %s has a stop timestamp newer than its durable stop "
+ "timestamp; time window %s",
+ cell_num - 1, tag, __wt_time_window_to_string(tw, time_string));
+
+ if (addr == NULL)
+ return (0);
+
+ if (addr->ta.newest_start_durable_ts != WT_TS_NONE)
+ WT_RET(
+ __verify_dsk_ts_addr_cmp(session, cell_num - 1, "start durable", tw->durable_start_ts,
+ "newest start durable", addr->ta.newest_start_durable_ts, false, tag));
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "start", tw->start_ts, "oldest start",
+ addr->ta.oldest_start_ts, true, tag));
+ WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "start", tw->start_txn, "oldest start",
+ addr->ta.oldest_start_txn, true, tag, dsk));
+ if (addr->ta.newest_stop_durable_ts != WT_TS_NONE)
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop durable", tw->durable_stop_ts,
+ "newest stop durable", addr->ta.newest_stop_durable_ts, false, tag));
+ WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1, "stop", tw->stop_ts, "newest stop",
+ addr->ta.newest_stop_ts, false, tag));
+ WT_RET(__verify_dsk_txn_addr_cmp(session, cell_num - 1, "stop", tw->stop_txn, "newest stop",
+ addr->ta.newest_stop_txn, false, tag, dsk));
return (0);
}
/*
- * __verify_dsk_row --
- * Walk a WT_PAGE_ROW_INT or WT_PAGE_ROW_LEAF disk page and verify it.
+ * __verify_row_key_order_check --
+ * Check key ordering for row-store pages.
*/
static int
-__verify_dsk_row(
+__verify_row_key_order_check(WT_SESSION_IMPL *session, WT_ITEM *last, uint32_t last_cell_num,
+ WT_ITEM *current, uint32_t cell_num, const char *tag)
+{
+ WT_DECL_ITEM(tmp1);
+ WT_DECL_ITEM(tmp2);
+ WT_DECL_RET;
+ int cmp;
+
+ WT_RET(__wt_compare(session, S2BT(session)->collator, last, current, &cmp));
+ if (cmp < 0)
+ return (0);
+
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp1));
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp2));
+
+ ret = WT_ERROR;
+ WT_ERR_VRFY(session,
+ "the %" PRIu32 " and %" PRIu32 " keys on page at %s are incorrectly sorted: %s, %s",
+ last_cell_num, cell_num, tag, __wt_buf_set_printable(session, last->data, last->size, tmp1),
+ __wt_buf_set_printable(session, current->data, current->size, tmp2));
+
+err:
+ __wt_scr_free(session, &tmp1);
+ __wt_scr_free(session, &tmp2);
+ return (ret);
+}
+
+/*
+ * __verify_dsk_row_int --
+ * Walk a WT_PAGE_ROW_INT disk page and verify it.
+ */
+static int
+__verify_dsk_row_int(
WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, WT_ADDR *addr)
{
WT_BM *bm;
WT_BTREE *btree;
WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK_ADDR *unpack, _unpack;
WT_DECL_ITEM(current);
- WT_DECL_ITEM(last_ovfl);
- WT_DECL_ITEM(last_pfx);
- WT_DECL_ITEM(tmp1);
- WT_DECL_ITEM(tmp2);
+ WT_DECL_ITEM(last);
+ WT_DECL_ITEM(tmp);
WT_DECL_RET;
- WT_ITEM *last;
enum { FIRST, WAS_KEY, WAS_VALUE } last_cell_type;
- void *huffman;
- size_t prefix;
uint32_t cell_num, cell_type, i, key_cnt;
uint8_t *end;
- int cmp;
btree = S2BT(session);
bm = btree->bm;
unpack = &_unpack;
- huffman = dsk->type == WT_PAGE_ROW_INT ? NULL : btree->huffman_key;
WT_ERR(__wt_scr_alloc(session, 0, &current));
- WT_ERR(__wt_scr_alloc(session, 0, &last_pfx));
- WT_ERR(__wt_scr_alloc(session, 0, &last_ovfl));
- WT_ERR(__wt_scr_alloc(session, 0, &tmp1));
- WT_ERR(__wt_scr_alloc(session, 0, &tmp2));
- last = last_ovfl;
+ WT_ERR(__wt_scr_alloc(session, 0, &last));
end = (uint8_t *)dsk + dsk->mem_size;
last_cell_type = FIRST;
cell_num = 0;
key_cnt = 0;
- WT_CELL_FOREACH_VRFY (btree, dsk, cell, unpack, i) {
+ WT_CELL_FOREACH_VRFY (session, dsk, cell, unpack, i) {
++cell_num;
/* Carefully unpack the cell. */
- ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, end);
+ ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, NULL, end);
if (ret != 0) {
(void)__err_cell_corrupt(session, ret, cell_num, tag);
goto err;
@@ -452,15 +478,20 @@ __verify_dsk_row(
WT_ERR(__err_cell_type(session, cell_num, tag, unpack->type, dsk->type));
cell_type = unpack->type;
+ /* Internal row-store cells should not have prefix compression or recno/rle fields. */
+ if (unpack->prefix != 0)
+ WT_ERR_VRFY(
+ session, "the %" PRIu32 " cell on page at %s has a non-zero prefix", cell_num, tag);
+ if (unpack->v != 0)
+ WT_ERR_VRFY(session,
+ "the %" PRIu32 " cell on page at %s has a non-zero rle/recno field", cell_num, tag);
+
/*
- * Check ordering relationships between the WT_CELL entries.
- * For row-store internal pages, check for:
- * two values in a row,
- * two keys in a row,
- * a value as the first cell on a page.
- * For row-store leaf pages, check for:
- * two values in a row,
- * a value as the first cell on a page.
+ * Check ordering relationships between the WT_CELL entries. For row-store internal pages,
+ * check for:
+ * - two values in a row,
+ * - two keys in a row,
+ * - a value as the first cell on a page.
*/
switch (cell_type) {
case WT_CELL_KEY:
@@ -471,12 +502,9 @@ __verify_dsk_row(
case WAS_VALUE:
break;
case WAS_KEY:
- if (dsk->type == WT_PAGE_ROW_LEAF)
- break;
- WT_ERR_VRFY(session, "cell %" PRIu32
- " on page at %s is the "
- "first of two adjacent keys",
- cell_num - 1, tag);
+ WT_ERR_VRFY(session,
+ "cell %" PRIu32 " on page at %s is the first of two adjacent keys", cell_num - 1,
+ tag);
}
last_cell_type = WAS_KEY;
break;
@@ -484,17 +512,14 @@ __verify_dsk_row(
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
- case WT_CELL_VALUE:
- case WT_CELL_VALUE_OVFL:
switch (last_cell_type) {
case FIRST:
WT_ERR_VRFY(session, "page at %s begins with a value", tag);
case WAS_KEY:
break;
case WAS_VALUE:
- WT_ERR_VRFY(session, "cell %" PRIu32
- " on page at %s is the "
- "first of two adjacent values",
+ WT_ERR_VRFY(session,
+ "cell %" PRIu32 " on page at %s is the first of two adjacent values",
cell_num - 1, tag);
}
last_cell_type = WAS_VALUE;
@@ -502,7 +527,14 @@ __verify_dsk_row(
}
/* Check the validity window. */
- WT_ERR(__verify_dsk_validity(session, unpack, cell_num, addr, tag, dsk));
+ switch (cell_type) {
+ case WT_CELL_ADDR_DEL:
+ case WT_CELL_ADDR_INT:
+ case WT_CELL_ADDR_LEAF:
+ case WT_CELL_ADDR_LEAF_NO:
+ WT_ERR(__verify_dsk_addr_validity(session, unpack, cell_num, addr, tag, dsk));
+ break;
+ }
/* Check if any referenced item has an invalid address. */
switch (cell_type) {
@@ -511,6 +543,167 @@ __verify_dsk_row(
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
case WT_CELL_KEY_OVFL:
+ if ((ret = bm->addr_invalid(bm, session, unpack->data, unpack->size)) == EINVAL)
+ (void)__err_cell_corrupt_or_eof(session, ret, cell_num, tag);
+ WT_ERR(ret);
+ break;
+ }
+
+ /*
+ * Remaining checks are for key order. If this cell isn't a key, we're done, move to the
+ * next cell. If this cell is an overflow item, instantiate the key and compare it with the
+ * last key.
+ */
+ switch (cell_type) {
+ case WT_CELL_KEY:
+ /* Get the cell's data/length and make sure we have enough buffer space. */
+ WT_ERR(__wt_buf_init(session, current, unpack->size));
+
+ /* Copy the data into place. */
+ memcpy((uint8_t *)current->mem, unpack->data, unpack->size);
+ current->size = unpack->size;
+ break;
+ case WT_CELL_KEY_OVFL:
+ WT_ERR(__wt_dsk_cell_data_ref(session, dsk->type, unpack, current));
+ break;
+ default:
+ /* Not a key -- continue with the next cell. */
+ continue;
+ }
+
+ /*
+ * Compare the current key against the last key.
+ *
+ * Be careful about the 0th key on internal pages: we only store the first byte and custom
+ * collators may not be able to handle truncated keys.
+ */
+ if (cell_num > 3)
+ WT_ERR(
+ __verify_row_key_order_check(session, last, cell_num - 2, current, cell_num, tag));
+
+ /* Swap the buffers. */
+ tmp = last;
+ last = current;
+ current = tmp;
+ }
+ WT_ERR(__verify_dsk_memsize(session, tag, dsk, cell));
+
+ /*
+ * On row-store internal pages, the key count should be equal to half the number of physical
+ * entries.
+ */
+ if (key_cnt * 2 != dsk->u.entries)
+ WT_ERR_VRFY(session,
+ "%s page at %s has a key count of %" PRIu32 " and a physical entry count of %" PRIu32,
+ __wt_page_type_string(dsk->type), tag, key_cnt, dsk->u.entries);
+
+ if (0) {
+err:
+ if (ret == 0)
+ ret = WT_ERROR;
+ }
+ __wt_scr_free(session, &current);
+ __wt_scr_free(session, &last);
+ return (ret);
+}
+
+/*
+ * __verify_dsk_row_leaf --
+ * Walk a WT_PAGE_ROW_LEAF disk page and verify it.
+ */
+static int
+__verify_dsk_row_leaf(
+ WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, WT_ADDR *addr)
+{
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK_KV *unpack, _unpack;
+ WT_DECL_ITEM(current);
+ WT_DECL_ITEM(last_ovfl);
+ WT_DECL_ITEM(last_pfx);
+ WT_DECL_RET;
+ WT_ITEM *last;
+ enum { FIRST, WAS_KEY, WAS_VALUE } last_cell_type;
+ void *huffman;
+ size_t prefix;
+ uint32_t cell_num, cell_type, i, key_cnt, last_cell_num;
+ uint8_t *end;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ unpack = &_unpack;
+ huffman = dsk->type == WT_PAGE_ROW_INT ? NULL : btree->huffman_key;
+
+ WT_ERR(__wt_scr_alloc(session, 0, &current));
+ WT_ERR(__wt_scr_alloc(session, 0, &last_pfx));
+ WT_ERR(__wt_scr_alloc(session, 0, &last_ovfl));
+ last = last_ovfl;
+
+ end = (uint8_t *)dsk + dsk->mem_size;
+
+ last_cell_type = FIRST;
+ cell_num = last_cell_num = 0;
+ key_cnt = 0;
+ WT_CELL_FOREACH_VRFY (session, dsk, cell, unpack, i) {
+ ++cell_num;
+
+ /* Carefully unpack the cell. */
+ ret = __wt_cell_unpack_safe(session, dsk, cell, NULL, unpack, end);
+ if (ret != 0) {
+ (void)__err_cell_corrupt(session, ret, cell_num, tag);
+ goto err;
+ }
+
+ /* Check the raw and collapsed cell types. */
+ WT_ERR(__err_cell_type(session, cell_num, tag, unpack->raw, dsk->type));
+ WT_ERR(__err_cell_type(session, cell_num, tag, unpack->type, dsk->type));
+ cell_type = unpack->type;
+
+ /* Leaf row-store cells should not have recno/rle fields. */
+ if (unpack->v != 0)
+ WT_ERR_VRFY(session,
+ "the %" PRIu32 " cell on page at %s has a non-zero rle/recno field", cell_num, tag);
+
+ /*
+ * Check ordering relationships between the WT_CELL entries. For row-store leaf pages, check
+ * for:
+ * - two values in a row,
+ * - a value as the first cell on a page.
+ */
+ switch (cell_type) {
+ case WT_CELL_KEY:
+ case WT_CELL_KEY_OVFL:
+ ++key_cnt;
+ last_cell_type = WAS_KEY;
+ break;
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_OVFL:
+ switch (last_cell_type) {
+ case FIRST:
+ WT_ERR_VRFY(session, "page at %s begins with a value", tag);
+ case WAS_KEY:
+ break;
+ case WAS_VALUE:
+ WT_ERR_VRFY(session,
+ "cell %" PRIu32 " on page at %s is the first of two adjacent values",
+ cell_num - 1, tag);
+ }
+ last_cell_type = WAS_VALUE;
+ break;
+ }
+
+ /* Check the validity window. */
+ switch (cell_type) {
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_OVFL:
+ WT_ERR(__verify_dsk_value_validity(session, unpack, cell_num, addr, tag, dsk));
+ break;
+ }
+
+ /* Check if any referenced item has an invalid address. */
+ switch (cell_type) {
+ case WT_CELL_KEY_OVFL:
case WT_CELL_VALUE_OVFL:
if ((ret = bm->addr_invalid(bm, session, unpack->data, unpack->size)) == EINVAL)
(void)__err_cell_corrupt_or_eof(session, ret, cell_num, tag);
@@ -542,17 +735,15 @@ __verify_dsk_row(
prefix = unpack->prefix;
if (last_pfx->size == 0 && prefix != 0)
WT_ERR_VRFY(session, "the %" PRIu32
- " key on page at %s is the first "
- "non-overflow key on the page and has a non-zero "
- "prefix compression value",
+ " key on page at %s is the first non-overflow key on the page and "
+ "has a non-zero prefix compression value",
cell_num, tag);
/* Confirm the prefix compression count is possible. */
if (cell_num > 1 && prefix > last->size)
- WT_ERR_VRFY(session, "key %" PRIu32
- " on page at %s has a prefix "
- "compression count of %" WT_SIZET_FMT
- ", larger than the length of the previous key, %" WT_SIZET_FMT,
+ WT_ERR_VRFY(session,
+ "key %" PRIu32 " on page at %s has a prefix compression count of %" WT_SIZET_FMT
+ ", larger than the length of the previous key, %" WT_SIZET_FMT,
cell_num, tag, prefix, last->size);
/*
@@ -591,21 +782,11 @@ __verify_dsk_row(
key_compare:
/*
* Compare the current key against the last key.
- *
- * Be careful about the 0th key on internal pages: we only store the first byte and custom
- * collators may not be able to handle truncated keys.
*/
- if ((dsk->type == WT_PAGE_ROW_INT && cell_num > 3) ||
- (dsk->type != WT_PAGE_ROW_INT && cell_num > 1)) {
- WT_ERR(__wt_compare(session, btree->collator, last, current, &cmp));
- if (cmp >= 0)
- WT_ERR_VRFY(session, "the %" PRIu32 " and %" PRIu32
- " keys on "
- "page at %s are incorrectly sorted: %s, %s",
- cell_num - 2, cell_num, tag,
- __wt_buf_set_printable(session, last->data, last->size, tmp1),
- __wt_buf_set_printable(session, current->data, current->size, tmp2));
- }
+ if (cell_num > 1)
+ WT_ERR(
+ __verify_row_key_order_check(session, last, last_cell_num, current, cell_num, tag));
+ last_cell_num = cell_num;
/*
* Swap the buffers: last always references the last key entry, last_pfx and last_ovfl
@@ -625,28 +806,21 @@ key_compare:
WT_ERR(__verify_dsk_memsize(session, tag, dsk, cell));
/*
- * On row-store internal pages, and on row-store leaf pages, where the
- * "no empty values" flag is set, the key count should be equal to half
- * the number of physical entries. On row-store leaf pages where the
- * "all empty values" flag is set, the key count should be equal to the
- * number of physical entries.
+ * On standard row-store leaf pages there's no check to make, there may be more keys than values
+ * as zero-length values aren't physically stored on the page. On row-store leaf pages, where
+ * the "no empty values" flag is set, the key count should be equal to half the number of
+ * physical entries. On row-store leaf pages where the "all empty values" flag is set, the key
+ * count should be equal to the number of physical entries.
*/
- if (dsk->type == WT_PAGE_ROW_INT && key_cnt * 2 != dsk->u.entries)
- WT_ERR_VRFY(session, "%s page at %s has a key count of %" PRIu32
- " and a "
- "physical entry count of %" PRIu32,
- __wt_page_type_string(dsk->type), tag, key_cnt, dsk->u.entries);
- if (dsk->type == WT_PAGE_ROW_LEAF && F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL) &&
- key_cnt != dsk->u.entries)
+ if (F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL) && key_cnt != dsk->u.entries)
WT_ERR_VRFY(session,
- "%s page at %s with the 'all empty values' flag set has a "
- "key count of %" PRIu32 " and a physical entry count of %" PRIu32,
+ "%s page at %s with the 'all empty values' flag set has a key count of %" PRIu32
+ " and a physical entry count of %" PRIu32,
__wt_page_type_string(dsk->type), tag, key_cnt, dsk->u.entries);
- if (dsk->type == WT_PAGE_ROW_LEAF && F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE) &&
- key_cnt * 2 != dsk->u.entries)
+ if (F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE) && key_cnt * 2 != dsk->u.entries)
WT_ERR_VRFY(session,
- "%s page at %s with the 'no empty values' flag set has a "
- "key count of %" PRIu32 " and a physical entry count of %" PRIu32,
+ "%s page at %s with the 'no empty values' flag set has a key count of %" PRIu32
+ " and a physical entry count of %" PRIu32,
__wt_page_type_string(dsk->type), tag, key_cnt, dsk->u.entries);
if (0) {
@@ -657,8 +831,6 @@ err:
__wt_scr_free(session, &current);
__wt_scr_free(session, &last_pfx);
__wt_scr_free(session, &last_ovfl);
- __wt_scr_free(session, &tmp1);
- __wt_scr_free(session, &tmp2);
return (ret);
}
@@ -673,7 +845,7 @@ __verify_dsk_col_int(
WT_BM *bm;
WT_BTREE *btree;
WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK_ADDR *unpack, _unpack;
WT_DECL_RET;
uint32_t cell_num, i;
uint8_t *end;
@@ -684,11 +856,11 @@ __verify_dsk_col_int(
end = (uint8_t *)dsk + dsk->mem_size;
cell_num = 0;
- WT_CELL_FOREACH_VRFY (btree, dsk, cell, unpack, i) {
+ WT_CELL_FOREACH_VRFY (session, dsk, cell, unpack, i) {
++cell_num;
/* Carefully unpack the cell. */
- ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, end);
+ ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, NULL, end);
if (ret != 0)
return (__err_cell_corrupt(session, ret, cell_num, tag));
@@ -697,7 +869,7 @@ __verify_dsk_col_int(
WT_RET(__err_cell_type(session, cell_num, tag, unpack->type, dsk->type));
/* Check the validity window. */
- WT_RET(__verify_dsk_validity(session, unpack, cell_num, addr, tag, dsk));
+ WT_RET(__verify_dsk_addr_validity(session, unpack, cell_num, addr, tag, dsk));
/* Check if any referenced item is entirely in the file. */
ret = bm->addr_invalid(bm, session, unpack->data, unpack->size);
@@ -743,7 +915,7 @@ __verify_dsk_col_var(
WT_BM *bm;
WT_BTREE *btree;
WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK_KV *unpack, _unpack;
WT_DECL_RET;
uint32_t cell_num, cell_type, i;
uint8_t *end;
@@ -759,11 +931,11 @@ __verify_dsk_col_var(
last.deleted = false;
cell_num = 0;
- WT_CELL_FOREACH_VRFY (btree, dsk, cell, unpack, i) {
+ WT_CELL_FOREACH_VRFY (session, dsk, cell, unpack, i) {
++cell_num;
/* Carefully unpack the cell. */
- ret = __wt_cell_unpack_safe(session, dsk, cell, unpack, end);
+ ret = __wt_cell_unpack_safe(session, dsk, cell, NULL, unpack, end);
if (ret != 0)
return (__err_cell_corrupt(session, ret, cell_num, tag));
@@ -773,7 +945,7 @@ __verify_dsk_col_var(
cell_type = unpack->type;
/* Check the validity window. */
- WT_RET(__verify_dsk_validity(session, unpack, cell_num, addr, tag, dsk));
+ WT_RET(__verify_dsk_value_validity(session, unpack, cell_num, addr, tag, dsk));
/* Check if any referenced item is entirely in the file. */
if (cell_type == WT_CELL_VALUE_OVFL) {
@@ -797,8 +969,7 @@ __verify_dsk_col_var(
memcmp(last.data, unpack->data, last.size) == 0)
match_err:
WT_RET_VRFY(session, "data entries %" PRIu32 " and %" PRIu32
- " on page at %s are identical and should "
- "have been run-length encoded",
+ " on page at %s are identical and should have been run-length encoded",
cell_num - 1, cell_num, tag);
__wt_time_window_copy(&last.tw, &unpack->tw);
@@ -841,9 +1012,8 @@ __verify_dsk_memsize(
len = WT_PTRDIFF((uint8_t *)dsk + dsk->mem_size, cell);
if (len == 0)
return (0);
- WT_RET_VRFY(session, "%s page at %s has %" WT_SIZET_FMT
- " unexpected bytes of data "
- "after the last cell",
+ WT_RET_VRFY(session,
+ "%s page at %s has %" WT_SIZET_FMT " unexpected bytes of data after the last cell",
__wt_page_type_string(dsk->type), tag, len);
}
@@ -862,7 +1032,7 @@ __verify_dsk_chunk(
end = (uint8_t *)dsk + dsk->mem_size;
/*
- * Fixed-length column-store and overflow pages are simple chunks of data. Verify the data
+ * Fixed-length column-store and overflow pages are simple chunks of data. Verify the data
* doesn't overflow the end of the page.
*/
p = WT_PAGE_HEADER_BYTE(btree, dsk);
@@ -896,19 +1066,17 @@ __err_cell_corrupt(WT_SESSION_IMPL *session, int retval, uint32_t entry_num, con
static int
__err_cell_corrupt_or_eof(WT_SESSION_IMPL *session, int retval, uint32_t entry_num, const char *tag)
{
- WT_RET_VRFY_RETVAL(session, retval, "item %" PRIu32
- " on page at %s is a corrupted cell or references "
- "non-existent file pages",
+ WT_RET_VRFY_RETVAL(session, retval,
+ "item %" PRIu32 " on page at %s is a corrupted cell or references non-existent file pages",
entry_num, tag);
}
/*
- * __err_cell_type --
- * Generic illegal cell type for a particular page type error.
+ * __wt_cell_type_check --
+ * Check the cell type against the page type.
*/
-static int
-__err_cell_type(WT_SESSION_IMPL *session, uint32_t entry_num, const char *tag, uint8_t cell_type,
- uint8_t dsk_type)
+bool
+__wt_cell_type_check(uint8_t cell_type, uint8_t dsk_type)
{
switch (cell_type) {
case WT_CELL_ADDR_DEL:
@@ -916,22 +1084,22 @@ __err_cell_type(WT_SESSION_IMPL *session, uint32_t entry_num, const char *tag, u
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
if (dsk_type == WT_PAGE_COL_INT || dsk_type == WT_PAGE_ROW_INT)
- return (0);
+ return (true);
break;
case WT_CELL_DEL:
if (dsk_type == WT_PAGE_COL_VAR)
- return (0);
+ return (true);
break;
case WT_CELL_KEY:
case WT_CELL_KEY_OVFL:
case WT_CELL_KEY_SHORT:
if (dsk_type == WT_PAGE_ROW_INT || dsk_type == WT_PAGE_ROW_LEAF)
- return (0);
+ return (true);
break;
case WT_CELL_KEY_PFX:
case WT_CELL_KEY_SHORT_PFX:
if (dsk_type == WT_PAGE_ROW_LEAF)
- return (0);
+ return (true);
break;
case WT_CELL_KEY_OVFL_RM:
case WT_CELL_VALUE_OVFL_RM:
@@ -944,13 +1112,23 @@ __err_cell_type(WT_SESSION_IMPL *session, uint32_t entry_num, const char *tag, u
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_SHORT:
if (dsk_type == WT_PAGE_COL_VAR || dsk_type == WT_PAGE_ROW_LEAF)
- return (0);
- break;
- default:
+ return (true);
break;
}
+ return (false);
+}
- WT_RET_VRFY(session, "illegal cell and page type combination: cell %" PRIu32
- " on page at %s is a %s cell on a %s page",
- entry_num, tag, __wt_cell_type_string(cell_type), __wt_page_type_string(dsk_type));
+/*
+ * __err_cell_type --
+ * Generic illegal cell type for a particular page type error.
+ */
+static int
+__err_cell_type(WT_SESSION_IMPL *session, uint32_t entry_num, const char *tag, uint8_t cell_type,
+ uint8_t dsk_type)
+{
+ if (!__wt_cell_type_check(cell_type, dsk_type))
+ WT_RET_VRFY(session, "illegal cell and page type combination: cell %" PRIu32
+ " on page at %s is a %s cell on a %s page",
+ entry_num, tag, __wt_cell_type_string(cell_type), __wt_page_type_string(dsk_type));
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c
index 8db18398831..b9113df8f44 100644
--- a/src/third_party/wiredtiger/src/btree/row_key.c
+++ b/src/third_party/wiredtiger/src/btree/row_key.c
@@ -120,7 +120,7 @@ __wt_row_leaf_key_copy(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_
/*
* __wt_row_leaf_key_work --
- * Return a reference to, a row-store leaf-page key, optionally instantiate the key into the
+ * Return a reference to a row-store leaf-page key, optionally instantiate the key into the
* in-memory page.
*/
int
@@ -130,7 +130,7 @@ __wt_row_leaf_key_work(
enum { FORWARD, BACKWARD } direction;
WT_BTREE *btree;
WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK_KV *unpack, _unpack;
WT_DECL_ITEM(tmp);
WT_DECL_RET;
WT_IKEY *ikey;
@@ -253,7 +253,7 @@ switch_and_jump:
/*
* It must be an on-page cell, unpack it.
*/
- __wt_cell_unpack(session, page, cell, unpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, unpack);
/* 3: the test for an on-page reference to an overflow key. */
if (unpack->type == WT_CELL_KEY_OVFL) {
@@ -272,7 +272,7 @@ switch_and_jump:
__wt_readlock(session, &btree->ovfl_lock);
copy = WT_ROW_KEY_COPY(rip);
if (!__wt_row_leaf_key_info(page, copy, NULL, &cell, &keyb->data, &keyb->size)) {
- __wt_cell_unpack(session, page, cell, unpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, unpack);
ret = __wt_dsk_cell_data_ref(session, WT_PAGE_ROW_LEAF, unpack, keyb);
}
__wt_readunlock(session, &btree->ovfl_lock);
diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c
index ed1db846793..ea4858b0c30 100644
--- a/src/third_party/wiredtiger/src/history/hs.c
+++ b/src/third_party/wiredtiger/src/history/hs.c
@@ -1070,7 +1070,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
*/
WT_ERR(__wt_buf_set(session, &upd_value->buf, hs_value->data, hs_value->size));
skip_buf:
- upd_value->start_ts = hs_start_ts;
+ upd_value->durable_ts = durable_timestamp;
upd_value->txnid = WT_TXN_NONE;
upd_value->type = upd_type;
upd_value->prepare_state =
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 1bcca8dc686..e466f5716f8 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -1111,7 +1111,7 @@ struct __wt_update {
*/
struct __wt_update_value {
WT_ITEM buf;
- wt_timestamp_t start_ts;
+ wt_timestamp_t durable_ts;
uint64_t txnid;
uint8_t type;
uint8_t prepare_state;
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 7b3ff5b8f3d..4e6780566d7 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -728,7 +728,7 @@ __wt_ref_key(WT_PAGE *page, WT_REF *ref, void *keyp, size_t *sizep)
* Set a WT_REF to reference an on-page key.
*/
static inline void
-__wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK *unpack)
+__wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK_ADDR *unpack)
{
uintptr_t v;
@@ -930,7 +930,7 @@ __wt_row_leaf_key_set_cell(WT_PAGE *page, WT_ROW *rip, WT_CELL *cell)
* Set a WT_ROW to reference an on-page row-store leaf key.
*/
static inline void
-__wt_row_leaf_key_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack)
+__wt_row_leaf_key_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK_KV *unpack)
{
uintptr_t v;
@@ -948,7 +948,7 @@ __wt_row_leaf_key_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack)
* Set a WT_ROW to reference an on-page row-store leaf value.
*/
static inline void
-__wt_row_leaf_value_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack)
+__wt_row_leaf_value_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK_KV *unpack)
{
uintptr_t key_len, key_offset, value_offset, v;
@@ -1017,10 +1017,10 @@ __wt_row_leaf_key(
*/
static inline void
__wt_row_leaf_value_cell(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip,
- WT_CELL_UNPACK *kpack, WT_CELL_UNPACK *vpack)
+ WT_CELL_UNPACK_KV *kpack, WT_CELL_UNPACK_KV *vpack)
{
WT_CELL *kcell, *vcell;
- WT_CELL_UNPACK unpack;
+ WT_CELL_UNPACK_KV unpack;
size_t size;
void *copy, *key;
@@ -1045,12 +1045,12 @@ __wt_row_leaf_value_cell(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip,
if (__wt_row_leaf_key_info(page, copy, NULL, &kcell, &key, &size) && kcell == NULL)
vcell = (WT_CELL *)((uint8_t *)key + size);
else {
- __wt_cell_unpack(session, page, kcell, &unpack);
+ __wt_cell_unpack_kv(session, page->dsk, kcell, &unpack);
vcell = (WT_CELL *)((uint8_t *)unpack.cell + __wt_cell_total_len(&unpack));
}
}
- __wt_cell_unpack(session, page, __wt_cell_leaf_value_parse(page, vcell), vpack);
+ __wt_cell_unpack_kv(session, page->dsk, __wt_cell_leaf_value_parse(page, vcell), vpack);
}
/*
@@ -1094,7 +1094,7 @@ static inline bool
__wt_ref_addr_copy(WT_SESSION_IMPL *session, WT_REF *ref, WT_ADDR_COPY *copy)
{
WT_ADDR *addr;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK_ADDR *unpack, _unpack;
WT_PAGE *page;
unpack = &_unpack;
@@ -1122,7 +1122,7 @@ __wt_ref_addr_copy(WT_SESSION_IMPL *session, WT_REF *ref, WT_ADDR_COPY *copy)
}
/* If on-page, the pointer references a cell. */
- __wt_cell_unpack(session, page, (WT_CELL *)addr, unpack);
+ __wt_cell_unpack_addr(session, page->dsk, (WT_CELL *)addr, unpack);
__wt_time_aggregate_copy(&copy->ta, &unpack->ta);
copy->type = 0; /* Avoid static analyzer uninitialized value complaints. */
switch (unpack->raw) {
@@ -1696,7 +1696,7 @@ __wt_page_swap_func(WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32
*/
static inline int
__wt_bt_col_var_cursor_walk_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_PAGE *page,
- WT_CELL_UNPACK *unpack, WT_COL *cip)
+ WT_CELL_UNPACK_KV *unpack, WT_COL *cip)
{
cbt->slot = WT_COL_SLOT(page, cip);
WT_RET(__wt_txn_read(session, cbt, NULL, cbt->recno, NULL, unpack));
diff --git a/src/third_party/wiredtiger/src/include/cell.h b/src/third_party/wiredtiger/src/include/cell.h
index 760811e5acf..d7a5452dbe1 100644
--- a/src/third_party/wiredtiger/src/include/cell.h
+++ b/src/third_party/wiredtiger/src/include/cell.h
@@ -144,37 +144,64 @@ struct __wt_cell {
uint8_t __chunk[1 + 1 + 1 + 7 * WT_INTPACK64_MAXSIZE + WT_INTPACK32_MAXSIZE];
};
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_CELL_UNPACK_OVERFLOW 0x1u /* cell is an overflow */
+#define WT_CELL_UNPACK_PREPARE 0x2u /* cell is part of a prepared transaction */
+#define WT_CELL_UNPACK_TIME_WINDOW_CLEARED 0x4u /* time window cleared because of restart */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+
/*
- * WT_CELL_UNPACK --
- * Unpacked cell.
+ * We have two "unpacked cell" structures: one holding unpacked cells from internal nodes
+ * (address pages), and one holding unpacked cells from leaf nodes (key/value pages). They share a
+ * common set of initial fields: in a few places where a function has to handle both types of
+ * unpacked cells, the unpacked cell structures are cast to an "unpack-common" structure that can
+ * only reference shared fields.
*/
-struct __wt_cell_unpack {
- WT_CELL *cell; /* Cell's disk image address */
+#define WT_CELL_COMMON_FIELDS \
+ WT_CELL *cell; /* Cell's disk image address */ \
+ \
+ uint64_t v; /* RLE count or recno */ \
+ \
+ /* \
+ * The size and __len fields are reasonably type size_t; don't change the type, performance \
+ * drops significantly if they're type size_t. \
+ */ \
+ const void *data; /* Data */ \
+ uint32_t size; /* Data size */ \
+ \
+ uint32_t __len; /* Cell + data length (usually) */ \
+ \
+ uint8_t prefix; /* Cell prefix length */ \
+ \
+ uint8_t raw; /* Raw cell type (include "shorts") */ \
+ uint8_t type; /* Cell type */ \
+ \
+ uint8_t flags
- WT_TIME_AGGREGATE ta; /* Address validity window */
- WT_TIME_WINDOW tw; /* Value validity window */
-
- uint64_t v; /* RLE count or recno */
-
- /*
- * !!!
- * The size and __len fields are reasonably type size_t; don't change
- * the type, performance drops significantly if they're type size_t.
- */
- const void *data; /* Data */
- uint32_t size; /* Data size */
+/*
+ * WT_CELL_UNPACK_COMMON --
+ * Unpacked cell, the fields common to the address and key/value structures.
+ */
+struct __wt_cell_unpack_common {
+ WT_CELL_COMMON_FIELDS;
+};
- uint32_t __len; /* Cell + data length (usually) */
+/*
+ * WT_CELL_UNPACK_ADDR --
+ * Unpacked address cell.
+ */
+struct __wt_cell_unpack_addr {
+ WT_CELL_COMMON_FIELDS;
- uint8_t prefix; /* Cell prefix length */
+ WT_TIME_AGGREGATE ta; /* Address validity window */
+};
- uint8_t raw; /* Raw cell type (include "shorts") */
- uint8_t type; /* Cell type */
+/*
+ * WT_CELL_UNPACK_KV --
+ * Unpacked key/value cell.
+ */
+struct __wt_cell_unpack_kv {
+ WT_CELL_COMMON_FIELDS;
-/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CELL_UNPACK_OVERFLOW 0x1u /* cell is an overflow */
-#define WT_CELL_UNPACK_PREPARE 0x2u /* cell is part of a prepared transaction */
-#define WT_CELL_UNPACK_TIME_WINDOW_CLEARED 0x4u /* time window cleared because of restart */
- /* AUTOMATIC FLAG VALUE GENERATION STOP */
- uint8_t flags;
+ WT_TIME_WINDOW tw; /* Value validity window */
};
diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i
index f14eb7f8d15..427ca83b124 100644
--- a/src/third_party/wiredtiger/src/include/cell.i
+++ b/src/third_party/wiredtiger/src/include/cell.i
@@ -570,7 +570,7 @@ __wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, WT_TI
* Return the cell's RLE value.
*/
static inline uint64_t
-__wt_cell_rle(WT_CELL_UNPACK *unpack)
+__wt_cell_rle(WT_CELL_UNPACK_KV *unpack)
{
/*
* Any item with only 1 occurrence is stored with an RLE of 0, that is, without any RLE at all.
@@ -584,8 +584,12 @@ __wt_cell_rle(WT_CELL_UNPACK *unpack)
* Return the cell's total length, including data.
*/
static inline size_t
-__wt_cell_total_len(WT_CELL_UNPACK *unpack)
+__wt_cell_total_len(void *unpack_arg)
{
+ WT_CELL_UNPACK_COMMON *unpack;
+
+ unpack = unpack_arg;
+
/*
* The length field is specially named because it's dangerous to use it: it represents the
* length of the current cell (normally used for the loop that walks through cells on the page),
@@ -696,18 +700,31 @@ __wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell)
}
/*
+ * The verification code specifies an end argument, a pointer to 1B past the end-of-page. In which
+ * case, make sure all reads are inside the page image. If an error occurs, return an error code but
+ * don't output messages, our caller handles that.
+ */
+#define WT_CELL_LEN_CHK(t, len) \
+ do { \
+ if (end != NULL && \
+ ((uint8_t *)(t) < (uint8_t *)dsk || (((uint8_t *)(t)) + (len)) > (uint8_t *)end)) \
+ return (WT_ERROR); \
+ } while (0)
+
+/*
* __wt_cell_unpack_safe --
* Unpack a WT_CELL into a structure, with optional boundary checks.
*/
static inline int
__wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell,
- WT_CELL_UNPACK *unpack, const void *end)
+ WT_CELL_UNPACK_ADDR *unpack_addr, WT_CELL_UNPACK_KV *unpack_value, const void *end)
{
struct {
uint64_t v;
- WT_TIME_WINDOW tw;
uint32_t len;
+ WT_TIME_WINDOW tw;
} copy;
+ WT_CELL_UNPACK_COMMON *unpack;
WT_TIME_AGGREGATE *ta;
WT_TIME_WINDOW *tw;
uint64_t v;
@@ -719,20 +736,19 @@ __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CE
copy.len = 0; /* [-Wconditional-uninitialized] */
copy.v = 0; /* [-Wconditional-uninitialized] */
- tw = &unpack->tw;
- ta = &unpack->ta;
+ if (unpack_addr == NULL) {
+ unpack = (WT_CELL_UNPACK_COMMON *)unpack_value;
+ tw = &unpack_value->tw;
+ __wt_time_window_init(tw);
+ ta = NULL;
+ } else {
+ WT_ASSERT(session, unpack_value == NULL);
-/*
- * The verification code specifies an end argument, a pointer to 1B past the end-of-page. In which
- * case, make sure all reads are inside the page image. If an error occurs, return an error code but
- * don't output messages, our caller handles that.
- */
-#define WT_CELL_LEN_CHK(t, len) \
- do { \
- if (end != NULL && \
- ((uint8_t *)(t) < (uint8_t *)dsk || (((uint8_t *)(t)) + (len)) > (uint8_t *)end)) \
- return (WT_ERROR); \
- } while (0)
+ unpack = (WT_CELL_UNPACK_COMMON *)unpack_addr;
+ ta = &unpack_addr->ta;
+ __wt_time_aggregate_init(ta);
+ tw = NULL;
+ }
/*
* NB: when unpacking a WT_CELL_VALUE_COPY cell, unpack.cell is returned as the original cell,
@@ -741,7 +757,7 @@ __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CE
*/
unpack->cell = cell;
-restart:
+copy_cell_restart:
WT_CELL_LEN_CHK(cell, 0);
/*
@@ -751,15 +767,14 @@ restart:
* following switch. All validity windows default to durability.
*/
unpack->v = 0;
- __wt_time_window_init(&unpack->tw);
- __wt_time_aggregate_init(&unpack->ta);
unpack->raw = (uint8_t)__wt_cell_type_raw(cell);
unpack->type = (uint8_t)__wt_cell_type(cell);
unpack->flags = 0;
/*
- * Handle cells with none of RLE counts, validity window or data length: short key/data cells
- * have 6 bits of data length in the descriptor byte and nothing else.
+ * Handle cells with none of RLE counts, validity window or data length: WT_CELL_KEY_SHORT_PFX,
+ * WT_CELL_KEY_SHORT and WT_CELL_VALUE_SHORT. Short key/data cells have 6 bits of data length in
+ * the descriptor byte and nothing else.
*/
switch (unpack->raw) {
case WT_CELL_KEY_SHORT_PFX:
@@ -768,14 +783,14 @@ restart:
unpack->data = cell->__chunk + 2;
unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT;
unpack->__len = 2 + unpack->size;
- goto done;
+ goto done; /* Handle copy cells. */
case WT_CELL_KEY_SHORT:
case WT_CELL_VALUE_SHORT:
unpack->prefix = 0;
unpack->data = cell->__chunk + 1;
unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT;
unpack->__len = 1 + unpack->size;
- goto done;
+ goto done; /* Handle copy cells. */
}
unpack->prefix = 0;
@@ -800,6 +815,14 @@ restart:
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
+ /*
+ * Skip if we know we're not unpacking a cell of this type. This is all inlined code, and
+ * ideally checking allows the compiler to discard big chunks of it.
+ */
+ WT_ASSERT(session, unpack_value == NULL);
+ if (unpack_value != NULL)
+ break;
+
if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0)
break;
flags = *p++; /* skip second descriptor byte */
@@ -842,6 +865,14 @@ restart:
case WT_CELL_VALUE_COPY:
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_OVFL_RM:
+ /*
+ * Skip if we know we're not unpacking a cell of this type. This is all inlined code, and
+ * ideally checking allows the compiler to discard big chunks of it.
+ */
+ WT_ASSERT(session, unpack_addr == NULL);
+ if (unpack_addr != NULL)
+ break;
+
if ((cell->__chunk[0] & WT_CELL_SECOND_DESC) == 0)
break;
flags = *p++; /* skip second descriptor byte */
@@ -895,6 +926,14 @@ restart:
*/
switch (unpack->raw) {
case WT_CELL_VALUE_COPY:
+ /*
+ * Skip if we know we're not unpacking a cell of this type. This is all inlined code, and
+ * ideally checking allows the compiler to discard big chunks of it.
+ */
+ WT_ASSERT(session, unpack_addr == NULL);
+ if (unpack_addr != NULL)
+ break;
+
copy_cell = true;
/*
@@ -902,12 +941,13 @@ restart:
* visibility window, length and RLE of this cell, we need the length to step through the
* set of cells on the page and the RLE and timestamp information are specific to this cell.
*/
- __wt_time_window_copy(&copy.tw, tw);
WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v));
copy.v = unpack->v;
copy.len = WT_PTRDIFF32(p, cell);
+ tw = &copy.tw;
+ __wt_time_window_init(tw);
cell = (WT_CELL *)((uint8_t *)cell - v);
- goto restart;
+ goto copy_cell_restart;
case WT_CELL_KEY_OVFL:
case WT_CELL_KEY_OVFL_RM:
@@ -954,57 +994,34 @@ restart:
done:
/*
- * Check the original cell against the full cell length (this is a diagnostic as well, we may be
- * copying the cell from the page and we need the right length).
+ * Skip if we know we're not unpacking a cell of this type. This is all inlined code, and
+ * ideally checking allows the compiler to discard big chunks of it.
*/
- WT_CELL_LEN_CHK(cell, unpack->__len);
- if (copy_cell) {
- __wt_time_window_copy(tw, &copy.tw);
+ if (unpack_addr == NULL && copy_cell) {
unpack->v = copy.v;
unpack->__len = copy.len;
unpack->raw = WT_CELL_VALUE_COPY;
}
+ /*
+ * Check the original cell against the full cell length (this is a diagnostic as well, we may be
+ * copying the cell from the page and we need the right length).
+ */
+ WT_CELL_LEN_CHK(cell, unpack->__len);
return (0);
}
/*
- * __wt_cell_unpack_dsk --
- * Unpack a WT_CELL into a structure.
+ * __cell_unpack_window_cleanup --
+ * Clean up cells loaded from a previous run.
*/
static inline void
-__wt_cell_unpack_dsk(
- WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack)
+__cell_unpack_window_cleanup(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk,
+ WT_CELL_UNPACK_ADDR *unpack_addr, WT_CELL_UNPACK_KV *unpack_kv)
{
WT_TIME_AGGREGATE *ta;
WT_TIME_WINDOW *tw;
- ta = &unpack->ta;
- tw = &unpack->tw;
-
- /*
- * Row-store doesn't store zero-length values on pages, but this allows us to pretend.
- */
- if (cell == NULL) {
- unpack->cell = NULL;
- unpack->v = 0;
- /*
- * If there isn't any value validity window (which is what it will take to get to a
- * zero-length item), the value must be stable.
- */
- __wt_time_window_init(tw);
- __wt_time_aggregate_init(ta);
- unpack->data = "";
- unpack->size = 0;
- unpack->__len = 0;
- unpack->prefix = 0;
- unpack->raw = unpack->type = WT_CELL_VALUE;
- unpack->flags = 0;
- return;
- }
-
- WT_IGNORE_RET(__wt_cell_unpack_safe(session, dsk, cell, unpack, NULL));
-
/*
* If the page came from a previous run, reset the transaction ids to "none" and timestamps to 0
* as appropriate. Transaction ids shouldn't persist between runs so these are always set to
@@ -1020,42 +1037,85 @@ __wt_cell_unpack_dsk(
* Current startup txnid=x, ts=y txnid=x, ts=WT_TS_NONE txnid=MAX, ts=MAX
* Previous startup txnid=0, ts=y txnid=0, ts=WT_TS_NONE txnid=MAX, ts=MAX
*/
- if (dsk->write_gen > 0 && dsk->write_gen <= S2C(session)->base_write_gen) {
- /* FIXME-WT-6124: deal with durable timestamps. */
- /* Tell reconciliation we cleared the transaction ids and the cell needs to be rebuilt. */
- if (tw->start_txn != WT_TXN_NONE) {
- tw->start_txn = WT_TXN_NONE;
- F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
- }
- if (tw->stop_txn != WT_TXN_MAX) {
- tw->stop_txn = WT_TXN_NONE;
- F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
- if (tw->stop_ts == WT_TS_MAX)
- tw->stop_ts = WT_TS_NONE;
- } else
- WT_ASSERT(session, tw->stop_ts == WT_TS_MAX);
+ if (dsk->write_gen == 0 || dsk->write_gen > S2C(session)->base_write_gen)
+ return;
+
+ /* Tell reconciliation we cleared the transaction ids and the cell needs to be rebuilt. */
+ /* FIXME-WT-6124: deal with durable timestamps. */
+ if (unpack_addr != NULL) {
+ ta = &unpack_addr->ta;
if (ta->oldest_start_txn != WT_TXN_NONE) {
ta->oldest_start_txn = WT_TXN_NONE;
- F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
+ F_SET(unpack_addr, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
}
if (ta->newest_stop_txn != WT_TXN_MAX) {
ta->newest_stop_txn = WT_TXN_NONE;
- F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
+ F_SET(unpack_addr, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
if (ta->newest_stop_ts == WT_TS_MAX)
ta->newest_stop_ts = WT_TS_NONE;
} else
WT_ASSERT(session, ta->newest_stop_ts == WT_TS_MAX);
}
+ if (unpack_kv != NULL) {
+ tw = &unpack_kv->tw;
+ if (tw->start_txn != WT_TXN_NONE) {
+ tw->start_txn = WT_TXN_NONE;
+ F_SET(unpack_kv, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
+ }
+ if (tw->stop_txn != WT_TXN_MAX) {
+ tw->stop_txn = WT_TXN_NONE;
+ F_SET(unpack_kv, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
+ if (tw->stop_ts == WT_TS_MAX)
+ tw->stop_ts = WT_TS_NONE;
+ } else
+ WT_ASSERT(session, tw->stop_ts == WT_TS_MAX);
+ }
}
/*
- * __wt_cell_unpack --
- * Unpack a WT_CELL into a structure.
+ * __wt_cell_unpack_addr --
+ * Unpack an address WT_CELL into a structure.
*/
static inline void
-__wt_cell_unpack(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack)
+__wt_cell_unpack_addr(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell,
+ WT_CELL_UNPACK_ADDR *unpack_addr)
{
- __wt_cell_unpack_dsk(session, page->dsk, cell, unpack);
+ WT_IGNORE_RET(__wt_cell_unpack_safe(session, dsk, cell, unpack_addr, NULL, NULL));
+ __cell_unpack_window_cleanup(session, dsk, unpack_addr, NULL);
+}
+
+/*
+ * __wt_cell_unpack_kv --
+ * Unpack a value WT_CELL into a structure.
+ */
+static inline void
+__wt_cell_unpack_kv(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell,
+ WT_CELL_UNPACK_KV *unpack_value)
+{
+ /*
+ * Row-store doesn't store zero-length values on pages, but this allows us to pretend.
+ */
+ if (cell == NULL) {
+ unpack_value->cell = NULL;
+ unpack_value->v = 0;
+ unpack_value->data = "";
+ unpack_value->size = 0;
+ unpack_value->__len = 0;
+ unpack_value->prefix = 0;
+ unpack_value->raw = unpack_value->type = WT_CELL_VALUE;
+ unpack_value->flags = 0;
+
+ /*
+ * If there isn't any value validity window (which is what it will take to get to a
+ * zero-length item), the value must be stable.
+ */
+ __wt_time_window_init(&unpack_value->tw);
+
+ return;
+ }
+
+ WT_IGNORE_RET(__wt_cell_unpack_safe(session, dsk, cell, NULL, unpack_value, NULL));
+ __cell_unpack_window_cleanup(session, dsk, NULL, unpack_value);
}
/*
@@ -1063,8 +1123,8 @@ __wt_cell_unpack(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_CELL
* Set a buffer to reference the data from an unpacked cell.
*/
static inline int
-__cell_data_ref(
- WT_SESSION_IMPL *session, WT_PAGE *page, int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store)
+__cell_data_ref(WT_SESSION_IMPL *session, WT_PAGE *page, int page_type,
+ WT_CELL_UNPACK_COMMON *unpack, WT_ITEM *store)
{
WT_BTREE *btree;
bool decoded;
@@ -1119,9 +1179,12 @@ __cell_data_ref(
* be of type WT_CELL_VALUE_OVFL_RM, and calling the "page" version means it might be.
*/
static inline int
-__wt_dsk_cell_data_ref(
- WT_SESSION_IMPL *session, int page_type, WT_CELL_UNPACK *unpack, WT_ITEM *store)
+__wt_dsk_cell_data_ref(WT_SESSION_IMPL *session, int page_type, void *unpack_arg, WT_ITEM *store)
{
+ WT_CELL_UNPACK_COMMON *unpack;
+
+ unpack = unpack_arg;
+
WT_ASSERT(session, __wt_cell_type_raw(unpack->cell) != WT_CELL_VALUE_OVFL_RM);
return (__cell_data_ref(session, NULL, page_type, unpack, store));
}
@@ -1131,23 +1194,30 @@ __wt_dsk_cell_data_ref(
* Set a buffer to reference the data from an unpacked cell.
*/
static inline int
-__wt_page_cell_data_ref(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store)
+__wt_page_cell_data_ref(WT_SESSION_IMPL *session, WT_PAGE *page, void *unpack_arg, WT_ITEM *store)
{
- return (__cell_data_ref(session, page, page->type, unpack, store));
+ return (__cell_data_ref(session, page, page->type, unpack_arg, store)); /* void * converts implicitly to the callee's WT_CELL_UNPACK_COMMON * parameter */
}
/*
* WT_CELL_FOREACH --
* Walk the cells on a page.
*/
-#define WT_CELL_FOREACH_BEGIN(session, btree, dsk, unpack) \
- do { \
- uint32_t __i; \
- uint8_t *__cell; \
- for (__cell = WT_PAGE_HEADER_BYTE(btree, dsk), __i = (dsk)->u.entries; __i > 0; \
- __cell += (unpack).__len, --__i) { \
- __wt_cell_unpack_dsk(session, dsk, (WT_CELL *)__cell, &(unpack));
+#define WT_CELL_FOREACH_ADDR(session, dsk, unpack) \
+ do { \
+ uint32_t __i; /* cells left to visit, counted down from (dsk)->u.entries */ \
+ uint8_t *__cell; /* current cell, advanced by the unpacked cell's __len */ \
+ for (__cell = WT_PAGE_HEADER_BYTE(S2BT(session), dsk), __i = (dsk)->u.entries; __i > 0; \
+ __cell += (unpack).__len, --__i) { \
+ __wt_cell_unpack_addr(session, dsk, (WT_CELL *)__cell, &(unpack)); /* body left open for the caller; WT_CELL_FOREACH_END closes it */
+
+#define WT_CELL_FOREACH_KV(session, dsk, unpack) \
+ do { \
+ uint32_t __i; /* cells left to visit, counted down from (dsk)->u.entries */ \
+ uint8_t *__cell; /* current cell, advanced by the unpacked cell's __len */ \
+ for (__cell = WT_PAGE_HEADER_BYTE(S2BT(session), dsk), __i = (dsk)->u.entries; __i > 0; \
+ __cell += (unpack).__len, --__i) { \
+ __wt_cell_unpack_kv(session, dsk, (WT_CELL *)__cell, &(unpack)); /* body left open for the caller; WT_CELL_FOREACH_END closes it */
#define WT_CELL_FOREACH_END \
} \
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index 81cc28feb08..d3237ad91d2 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -409,7 +409,7 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter)
*/
static inline int
__cursor_row_slot_key_return(
- WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_CELL_UNPACK *kpack, bool *kpack_used)
+ WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_CELL_UNPACK_KV *kpack, bool *kpack_used)
{
WT_BTREE *btree;
WT_CELL *cell;
@@ -454,7 +454,7 @@ __cursor_row_slot_key_return(
* compiler complaining about uninitialized field use.
*/
memset(kpack, 0, sizeof(*kpack));
- __wt_cell_unpack(session, page, cell, kpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, kpack);
*kpack_used = true;
if (kpack->type == WT_CELL_KEY && cbt->rip_saved != NULL && cbt->rip_saved == rip - 1) {
WT_ASSERT(session, cbt->row_key->size >= kpack->prefix);
diff --git a/src/third_party/wiredtiger/src/include/error.h b/src/third_party/wiredtiger/src/include/error.h
index 76c644a2850..39cce1e5b34 100644
--- a/src/third_party/wiredtiger/src/include/error.h
+++ b/src/third_party/wiredtiger/src/include/error.h
@@ -166,6 +166,9 @@
} while (0)
#endif
+/* Verbose messages: true if the given verbose flag is set in S2C(session)->verbose. */
+#define WT_VERBOSE_ISSET(session, flag) (FLD_ISSET(S2C(session)->verbose, flag))
+
/*
* __wt_verbose --
* Display a verbose message. Not an inlined function because you can't inline functions taking
@@ -174,8 +177,8 @@
* additional argument, there's no portable way to remove the comma before an empty __VA_ARGS__
* value.
*/
-#define __wt_verbose(session, flag, fmt, ...) \
- do { \
- if (WT_VERBOSE_ISSET(session, flag)) \
- __wt_verbose_worker(session, fmt, __VA_ARGS__); \
+#define __wt_verbose(session, flag, fmt, ...) \
+ do { \
+ if (WT_VERBOSE_ISSET(session, flag)) /* the message below is prefixed with "[" #flag "] ", so fmt must be a string literal */ \
+ __wt_verbose_worker(session, "[" #flag "] " fmt, __VA_ARGS__); \
} while (0)
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index a5dfb85bf89..81e6a7c81aa 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -11,6 +11,8 @@ extern bool __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_btree_immediately_durable(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_cell_type_check(uint8_t cell_type, uint8_t dsk_type)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_checksum_alt_match(const void *chunk, size_t len, uint32_t v)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
@@ -595,7 +597,7 @@ extern int __wt_debug_cursor_tree_hs(void *cursor_arg, const char *ofile)
extern int __wt_debug_disk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_key_value(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno,
- uint64_t rle, WT_CELL_UNPACK *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ uint64_t rle, WT_CELL_UNPACK_KV *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_mode_config(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size,
@@ -1126,9 +1128,9 @@ extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *c
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack,
+extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK_COMMON *unpack,
WT_ITEM *store, bool *decoded) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack)
+extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK_KV *unpack)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_ovfl_reuse_add(WT_SESSION_IMPL *session, WT_PAGE *page, const uint8_t *addr,
size_t addr_size, const void *value, size_t value_size)
@@ -1206,7 +1208,7 @@ extern int __wt_rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
extern int __wt_rec_split_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page,
uint64_t recno, uint64_t max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
- void *ripcip, WT_CELL_UNPACK *vpack, WT_UPDATE_SELECT *upd_select)
+ void *ripcip, WT_CELL_UNPACK_KV *vpack, WT_UPDATE_SELECT *upd_select)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage,
uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1845,7 +1847,7 @@ static inline bool __wt_txn_visible_all(WT_SESSION_IMPL *session, uint64_t id,
static inline double __wt_eviction_dirty_target(WT_CACHE *cache)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_bt_col_var_cursor_walk_txn_read(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *cbt, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_COL *cip)
+ WT_CURSOR_BTREE *cbt, WT_PAGE *page, WT_CELL_UNPACK_KV *unpack, WT_COL *cip)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_btree_block_free(WT_SESSION_IMPL *session, const uint8_t *addr,
size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1866,7 +1868,7 @@ static inline int __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy,
static inline int __wt_cell_pack_value_match(WT_CELL *page_cell, WT_CELL *val_cell,
const uint8_t *val_data, bool *matchp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk,
- WT_CELL *cell, WT_CELL_UNPACK *unpack, const void *end)
+ WT_CELL *cell, WT_CELL_UNPACK_ADDR *unpack_addr, WT_CELL_UNPACK_KV *unpack_value, const void *end)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size,
@@ -1882,8 +1884,8 @@ static inline int __wt_curindex_get_valuev(WT_CURSOR *cursor, va_list ap)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_curtable_get_valuev(WT_CURSOR *cursor, va_list ap)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_dsk_cell_data_ref(WT_SESSION_IMPL *session, int page_type,
- WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_dsk_cell_data_ref(WT_SESSION_IMPL *session, int page_type, void *unpack_arg,
+ WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_extlist_read_pair(const uint8_t **p, wt_off_t *offp, wt_off_t *sizep)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_extlist_write_pair(uint8_t **p, wt_off_t off, wt_off_t size)
@@ -1934,8 +1936,8 @@ static inline int __wt_lex_compare_skip(const WT_ITEM *user_item, const WT_ITEM
size_t *matchp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_log_cmp(WT_LSN *lsn1, WT_LSN *lsn2)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_page_cell_data_ref(WT_SESSION_IMPL *session, WT_PAGE *page,
- WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_page_cell_data_ref(WT_SESSION_IMPL *session, WT_PAGE *page, void *unpack_arg,
+ WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_page_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_page_parent_modify_set(WT_SESSION_IMPL *session, WT_REF *ref, bool page_only)
@@ -2007,7 +2009,7 @@ static inline int __wt_txn_modify_page_delete(WT_SESSION_IMPL *session, WT_REF *
static inline int __wt_txn_op_set_key(WT_SESSION_IMPL *session, const WT_ITEM *key)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key,
- uint64_t recno, WT_UPDATE *upd, WT_CELL_UNPACK *vpack)
+ uint64_t recno, WT_UPDATE *upd, WT_CELL_UNPACK_KV *vpack)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_txn_read_upd_list(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -2061,7 +2063,7 @@ static inline size_t __wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell
WT_TIME_WINDOW *tw, uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell,
WT_TIME_WINDOW *tw, uint64_t rle, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline size_t __wt_cell_total_len(WT_CELL_UNPACK *unpack)
+static inline size_t __wt_cell_total_len(void *unpack_arg)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline size_t __wt_strnlen(const char *s, size_t maxlen)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -2104,7 +2106,7 @@ static inline uint64_t __wt_cache_pages_inuse(WT_CACHE *cache)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline uint64_t __wt_cache_read_gen(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline uint64_t __wt_cell_rle(WT_CELL_UNPACK *unpack)
+static inline uint64_t __wt_cell_rle(WT_CELL_UNPACK_KV *unpack)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline uint64_t __wt_clock(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -2136,10 +2138,10 @@ static inline void __wt_cache_update_hs_score(
WT_SESSION_IMPL *session, u_int updates_seen, u_int updates_unstable);
static inline void __wt_cell_type_reset(
WT_SESSION_IMPL *session, WT_CELL *cell, u_int old_type, u_int new_type);
-static inline void __wt_cell_unpack(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack);
-static inline void __wt_cell_unpack_dsk(
- WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack);
+static inline void __wt_cell_unpack_addr(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk,
+ WT_CELL *cell, WT_CELL_UNPACK_ADDR *unpack_addr);
+static inline void __wt_cell_unpack_kv(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk,
+ WT_CELL *cell, WT_CELL_UNPACK_KV *unpack_value);
static inline void __wt_check_addr_validity(WT_SESSION_IMPL *session, WT_TIME_AGGREGATE *ta);
static inline void __wt_cond_wait(
WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *));
@@ -2155,18 +2157,18 @@ static inline void __wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
static inline void __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page);
static inline void __wt_rec_addr_ts_init(WT_RECONCILE *r, WT_TIME_AGGREGATE *ta);
static inline void __wt_rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- WT_ADDR *addr, WT_CELL_UNPACK *vpack, bool proxy_cell, uint64_t recno);
+ WT_ADDR *addr, WT_CELL_UNPACK_ADDR *vpack, bool proxy_cell, uint64_t recno);
static inline void __wt_rec_image_copy(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv);
static inline void __wt_rec_incr(
WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size);
static inline void __wt_ref_key(WT_PAGE *page, WT_REF *ref, void *keyp, size_t *sizep);
static inline void __wt_ref_key_clear(WT_REF *ref);
-static inline void __wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK *unpack);
-static inline void __wt_row_leaf_key_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack);
+static inline void __wt_ref_key_onpage_set(WT_PAGE *page, WT_REF *ref, WT_CELL_UNPACK_ADDR *unpack);
+static inline void __wt_row_leaf_key_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK_KV *unpack);
static inline void __wt_row_leaf_key_set_cell(WT_PAGE *page, WT_ROW *rip, WT_CELL *cell);
static inline void __wt_row_leaf_value_cell(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip,
- WT_CELL_UNPACK *kpack, WT_CELL_UNPACK *vpack);
-static inline void __wt_row_leaf_value_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack);
+ WT_CELL_UNPACK_KV *kpack, WT_CELL_UNPACK_KV *vpack);
+static inline void __wt_row_leaf_value_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK_KV *unpack);
static inline void __wt_scr_free(WT_SESSION_IMPL *session, WT_ITEM **bufp);
static inline void __wt_seconds(WT_SESSION_IMPL *session, uint64_t *secondsp);
static inline void __wt_seconds32(WT_SESSION_IMPL *session, uint32_t *secondsp);
diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h
index b2df8478dd7..45dd0b42960 100644
--- a/src/third_party/wiredtiger/src/include/misc.h
+++ b/src/third_party/wiredtiger/src/include/misc.h
@@ -235,9 +235,6 @@
} \
} while (0)
-/* Verbose messages. */
-#define WT_VERBOSE_ISSET(session, f) (FLD_ISSET(S2C(session)->verbose, f))
-
#define WT_CLEAR(s) memset(&(s), 0, sizeof(s))
/* Check if a string matches a prefix. */
diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h
index 647c015e26e..a8d0c205aad 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.h
+++ b/src/third_party/wiredtiger/src/include/reconcile.h
@@ -31,7 +31,6 @@ struct __wt_reconcile {
/* Track the page's min/maximum transactions. */
uint64_t max_txn;
wt_timestamp_t max_ts;
- wt_timestamp_t max_ondisk_ts;
wt_timestamp_t min_skipped_ts;
u_int updates_seen; /* Count of updates seen. */
diff --git a/src/third_party/wiredtiger/src/include/reconcile.i b/src/third_party/wiredtiger/src/include/reconcile.i
index 3f9339a81ab..8da14bc93ad 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.i
+++ b/src/third_party/wiredtiger/src/include/reconcile.i
@@ -122,7 +122,7 @@ __wt_rec_image_copy(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv)
*/
static inline void
__wt_rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ADDR *addr,
- WT_CELL_UNPACK *vpack, bool proxy_cell, uint64_t recno)
+ WT_CELL_UNPACK_ADDR *vpack, bool proxy_cell, uint64_t recno)
{
WT_REC_KV *val;
u_int cell_type;
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 0cd8c89c9a7..90858eb6950 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -580,7 +580,11 @@ __wt_txn_upd_visible_all(WT_SESSION_IMPL *session, WT_UPDATE *upd)
if (upd->prepare_state == WT_PREPARE_LOCKED || upd->prepare_state == WT_PREPARE_INPROGRESS)
return (false);
- return (__wt_txn_visible_all(session, upd->txnid, upd->start_ts));
+ /*
+ * This function is used to determine when an update is obsolete: that should take into account
+ * the durable timestamp which is greater than or equal to the start timestamp.
+ */
+ return (__wt_txn_visible_all(session, upd->txnid, upd->durable_ts));
}
/*
@@ -594,7 +598,7 @@ __wt_txn_upd_value_visible_all(WT_SESSION_IMPL *session, WT_UPDATE_VALUE *upd_va
upd_value->prepare_state == WT_PREPARE_INPROGRESS)
return (false);
- return (__wt_txn_visible_all(session, upd_value->txnid, upd_value->start_ts));
+ return (__wt_txn_visible_all(session, upd_value->txnid, upd_value->durable_ts));
}
/*
@@ -837,7 +841,7 @@ __wt_txn_read_upd_list(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE
*/
static inline int
__wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno,
- WT_UPDATE *upd, WT_CELL_UNPACK *vpack)
+ WT_UPDATE *upd, WT_CELL_UNPACK_KV *vpack)
{
WT_TIME_WINDOW tw;
@@ -873,10 +877,10 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint
__wt_txn_visible(session, tw.stop_txn, tw.stop_ts) &&
((!F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE) &&
(!WT_IS_HS(S2BT(session)) || !F_ISSET(session, WT_SESSION_ROLLBACK_TO_STABLE))) ||
- __wt_txn_visible_all(session, tw.stop_txn, tw.stop_ts))) {
+ __wt_txn_visible_all(session, tw.stop_txn, tw.durable_stop_ts))) {
cbt->upd_value->buf.data = NULL;
cbt->upd_value->buf.size = 0;
- cbt->upd_value->start_ts = tw.stop_ts;
+ cbt->upd_value->durable_ts = tw.durable_stop_ts;
cbt->upd_value->txnid = tw.stop_txn;
cbt->upd_value->type = WT_UPDATE_TOMBSTONE;
cbt->upd_value->prepare_state = WT_PREPARE_INIT;
@@ -900,7 +904,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint
cbt->upd_value->buf.data = NULL;
cbt->upd_value->buf.size = 0;
}
- cbt->upd_value->start_ts = tw.start_ts;
+ cbt->upd_value->durable_ts = tw.durable_start_ts;
cbt->upd_value->txnid = tw.start_txn;
cbt->upd_value->type = WT_UPDATE_STANDARD;
cbt->upd_value->prepare_state = WT_PREPARE_INIT;
@@ -1276,7 +1280,7 @@ __wt_upd_value_assign(WT_UPDATE_VALUE *upd_value, WT_UPDATE *upd)
upd_value->buf.data = upd->data;
upd_value->buf.size = upd->size;
}
- upd_value->start_ts = upd->start_ts;
+ upd_value->durable_ts = upd->durable_ts;
upd_value->txnid = upd->txnid;
upd_value->type = upd->type;
upd_value->prepare_state = upd->prepare_state;
@@ -1295,7 +1299,7 @@ __wt_upd_value_clear(WT_UPDATE_VALUE *upd_value)
*/
upd_value->buf.data = NULL;
upd_value->buf.size = 0;
- upd_value->start_ts = WT_TS_NONE;
+ upd_value->durable_ts = WT_TS_NONE;
upd_value->txnid = WT_TXN_NONE;
upd_value->type = WT_UPDATE_INVALID;
upd_value->prepare_state = WT_PREPARE_INIT;
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index bdf26c80663..113113f9e93 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -107,8 +107,12 @@ struct __wt_capacity;
typedef struct __wt_capacity WT_CAPACITY;
struct __wt_cell;
typedef struct __wt_cell WT_CELL;
-struct __wt_cell_unpack;
-typedef struct __wt_cell_unpack WT_CELL_UNPACK;
+struct __wt_cell_unpack_addr;
+typedef struct __wt_cell_unpack_addr WT_CELL_UNPACK_ADDR;
+struct __wt_cell_unpack_common;
+typedef struct __wt_cell_unpack_common WT_CELL_UNPACK_COMMON;
+struct __wt_cell_unpack_kv;
+typedef struct __wt_cell_unpack_kv WT_CELL_UNPACK_KV;
struct __wt_ckpt;
typedef struct __wt_ckpt WT_CKPT;
struct __wt_col;
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_col.c b/src/third_party/wiredtiger/src/reconcile/rec_col.c
index 18a4a16b556..c49a3f11bfa 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_col.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_col.c
@@ -192,7 +192,7 @@ __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
{
WT_ADDR *addr;
WT_BTREE *btree;
- WT_CELL_UNPACK *vpack, _vpack;
+ WT_CELL_UNPACK_ADDR *vpack, _vpack;
WT_CHILD_STATE state;
WT_DECL_RET;
WT_PAGE *child, *page;
@@ -275,7 +275,7 @@ __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
if (addr == NULL && __wt_off_page(page, ref->addr))
addr = ref->addr;
if (addr == NULL) {
- __wt_cell_unpack(session, page, ref->addr, vpack);
+ __wt_cell_unpack_addr(session, page->dsk, ref->addr, vpack);
val->buf.data = ref->addr;
val->buf.size = __wt_cell_total_len(vpack);
val->cell_len = 0;
@@ -571,7 +571,7 @@ __wt_rec_col_var(
} last;
WT_BTREE *btree;
WT_CELL *cell;
- WT_CELL_UNPACK *vpack, _vpack;
+ WT_CELL_UNPACK_KV *vpack, _vpack;
WT_COL *cip;
WT_CURSOR_BTREE *cbt;
WT_DECL_ITEM(orig);
@@ -654,7 +654,7 @@ __wt_rec_col_var(
WT_COL_FOREACH (page, cip, i) {
ovfl_state = OVFL_IGNORE;
cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(session, page, cell, vpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, vpack);
nrepeat = __wt_cell_rle(vpack);
ins = WT_SKIP_FIRST(WT_COL_UPDATE(page, cip));
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
index 5365e077b65..66331a663b5 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_row.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -294,7 +294,7 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_ADDR *addr;
WT_BTREE *btree;
WT_CELL *cell;
- WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
+ WT_CELL_UNPACK_ADDR *kpack, _kpack, *vpack, _vpack;
WT_CHILD_STATE state;
WT_DECL_RET;
WT_IKEY *ikey;
@@ -353,7 +353,7 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
ikey = __wt_ref_key_instantiated(ref);
if (ikey != NULL && ikey->cell_offset != 0) {
cell = WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
- __wt_cell_unpack(session, page, cell, kpack);
+ __wt_cell_unpack_addr(session, page->dsk, cell, kpack);
key_onpage_ovfl =
F_ISSET(kpack, WT_CELL_UNPACK_OVERFLOW) && kpack->raw != WT_CELL_KEY_OVFL_RM;
}
@@ -433,7 +433,7 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
__wt_rec_cell_build_addr(session, r, addr, NULL, state == WT_CHILD_PROXY, WT_RECNO_OOB);
__wt_time_aggregate_copy(&ta, &addr->ta);
} else {
- __wt_cell_unpack(session, page, ref->addr, vpack);
+ __wt_cell_unpack_addr(session, page->dsk, ref->addr, vpack);
if (F_ISSET(vpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED)) {
/*
* The transaction ids are cleared after restart. Repack the cell with new validity
@@ -535,7 +535,7 @@ __rec_row_zero_len(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
*/
return ((tw->stop_ts == WT_TS_MAX && tw->stop_txn == WT_TXN_MAX) &&
((tw->start_ts == WT_TS_NONE && tw->start_txn == WT_TXN_NONE) ||
- __wt_txn_visible_all(session, tw->start_txn, tw->start_ts)));
+ __wt_txn_visible_all(session, tw->start_txn, tw->durable_start_ts)));
}
/*
@@ -633,8 +633,8 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
* Repack a cell.
*/
static inline int
-__rec_cell_repack(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_RECONCILE *r, WT_CELL_UNPACK *vpack,
- WT_TIME_WINDOW *tw)
+__rec_cell_repack(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_RECONCILE *r,
+ WT_CELL_UNPACK_KV *vpack, WT_TIME_WINDOW *tw)
{
WT_DECL_ITEM(tmpval);
WT_DECL_RET;
@@ -671,7 +671,7 @@ __wt_rec_row_leaf(
static WT_UPDATE upd_tombstone = {.txnid = WT_TXN_NONE, .type = WT_UPDATE_TOMBSTONE};
WT_BTREE *btree;
WT_CELL *cell;
- WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
+ WT_CELL_UNPACK_KV *kpack, _kpack, *vpack, _vpack;
WT_CURSOR_BTREE *cbt;
WT_DECL_ITEM(tmpkey);
WT_DECL_RET;
@@ -740,7 +740,7 @@ __wt_rec_row_leaf(
kpack = NULL;
else {
kpack = &_kpack;
- __wt_cell_unpack(session, page, cell, kpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, kpack);
}
/* Unpack the on-page value cell. */
@@ -767,7 +767,7 @@ __wt_rec_row_leaf(
* new updates for that key, skip writing that key.
*/
if (upd == NULL && (tw.stop_txn != WT_TXN_MAX || tw.stop_ts != WT_TS_MAX) &&
- __wt_txn_visible_all(session, tw.stop_txn, tw.stop_ts))
+ __wt_txn_visible_all(session, tw.stop_txn, tw.durable_stop_ts))
upd = &upd_tombstone;
/* Build value cell. */
@@ -910,7 +910,7 @@ __wt_rec_row_leaf(
goto build;
kpack = &_kpack;
- __wt_cell_unpack(session, page, cell, kpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, kpack);
if (btree->huffman_key == NULL && kpack->type == WT_CELL_KEY &&
tmpkey->size >= kpack->prefix && tmpkey->size != 0) {
/*
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_track.c b/src/third_party/wiredtiger/src/reconcile/rec_track.c
index b1a3b93eee8..1fb6b072d0e 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_track.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_track.c
@@ -31,13 +31,13 @@ __wt_ovfl_track_init(WT_SESSION_IMPL *session, WT_PAGE *page)
static int
__ovfl_discard_verbose(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, const char *tag)
{
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK_KV *unpack, _unpack;
WT_DECL_ITEM(tmp);
WT_RET(__wt_scr_alloc(session, 512, &tmp));
unpack = &_unpack;
- __wt_cell_unpack(session, page, cell, unpack);
+ __wt_cell_unpack_kv(session, page->dsk, cell, unpack);
__wt_verbose(session, WT_VERB_OVERFLOW, "discard: %s%s%p %s", tag == NULL ? "" : tag,
tag == NULL ? "" : ": ", (void *)page,
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 93991af5983..b428ab8a3e9 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -18,7 +18,7 @@ __rec_update_stable(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_UPDATE *upd)
return (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
__wt_txn_upd_visible_all(session, upd) :
__wt_txn_upd_visible_type(session, upd) == WT_VISIBLE_TRUE &&
- __wt_txn_visible(session, upd->txnid, upd->start_ts));
+ __wt_txn_visible(session, upd->txnid, upd->durable_ts));
}
/*
@@ -54,17 +54,19 @@ __rec_update_save(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, voi
*/
static int
__rec_append_orig_value(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd, WT_CELL_UNPACK *unpack)
+ WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd, WT_CELL_UNPACK_KV *unpack)
{
WT_DECL_ITEM(tmp);
WT_DECL_RET;
WT_UPDATE *append, *oldest_upd, *tombstone;
size_t size, total_size;
+ bool tombstone_globally_visible;
WT_ASSERT(session, upd != NULL && unpack != NULL && unpack->type != WT_CELL_DEL);
append = oldest_upd = tombstone = NULL;
total_size = 0;
+ tombstone_globally_visible = false;
/* Review the current update list, checking conditions that mean no work is needed. */
for (;; upd = upd->next) {
@@ -110,20 +112,6 @@ __rec_append_orig_value(
break;
}
- /* Done if the stop time pair of the onpage cell is globally visible. */
- if ((unpack->tw.stop_ts != WT_TS_MAX || unpack->tw.stop_txn != WT_TXN_MAX) &&
- __wt_txn_visible_all(session, unpack->tw.stop_txn, unpack->tw.stop_ts))
- return (0);
-
- /* We need the original on-page value for some reader: get a copy. */
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_page_cell_data_ref(session, page, unpack, tmp));
- WT_ERR(__wt_upd_alloc(session, tmp, WT_UPDATE_STANDARD, &append, &size));
- total_size += size;
- append->txnid = unpack->tw.start_txn;
- append->start_ts = unpack->tw.start_ts;
- append->durable_ts = unpack->tw.durable_start_ts;
-
/*
* Additionally, we need to append a tombstone before the onpage value we're about to append to
* the list, if the onpage value has a valid stop pair. Imagine a case where we insert and
@@ -131,17 +119,30 @@ __rec_append_orig_value(
* the tombstone to tell us there is no value between 10 and 20.
*/
if (unpack->tw.stop_ts != WT_TS_MAX || unpack->tw.stop_txn != WT_TXN_MAX) {
+ tombstone_globally_visible =
+ __wt_txn_visible_all(session, unpack->tw.stop_txn, unpack->tw.durable_stop_ts);
+
/* No need to append the tombstone if it is already in the update chain. */
if (oldest_upd->type != WT_UPDATE_TOMBSTONE) {
+ /*
+ * We still need to append the globally visible tombstone if its timestamp is WT_TS_NONE
+ * as we may need it to clear the history store content of the key. We don't append a
+ * timestamped globally visible tombstone because even if its timestamp is smaller than
+ * the entries in the history store, we can't change the history store entries. This is
+ * not correct but we hope we can get away with it.
+ *
+ * FIXME-WT-6171: remove this once we get rid of out of order timestamps and mixed mode
+ * transactions.
+ */
+ if (unpack->tw.durable_stop_ts != WT_TS_NONE && tombstone_globally_visible)
+ return (0);
+
WT_ERR(__wt_upd_alloc_tombstone(session, &tombstone, &size));
total_size += size;
tombstone->txnid = unpack->tw.stop_txn;
tombstone->start_ts = unpack->tw.stop_ts;
tombstone->durable_ts = unpack->tw.durable_stop_ts;
-
- tombstone->next = append;
- append = tombstone;
- } else
+ } else {
/*
* Once the prepared update is resolved, the in-memory update and on-disk written copy
* doesn't have same timestamp due to replacing of prepare timestamp with commit and
@@ -150,6 +151,25 @@ __rec_append_orig_value(
WT_ASSERT(session, F_ISSET(unpack, WT_CELL_UNPACK_PREPARE) ||
(unpack->tw.stop_ts == oldest_upd->start_ts &&
unpack->tw.stop_txn == oldest_upd->txnid));
+ if (tombstone_globally_visible)
+ return (0);
+ }
+ }
+
+ /* We need the original on-page value for some reader: get a copy. */
+ if (!tombstone_globally_visible) {
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_page_cell_data_ref(session, page, unpack, tmp));
+ WT_ERR(__wt_upd_alloc(session, tmp, WT_UPDATE_STANDARD, &append, &size));
+ total_size += size;
+ append->txnid = unpack->tw.start_txn;
+ append->start_ts = unpack->tw.start_ts;
+ append->durable_ts = unpack->tw.durable_start_ts;
+ }
+
+ if (tombstone != NULL) {
+ tombstone->next = append;
+ append = tombstone;
}
/* Append the new entry into the update list. */
@@ -192,8 +212,9 @@ __rec_need_save_upd(
if (F_ISSET(r, WT_REC_CHECKPOINT) && upd_select->upd == NULL)
return (false);
- return (!__wt_txn_visible_all(session, upd_select->tw.stop_txn, upd_select->tw.stop_ts) &&
- !__wt_txn_visible_all(session, upd_select->tw.start_txn, upd_select->tw.start_ts));
+ return (
+ !__wt_txn_visible_all(session, upd_select->tw.stop_txn, upd_select->tw.durable_stop_ts) &&
+ !__wt_txn_visible_all(session, upd_select->tw.start_txn, upd_select->tw.durable_start_ts));
}
/*
@@ -202,7 +223,7 @@ __rec_need_save_upd(
*/
int
__wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, void *ripcip,
- WT_CELL_UNPACK *vpack, WT_UPDATE_SELECT *upd_select)
+ WT_CELL_UNPACK_KV *vpack, WT_UPDATE_SELECT *upd_select)
{
WT_DECL_ITEM(tmp);
WT_DECL_RET;
@@ -343,9 +364,6 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
return (__wt_set_return(session, EBUSY));
}
- if (upd != NULL && upd->start_ts > r->max_ondisk_ts)
- r->max_ondisk_ts = upd->start_ts;
-
/*
* The start timestamp is determined by the commit timestamp when the key is first inserted (or
* last updated). The end timestamp is set when a key/value pair becomes invalid, either because
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 2a4358c585f..4dd14173c5c 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -519,7 +519,7 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO
/* Track the page's min/maximum transaction */
r->max_txn = WT_TXN_NONE;
- r->max_ondisk_ts = r->max_ts = WT_TS_NONE;
+ r->max_ts = WT_TS_NONE;
r->min_skipped_ts = WT_TS_MAX;
/* Track if updates were used and/or uncommitted. */
diff --git a/src/third_party/wiredtiger/src/support/modify.c b/src/third_party/wiredtiger/src/support/modify.c
index 010ef9a80d1..25de3b700b7 100644
--- a/src/third_party/wiredtiger/src/support/modify.c
+++ b/src/third_party/wiredtiger/src/support/modify.c
@@ -535,7 +535,7 @@ __wt_modify_reconstruct_from_upd_list(
cursor = &cbt->iface;
/* While we have a pointer to our original modify, grab this information. */
- upd_value->start_ts = upd->start_ts;
+ upd_value->durable_ts = upd->durable_ts;
upd_value->txnid = upd->txnid;
upd_value->prepare_state = upd->prepare_state;
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index e2489bc8563..00cd443398b 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -144,7 +144,7 @@ static int
__rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip,
wt_timestamp_t rollback_timestamp, bool replace)
{
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK_KV *unpack, _unpack;
WT_CURSOR *hs_cursor;
WT_CURSOR_BTREE *cbt;
WT_DECL_ITEM(hs_key);
@@ -351,7 +351,7 @@ static int
__rollback_abort_row_ondisk_kv(
WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, wt_timestamp_t rollback_timestamp)
{
- WT_CELL_UNPACK *vpack, _vpack;
+ WT_CELL_UNPACK_KV *vpack, _vpack;
WT_DECL_RET;
WT_ITEM buf;
WT_UPDATE *upd;
@@ -635,7 +635,7 @@ __rollback_page_needs_abort(
WT_SESSION_IMPL *session, WT_REF *ref, wt_timestamp_t rollback_timestamp)
{
WT_ADDR *addr;
- WT_CELL_UNPACK vpack;
+ WT_CELL_UNPACK_ADDR vpack;
WT_MULTI *multi;
WT_PAGE_MODIFY *mod;
wt_timestamp_t durable_ts;
@@ -677,7 +677,7 @@ __rollback_page_needs_abort(
} else if (!__wt_off_page(ref->home, addr)) {
tag = "on page cell";
/* Check if the page is obsolete using the page disk address. */
- __wt_cell_unpack(session, ref->home, (WT_CELL *)addr, &vpack);
+ __wt_cell_unpack_addr(session, ref->home->dsk, (WT_CELL *)addr, &vpack);
durable_ts = WT_MAX(vpack.ta.newest_start_durable_ts, vpack.ta.newest_stop_durable_ts);
prepared = F_ISSET(&vpack, WT_CELL_UNPACK_PREPARE);
result = (durable_ts > rollback_timestamp) || prepared;
@@ -704,7 +704,7 @@ static void
__rollback_verify_ondisk_page(
WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t rollback_timestamp)
{
- WT_CELL_UNPACK *vpack, _vpack;
+ WT_CELL_UNPACK_KV *vpack, _vpack;
WT_ROW *rip;
uint32_t i;
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 1e306fe4b2e..7d81b1c2b80 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -2217,10 +2217,10 @@ buildvariants:
- name: syscall-linux
- name: make-check-asan-test
- name: configure-combinations
- # - name: checkpoint-filetypes-test
+ - name: checkpoint-filetypes-test
# - name: coverage-report
- name: unit-test-long
- # - name: spinlock-gcc-test
+ - name: spinlock-gcc-test
- name: spinlock-pthread-adaptive-test
- name: compile-wtperf
- name: wtperf-test
@@ -2321,9 +2321,9 @@ buildvariants:
- name: syscall-linux
- name: compile-asan
- name: make-check-asan-test
- # - name: checkpoint-filetypes-test
+ - name: checkpoint-filetypes-test
- name: unit-test-long
- # - name: spinlock-gcc-test
+ - name: spinlock-gcc-test
- name: spinlock-pthread-adaptive-test
- name: compile-wtperf
- name: wtperf-test
diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c
index 5ad1cfe65dc..9e959dcd823 100644
--- a/src/third_party/wiredtiger/test/format/backup.c
+++ b/src/third_party/wiredtiger/test/format/backup.c
@@ -217,54 +217,57 @@ static void
copy_blocks(WT_SESSION *session, WT_CURSOR *bkup_c, const char *name)
{
WT_CURSOR *incr_cur;
+ WT_DECL_RET;
size_t len, tmp_sz;
ssize_t rdsize;
- uint64_t offset, type;
- u_int size;
- int ret, rfd, wfd1, wfd2;
- char buf[512], config[512], *first, *second, *tmp;
+ uint64_t offset, size, type;
+ int rfd, wfd1, wfd2;
+ char config[512], *tmp;
bool first_pass;
- /*
- * We need to prepend the home directory name here because we are not using the WiredTiger
- * internal functions that would prepend it for us.
- */
- len = strlen(g.home) + strlen("BACKUP") + strlen(name) + 10;
- first = dmalloc(len);
-
- /*
- * Save another copy of the original file to make debugging recovery errors easier.
- */
- len = strlen(g.home) + strlen("BACKUP.copy") + strlen(name) + 10;
- second = dmalloc(len);
- testutil_check(__wt_snprintf(config, sizeof(config), "incremental=(file=%s)", name));
-
- /* Open the duplicate incremental backup cursor with the file name given. */
tmp_sz = 0;
tmp = NULL;
first_pass = true;
rfd = wfd1 = wfd2 = -1;
+
+ /* Open the duplicate incremental backup cursor with the file name given. */
+ testutil_check(__wt_snprintf(config, sizeof(config), "incremental=(file=%s)", name));
testutil_check(session->open_cursor(session, NULL, bkup_c, config, &incr_cur));
while ((ret = incr_cur->next(incr_cur)) == 0) {
- testutil_check(incr_cur->get_key(incr_cur, &offset, (uint64_t *)&size, &type));
+ testutil_check(incr_cur->get_key(incr_cur, &offset, &size, &type));
if (type == WT_BACKUP_RANGE) {
/*
* Since we are using system calls below instead of a WiredTiger function, we have to
* prepend the home directory to the file names ourselves.
*/
- testutil_check(__wt_snprintf(first, len, "%s/BACKUP/%s", g.home, name));
- testutil_check(__wt_snprintf(second, len, "%s/BACKUP.copy/%s", g.home, name));
+ if (first_pass) {
+ len = strlen(g.home) + strlen(name) + 10;
+ tmp = dmalloc(len);
+ testutil_check(__wt_snprintf(tmp, len, "%s/%s", g.home, name));
+ error_sys_check(rfd = open(tmp, O_RDONLY, 0));
+ free(tmp);
+ tmp = NULL;
+
+ len = strlen(g.home) + strlen("BACKUP") + strlen(name) + 10;
+ tmp = dmalloc(len);
+ testutil_check(__wt_snprintf(tmp, len, "%s/BACKUP/%s", g.home, name));
+ error_sys_check(wfd1 = open(tmp, O_WRONLY | O_CREAT, 0));
+ free(tmp);
+ tmp = NULL;
+
+ len = strlen(g.home) + strlen("BACKUP.copy") + strlen(name) + 10;
+ tmp = dmalloc(len);
+ testutil_check(__wt_snprintf(tmp, len, "%s/BACKUP.copy/%s", g.home, name));
+ error_sys_check(wfd2 = open(tmp, O_WRONLY | O_CREAT, 0));
+ free(tmp);
+ tmp = NULL;
+
+ first_pass = false;
+ }
if (tmp_sz < size) {
tmp = drealloc(tmp, size);
tmp_sz = size;
}
- if (first_pass) {
- testutil_check(__wt_snprintf(buf, sizeof(buf), "%s/%s", g.home, name));
- error_sys_check(rfd = open(buf, O_RDONLY, 0));
- error_sys_check(wfd1 = open(first, O_WRONLY | O_CREAT, 0));
- error_sys_check(wfd2 = open(second, O_WRONLY | O_CREAT, 0));
- first_pass = false;
- }
error_sys_check(lseek(rfd, (wt_off_t)offset, SEEK_SET));
error_sys_check(rdsize = read(rfd, tmp, size));
error_sys_check(lseek(wfd1, (wt_off_t)offset, SEEK_SET));
@@ -273,17 +276,27 @@ copy_blocks(WT_SESSION *session, WT_CURSOR *bkup_c, const char *name)
error_sys_check(write(wfd1, tmp, (size_t)rdsize));
error_sys_check(write(wfd2, tmp, (size_t)rdsize));
} else {
+ testutil_assert(type == WT_BACKUP_FILE);
+ testutil_assert(first_pass == true);
+ testutil_assert(rfd == -1);
+
/*
* These operations are using a WiredTiger function so it will prepend the home
* directory to the name for us.
*/
- testutil_check(__wt_snprintf(first, len, "BACKUP/%s", name));
- testutil_check(__wt_snprintf(second, len, "BACKUP.copy/%s", name));
- testutil_assert(type == WT_BACKUP_FILE);
- testutil_assert(rfd == -1);
- testutil_assert(first_pass == true);
- testutil_check(__wt_copy_and_sync(session, name, first));
- testutil_check(__wt_copy_and_sync(session, first, second));
+ len = strlen("BACKUP") + strlen(name) + 10;
+ tmp = dmalloc(len);
+ testutil_check(__wt_snprintf(tmp, len, "BACKUP/%s", name));
+ testutil_check(__wt_copy_and_sync(session, name, tmp));
+ free(tmp);
+ tmp = NULL;
+
+ len = strlen("BACKUP.copy") + strlen(name) + 10;
+ tmp = dmalloc(len);
+ testutil_check(__wt_snprintf(tmp, len, "BACKUP.copy/%s", name));
+ testutil_check(__wt_copy_and_sync(session, name, tmp));
+ free(tmp);
+ tmp = NULL;
}
}
testutil_check(incr_cur->close(incr_cur));
@@ -292,8 +305,6 @@ copy_blocks(WT_SESSION *session, WT_CURSOR *bkup_c, const char *name)
error_sys_check(close(wfd1));
error_sys_check(close(wfd2));
}
- free(first);
- free(second);
free(tmp);
}
/*
diff --git a/src/third_party/wiredtiger/test/format/checkpoint.c b/src/third_party/wiredtiger/test/format/checkpoint.c
index 36e70ae3125..9131e920231 100644
--- a/src/third_party/wiredtiger/test/format/checkpoint.c
+++ b/src/third_party/wiredtiger/test/format/checkpoint.c
@@ -37,6 +37,17 @@ wts_checkpoints(void)
{
char config[1024];
+ /*
+ * Configuring WiredTiger library checkpoints is done separately, rather than as part of the
+ * original database open because format tests small caches and you can get into cache stuck
+ * trouble during the initial load (where bulk load isn't configured). There's a single thread
+ * doing lots of inserts and creating huge leaf pages. Those pages can't be evicted if there's a
+ * checkpoint running in the tree, and the cache can get stuck. That workload is unlikely enough
+ * we're not going to fix it in the library, so configure it away by delaying checkpoint start.
+ */
+ if (g.c_checkpoint_flag != CHECKPOINT_WIREDTIGER)
+ return;
+
testutil_check(
__wt_snprintf(config, sizeof(config), ",checkpoint=(wait=%" PRIu32 ",log_size=%" PRIu32 ")",
g.c_checkpoint_wait, MEGABYTE(g.c_checkpoint_log_size)));
diff --git a/src/third_party/wiredtiger/test/format/format.sh b/src/third_party/wiredtiger/test/format/format.sh
index 19f5df8ede4..b02a58abfb0 100755
--- a/src/third_party/wiredtiger/test/format/format.sh
+++ b/src/third_party/wiredtiger/test/format/format.sh
@@ -214,14 +214,11 @@ skip_known_errors()
log=$1
- # Define each array with multi-signature matching for a single known error
- # and append it to the skip_error_list
- err_1=("heap-buffer-overflow" "__split_parent") # Delete this error line post WT-5518 fix
- err_2=("heap-use-after-free" "__wt_btcur_next_random") # Delete this error line post WT-5552 fix
-
- # skip_error_list is the list of errors to skip, and each error could
- # have multiple signatures to be able to reach a finer match
- skip_error_list=( err_1[@] err_2[@] )
+ # skip_error_list is a list of errors to skip. Each array entry can have multiple signatures
+ # for finer-grained matching. For example:
+ #
+ # err_1=("heap-buffer-overflow" "__split_parent")
+ skip_error_list=( err_1[@] )
# Loop through the skip list and search in the log file.
err_count=${#skip_error_list[@]}
@@ -249,12 +246,11 @@ report_failure()
log="$dir.log"
# DO NOT CURRENTLY SKIP ANY ERRORS.
- skip_ret=0
#skip_known_errors $log
#skip_ret=$?
echo "$name: failure status reported" > $dir/$status
- [[ $skip_ret -ne 0 ]] && failure=$(($failure + 1))
+ failure=$(($failure + 1))
# Forcibly quit if first-failure configured.
[[ $first_failure -ne 0 ]] && force_quit=1
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index ef3a79e7b53..b38c7b721bc 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -204,16 +204,6 @@ operations(u_int ops_seconds, bool lastrun)
if (g.c_txn_timestamps)
testutil_check(__wt_thread_create(NULL, &timestamp_tid, timestamp, tinfo_list));
- /*
- * Configuring WiredTiger library checkpoints is done separately, rather than as part of the
- * original database open because format tests small caches and you can get into cache stuck
- * trouble during the initial load (where bulk load isn't configured). There's a single thread
- * doing lots of inserts and creating huge leaf pages. Those pages can't be evicted if there's a
- * checkpoint running in the tree, and the cache can get stuck. That workload is unlikely enough
- * we're not going to fix it in the library, so configure it away by delaying checkpoint start.
- */
- if (g.c_checkpoint_flag == CHECKPOINT_WIREDTIGER)
- wts_checkpoints();
if (g.c_checkpoint_flag == CHECKPOINT_ON)
testutil_check(__wt_thread_create(NULL, &checkpoint_tid, checkpoint, NULL));
diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c
index b596124087b..5e68d4c524b 100644
--- a/src/third_party/wiredtiger/test/format/t.c
+++ b/src/third_party/wiredtiger/test/format/t.c
@@ -287,6 +287,8 @@ main(int argc, char *argv[])
TIMED_MAJOR_OP(wts_read_scan());
+ wts_checkpoints();
+
/* Operations. */
for (reps = 1; reps <= FORMAT_OPERATION_REPS; ++reps)
operations(ops_seconds, reps == FORMAT_OPERATION_REPS);