summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2018-12-27 13:48:37 +1100
committer Luke Chen <luke.chen@mongodb.com> 2018-12-27 13:48:37 +1100
commit 9ff8d7452ddf7370fc9047ebe9517fd7558914e6 (patch)
tree 582219e9112051f579005f48781c0f6f9fb29ea8
parent 62c7e599ba211209eb93ae8f652d17fc8f6c251f (diff)
download mongo-9ff8d7452ddf7370fc9047ebe9517fd7558914e6.tar.gz
Import wiredtiger: e6c1b9724ed6ed2879a36d7e140f4fa9daceb261 from branch mongodb-4.2
ref: d5793d4dd5..e6c1b9724e
for: 4.1.7

WT-4366 Fix how test/format handles prepare conflict errors
WT-4426 Change WT data format to include timestamps in leaf page key/value cells
WT-4475 clang detected memory leak while executing csuite tests
WT-4499 Fix prepared transactions for cursor key order check failure
WT-4506 Bypass some csuite tests for valgrind
-rw-r--r-- src/third_party/wiredtiger/build_posix/aclocal/options.m4 | 14
-rw-r--r-- src/third_party/wiredtiger/build_win/wiredtiger_config.h | 3
-rw-r--r-- src/third_party/wiredtiger/dist/function.py | 6
-rw-r--r-- src/third_party/wiredtiger/dist/s_string.ok | 4
-rw-r--r-- src/third_party/wiredtiger/dist/stat_data.py | 1
-rw-r--r-- src/third_party/wiredtiger/import.data | 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curnext.c | 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curprev.c | 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_debug.c | 29
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_ovfl.c | 4
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_page.c | 118
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_rebalance.c | 14
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_ret.c | 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_slvg.c | 44
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c | 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_stat.c | 47
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy.c | 27
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c | 47
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_key.c | 4
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h | 12
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i | 12
-rw-r--r-- src/third_party/wiredtiger/src/include/cell.i | 296
-rw-r--r-- src/third_party/wiredtiger/src/include/connection.h | 12
-rw-r--r-- src/third_party/wiredtiger/src/include/cursor.i | 21
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h | 2
-rw-r--r-- src/third_party/wiredtiger/src/include/stat.h | 1
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.h | 5
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.i | 2
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in | 209
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_track.c | 4
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c | 150
-rw-r--r-- src/third_party/wiredtiger/src/support/global.c | 23
-rw-r--r-- src/third_party/wiredtiger/src/support/stat.c | 5
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c | 6
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_ckpt.c | 2
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c | 3
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_timestamp.c | 4
-rwxr-xr-x src/third_party/wiredtiger/test/checkpoint/smoke.sh | 3
-rw-r--r-- src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c | 4
-rw-r--r-- src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c | 4
-rw-r--r-- src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c | 2
-rw-r--r-- src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c | 3
-rw-r--r-- src/third_party/wiredtiger/test/format/format.h | 7
-rw-r--r-- src/third_party/wiredtiger/test/format/ops.c | 97
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_dictionary.py | 70
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_empty_value.py | 60
-rw-r--r-- src/third_party/wiredtiger/test/utility/misc.c | 6
47 files changed, 904 insertions, 497 deletions
diff --git a/src/third_party/wiredtiger/build_posix/aclocal/options.m4 b/src/third_party/wiredtiger/build_posix/aclocal/options.m4
index 47655ba82cb..ff23ab4148c 100644
--- a/src/third_party/wiredtiger/build_posix/aclocal/options.m4
+++ b/src/third_party/wiredtiger/build_posix/aclocal/options.m4
@@ -221,6 +221,20 @@ pthread_adaptive|pthreads_adaptive)
esac
AC_MSG_RESULT($with_spinlock)
+AH_TEMPLATE(HAVE_PAGE_VERSION_TS,
+ [Define to 1 to enable writing timestamp version page formats.])
+AC_MSG_CHECKING(if --enable-page-version-ts option specified)
+AC_ARG_ENABLE(page-version-ts,
+ [AS_HELP_STRING([--enable-page-version-ts],
+ [Configure for timestamp version page formats])],
+ r=$enableval, r=no)
+case "$r" in
+no) wt_cv_enable_page_version_ts=no;;
+*) AC_DEFINE(HAVE_PAGE_VERSION_TS)
+ wt_cv_enable_page_version_ts=yes;;
+esac
+AC_MSG_RESULT($wt_cv_enable_page_version_ts)
+
AC_MSG_CHECKING(if --enable-strict option specified)
AC_ARG_ENABLE(strict,
[AS_HELP_STRING([--enable-strict],
diff --git a/src/third_party/wiredtiger/build_win/wiredtiger_config.h b/src/third_party/wiredtiger/build_win/wiredtiger_config.h
index 6c803748f7b..48a41ba2360 100644
--- a/src/third_party/wiredtiger/build_win/wiredtiger_config.h
+++ b/src/third_party/wiredtiger/build_win/wiredtiger_config.h
@@ -79,6 +79,9 @@
/* Define to 1 to disable any crc32 hardware support. */
/* #undef HAVE_NO_CRC32_HARDWARE */
+/* Define to 1 to enable writing timestamp version page formats. */
+/* #undef HAVE_PAGE_VERSION_TS */
+
/* Define to 1 if pthread condition variables support monotonic clocks. */
/* #undef HAVE_PTHREAD_COND_MONOTONIC */
diff --git a/src/third_party/wiredtiger/dist/function.py b/src/third_party/wiredtiger/dist/function.py
index 7c632b4e227..22c1d2928b9 100644
--- a/src/third_party/wiredtiger/dist/function.py
+++ b/src/third_party/wiredtiger/dist/function.py
@@ -90,8 +90,10 @@ def function_args(name, line):
line = re.sub("^static ", "", line)
line = re.sub("^volatile ", "", line)
- # Let WT_UNUSED terminate the parse. It often appears at the beginning
- # of the function and looks like a WT_XXX variable declaration.
+ # Let WT_ASSERT and WT_UNUSED terminate the parse. They often appear at the
+ # beginning of the function and look like a WT_XXX variable declaration.
+ if re.search('^WT_ASSERT', line):
+ return False,0
if re.search('^WT_UNUSED', line):
return False,0
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index c5546aed751..e6892f63c72 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -96,6 +96,7 @@ DIRECTIO
DNE
DOI
DONTNEED
+DSK
DUPLICATEV
DataSet
DbCursor
@@ -378,6 +379,7 @@ ThreadList
ThreadListWrapper
Timespec
Timestamp
+Timestamps
TryCV
TxnID
UDF
@@ -398,10 +400,12 @@ UnmapViewOfFile
Unmarshall
Unordered
Uryyb
+VALGRIND
VARCHAR
VLDB
VMSG
VR
+VRFY
VX
Vc
Vfprintf
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 50e7be0039f..feee57aa476 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -616,6 +616,7 @@ dsrc_stats = [
BtreeStat('btree_maxleafpage', 'maximum leaf page size', 'max_aggregate,no_scale,size'),
BtreeStat('btree_maxleafvalue', 'maximum leaf page value size', 'max_aggregate,no_scale,size'),
BtreeStat('btree_overflow', 'overflow pages', 'no_scale,tree_walk'),
+ BtreeStat('btree_row_empty_values', 'row-store empty values', 'no_scale,tree_walk'),
BtreeStat('btree_row_internal', 'row-store internal pages', 'no_scale,tree_walk'),
BtreeStat('btree_row_leaf', 'row-store leaf pages', 'no_scale,tree_walk'),
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 53289581e95..a08bdd38a69 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "d5793d4dd57bb763079e3f79821444e7e666ff44",
+ "commit": "e6c1b9724ed6ed2879a36d7e140f4fa9daceb261",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-4.2"
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 05dd7c70cc0..59331b1978a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -224,7 +224,7 @@ new_page: /* Find the matching WT_COL slot. */
if (cbt->cip_saved != cip) {
if ((cell = WT_COL_PTR(page, cip)) == NULL)
continue;
- __wt_cell_unpack(cell, &unpack);
+ __wt_cell_unpack(page, cell, &unpack);
if (unpack.type == WT_CELL_DEL) {
if ((rle = __wt_cell_rle(&unpack)) == 1)
continue;
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 553c7ca81c2..2451bd97324 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -371,7 +371,7 @@ new_page: if (cbt->recno < cbt->ref->ref_recno)
if (cbt->cip_saved != cip) {
if ((cell = WT_COL_PTR(page, cip)) == NULL)
continue;
- __wt_cell_unpack(cell, &unpack);
+ __wt_cell_unpack(page, cell, &unpack);
if (unpack.type == WT_CELL_DEL) {
if (__wt_cell_rle(&unpack) == 1)
continue;
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index bedcf37d9f8..7a5d78f8941 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -521,19 +521,15 @@ static int
__debug_dsk_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk)
{
WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- uint32_t i;
+ WT_CELL_UNPACK unpack;
WT_ASSERT(ds->session, S2BT_SAFE(ds->session) != NULL);
btree = S2BT(ds->session);
- unpack = &_unpack;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
- __wt_cell_unpack(cell, unpack);
- WT_RET(__debug_cell(ds, dsk, unpack));
- }
+ WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, false) {
+ WT_RET(__debug_cell(ds, dsk, &unpack));
+ } WT_CELL_FOREACH_END;
return (0);
}
@@ -1001,7 +997,7 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref)
unpack = NULL;
rle = 1;
} else {
- __wt_cell_unpack(cell, unpack);
+ __wt_cell_unpack(page, cell, unpack);
rle = __wt_cell_rle(unpack);
}
WT_RET(__wt_snprintf(
@@ -1209,7 +1205,7 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte)
else
WT_RET(ds->f(ds, "\t" "txn id %" PRIu64, upd->txnid));
- if (upd->timestamp != 0) {
+ if (upd->timestamp != WT_TS_NONE) {
__wt_timestamp_to_hex_string(
hex_timestamp, upd->timestamp);
WT_RET(ds->f(ds, ", stamp %s", hex_timestamp));
@@ -1275,6 +1271,7 @@ __debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
WT_DECL_ITEM(buf);
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ char hex_ts_start[WT_TS_HEX_SIZE], hex_ts_stop[WT_TS_HEX_SIZE];
const char *type;
session = ds->session;
@@ -1313,6 +1310,12 @@ __debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
break;
}
+ if (unpack->start_ts != WT_TS_NONE || unpack->stop_ts != WT_TS_NONE) {
+ __wt_timestamp_to_hex_string(hex_ts_start, unpack->start_ts);
+ __wt_timestamp_to_hex_string(hex_ts_stop, unpack->stop_ts);
+ WT_RET(ds->f(ds, ", ts %s-%s", hex_ts_start, hex_ts_stop));
+ }
+
/* Dump addresses. */
switch (unpack->raw) {
case WT_CELL_ADDR_DEL:
@@ -1367,6 +1370,12 @@ __debug_cell_data(WT_DBG *ds,
if (unpack == NULL)
return (__debug_item(ds, tag, "deleted", strlen("deleted")));
+ /*
+ * Row-store references to empty cells return a NULL on-page reference.
+ */
+ if (unpack->cell == NULL)
+ return (__debug_item(ds, tag, "", 0));
+
switch (unpack->raw) {
case WT_CELL_ADDR_DEL:
case WT_CELL_ADDR_INT:
diff --git a/src/third_party/wiredtiger/src/btree/bt_ovfl.c b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
index 6032364fff7..0ac05e59d16 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ovfl.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
@@ -207,7 +207,7 @@ __wt_ovfl_remove(WT_SESSION_IMPL *session,
* Discard an on-page overflow value, and reset the page's cell.
*/
int
-__wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell)
+__wt_ovfl_discard(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell)
{
WT_BM *bm;
WT_BTREE *btree;
@@ -217,7 +217,7 @@ __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell)
bm = btree->bm;
unpack = &_unpack;
- __wt_cell_unpack(cell, unpack);
+ __wt_cell_unpack(page, cell, unpack);
/*
* Finally remove overflow key/value objects, called when reconciliation
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index 940acbfe3e6..a5ce5878da3 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -272,16 +272,12 @@ static void
__inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- const WT_PAGE_HEADER *dsk;
+ WT_CELL_UNPACK unpack;
WT_PAGE_INDEX *pindex;
WT_REF **refp, *ref;
- uint32_t hint, i;
+ uint32_t hint;
btree = S2BT(session);
- dsk = page->dsk;
- unpack = &_unpack;
/*
* Walk the page, building references: the page contains value items.
@@ -290,15 +286,13 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page)
pindex = WT_INTL_INDEX_GET_SAFE(page);
refp = pindex->index;
hint = 0;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
+ WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) {
ref = *refp++;
ref->home = page;
ref->pindex_hint = hint++;
-
- __wt_cell_unpack(cell, unpack);
- ref->addr = cell;
- ref->ref_recno = unpack->v;
- }
+ ref->addr = unpack.cell;
+ ref->ref_recno = unpack.v;
+ } WT_CELL_FOREACH_END;
}
/*
@@ -309,23 +303,17 @@ static void
__inmem_col_var_repeats(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t *np)
{
WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- const WT_PAGE_HEADER *dsk;
- uint32_t i;
+ WT_CELL_UNPACK unpack;
*np = 0;
btree = S2BT(session);
- dsk = page->dsk;
- unpack = &_unpack;
/* Walk the page, counting entries for the repeats array. */
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
- __wt_cell_unpack(cell, unpack);
- if (__wt_cell_rle(unpack) > 1)
+ WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) {
+ if (__wt_cell_rle(&unpack) > 1)
++*np;
- }
+ } WT_CELL_FOREACH_END;
}
/*
@@ -338,22 +326,18 @@ __inmem_col_var(
WT_SESSION_IMPL *session, WT_PAGE *page, uint64_t recno, size_t *sizep)
{
WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK unpack;
WT_COL *cip;
WT_COL_RLE *repeats;
- const WT_PAGE_HEADER *dsk;
size_t size;
uint64_t rle;
- uint32_t i, indx, n, repeat_off;
+ uint32_t indx, n, repeat_off;
void *p;
btree = S2BT(session);
- dsk = page->dsk;
repeats = NULL;
repeat_off = 0;
- unpack = &_unpack;
/*
* Walk the page, building references: the page contains unsorted value
@@ -362,9 +346,8 @@ __inmem_col_var(
*/
indx = 0;
cip = page->pg_var;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
- __wt_cell_unpack(cell, unpack);
- WT_COL_PTR_SET(cip, WT_PAGE_DISK_OFFSET(page, cell));
+ WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) {
+ WT_COL_PTR_SET(cip, WT_PAGE_DISK_OFFSET(page, unpack.cell));
cip++;
/*
@@ -373,7 +356,7 @@ __inmem_col_var(
* repeats array triggers a re-walk from the start of the page
* to determine the size of the array.
*/
- rle = __wt_cell_rle(unpack);
+ rle = __wt_cell_rle(&unpack);
if (rle > 1) {
if (repeats == NULL) {
__inmem_col_var_repeats(session, page, &n);
@@ -392,7 +375,7 @@ __inmem_col_var(
}
indx++;
recno += rle;
- }
+ } WT_CELL_FOREACH_END;
return (0);
}
@@ -405,19 +388,15 @@ static int
__inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
{
WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK unpack;
WT_DECL_ITEM(current);
WT_DECL_RET;
- const WT_PAGE_HEADER *dsk;
WT_PAGE_INDEX *pindex;
WT_REF *ref, **refp;
- uint32_t hint, i;
+ uint32_t hint;
bool overflow_keys;
btree = S2BT(session);
- unpack = &_unpack;
- dsk = page->dsk;
WT_RET(__wt_scr_alloc(session, 0, &current));
@@ -430,19 +409,18 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
refp = pindex->index;
overflow_keys = false;
hint = 0;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
+ WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) {
ref = *refp;
ref->home = page;
ref->pindex_hint = hint++;
- __wt_cell_unpack(cell, unpack);
- switch (unpack->type) {
+ switch (unpack.type) {
case WT_CELL_KEY:
/*
* Note: we don't Huffman encode internal page keys,
* there's no decoding work to do.
*/
- __wt_ref_key_onpage_set(page, ref, unpack);
+ __wt_ref_key_onpage_set(page, ref, &unpack);
break;
case WT_CELL_KEY_OVFL:
/*
@@ -452,10 +430,10 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
* items.
*/
WT_ERR(__wt_dsk_cell_data_ref(
- session, page->type, unpack, current));
+ session, page->type, &unpack, current));
WT_ERR(__wt_row_ikey_incr(session, page,
- WT_PAGE_DISK_OFFSET(page, cell),
+ WT_PAGE_DISK_OFFSET(page, unpack.cell),
current->data, current->size, ref));
*sizep += sizeof(WT_IKEY) + current->size;
@@ -481,7 +459,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
*
* Re-create the state of a deleted page.
*/
- ref->addr = cell;
+ ref->addr = unpack.cell;
WT_REF_SET_STATE(ref, WT_REF_DELETED);
++refp;
@@ -500,12 +478,12 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep)
case WT_CELL_ADDR_INT:
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
- ref->addr = cell;
+ ref->addr = unpack.cell;
++refp;
break;
- WT_ILLEGAL_VALUE_ERR(session, unpack->type);
+ WT_ILLEGAL_VALUE_ERR(session, unpack.type);
}
- }
+ } WT_CELL_FOREACH_END;
/*
* We track if an internal page has backing overflow keys, as overflow
@@ -527,12 +505,10 @@ __inmem_row_leaf_entries(
WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint32_t *nindxp)
{
WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- uint32_t i, nindx;
+ WT_CELL_UNPACK unpack;
+ uint32_t nindx;
btree = S2BT(session);
- unpack = &_unpack;
/*
* Leaf row-store page entries map to a maximum of one-to-one to the
@@ -546,9 +522,8 @@ __inmem_row_leaf_entries(
* single on-page (WT_CELL_VALUE) or overflow (WT_CELL_VALUE_OVFL) item.
*/
nindx = 0;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
- __wt_cell_unpack(cell, unpack);
- switch (unpack->type) {
+ WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) {
+ switch (unpack.type) {
case WT_CELL_KEY:
case WT_CELL_KEY_OVFL:
++nindx;
@@ -556,9 +531,9 @@ __inmem_row_leaf_entries(
case WT_CELL_VALUE:
case WT_CELL_VALUE_OVFL:
break;
- WT_ILLEGAL_VALUE(session, unpack->type);
+ WT_ILLEGAL_VALUE(session, unpack.type);
}
- }
+ } WT_CELL_FOREACH_END;
*nindxp = nindx;
return (0);
@@ -572,23 +547,17 @@ static int
__inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- const WT_PAGE_HEADER *dsk;
+ WT_CELL_UNPACK unpack;
WT_ROW *rip;
- uint32_t i;
btree = S2BT(session);
- dsk = page->dsk;
- unpack = &_unpack;
/* Walk the page, building indices. */
rip = page->pg_row;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
- __wt_cell_unpack(cell, unpack);
- switch (unpack->type) {
+ WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) {
+ switch (unpack.type) {
case WT_CELL_KEY_OVFL:
- __wt_row_leaf_key_set_cell(page, rip, cell);
+ __wt_row_leaf_key_set_cell(page, rip, unpack.cell);
++rip;
break;
case WT_CELL_KEY:
@@ -597,10 +566,11 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
* or prefix compressed), can be directly referenced on
* the page to avoid repeatedly unpacking their cells.
*/
- if (!btree->huffman_key && unpack->prefix == 0)
- __wt_row_leaf_key_set(page, rip, unpack);
+ if (!btree->huffman_key && unpack.prefix == 0)
+ __wt_row_leaf_key_set(page, rip, &unpack);
else
- __wt_row_leaf_key_set_cell(page, rip, cell);
+ __wt_row_leaf_key_set_cell(
+ page, rip, unpack.cell);
++rip;
break;
case WT_CELL_VALUE:
@@ -610,13 +580,13 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
* their cells.
*/
if (!btree->huffman_value)
- __wt_row_leaf_value_set(page, rip - 1, unpack);
+ __wt_row_leaf_value_set(page, rip - 1, &unpack);
break;
case WT_CELL_VALUE_OVFL:
break;
- WT_ILLEGAL_VALUE(session, unpack->type);
+ WT_ILLEGAL_VALUE(session, unpack.type);
}
- }
+ } WT_CELL_FOREACH_END;
/*
* We do not currently instantiate keys on leaf pages when the page is
diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
index a509bbb88bc..365c852a717 100644
--- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c
+++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
@@ -194,11 +194,9 @@ __rebalance_col_walk(
WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs)
{
WT_BTREE *btree;
- WT_CELL *cell;
WT_CELL_UNPACK unpack;
WT_DECL_ITEM(buf);
WT_DECL_RET;
- uint32_t i;
btree = S2BT(session);
@@ -213,8 +211,7 @@ __rebalance_col_walk(
* location cookie pairs. Keys are on-page/overflow items and location
* cookies are WT_CELL_ADDR_XXX items.
*/
- WT_CELL_FOREACH(btree, dsk, cell, &unpack, i) {
- __wt_cell_unpack(cell, &unpack);
+ WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) {
switch (unpack.type) {
case WT_CELL_ADDR_INT:
/* An internal page: read it and recursively walk it. */
@@ -237,7 +234,7 @@ __rebalance_col_walk(
break;
WT_ILLEGAL_VALUE_ERR(session, unpack.type);
}
- }
+ } WT_CELL_FOREACH_END;
err: __wt_scr_free(session, &buf);
return (ret);
@@ -278,13 +275,11 @@ __rebalance_row_walk(
WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs)
{
WT_BTREE *btree;
- WT_CELL *cell;
WT_CELL_UNPACK key, unpack;
WT_DECL_ITEM(buf);
WT_DECL_ITEM(leafkey);
WT_DECL_RET;
size_t len;
- uint32_t i;
bool first_cell;
const void *p;
@@ -304,8 +299,7 @@ __rebalance_row_walk(
* cookies are WT_CELL_ADDR_XXX items.
*/
first_cell = true;
- WT_CELL_FOREACH(btree, dsk, cell, &unpack, i) {
- __wt_cell_unpack(cell, &unpack);
+ WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) {
switch (unpack.type) {
case WT_CELL_KEY:
key = unpack;
@@ -388,7 +382,7 @@ __rebalance_row_walk(
break;
WT_ILLEGAL_VALUE_ERR(session, unpack.type);
}
- }
+ } WT_CELL_FOREACH_END;
err: __wt_scr_free(session, &buf);
__wt_scr_free(session, &leafkey);
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index 95ba4114345..6d08901539e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -110,7 +110,7 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
if (page->type == WT_PAGE_COL_VAR) {
/* Take the value from the original page cell. */
cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]);
- __wt_cell_unpack(cell, &unpack);
+ __wt_cell_unpack(page, cell, &unpack);
return (__wt_page_cell_data_ref(
session, page, &unpack, &cursor->value));
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index a719af982ec..ea46bc8a72c 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -521,16 +521,13 @@ __slvg_trk_leaf(WT_SESSION_IMPL *session,
const WT_PAGE_HEADER *dsk, uint8_t *addr, size_t addr_size, WT_STUFF *ss)
{
WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK unpack;
WT_DECL_RET;
WT_PAGE *page;
WT_TRACK *trk;
uint64_t stop_recno;
- uint32_t i;
btree = S2BT(session);
- unpack = &_unpack;
page = NULL;
trk = NULL;
@@ -565,10 +562,9 @@ __slvg_trk_leaf(WT_SESSION_IMPL *session,
* the page.
*/
stop_recno = dsk->recno;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
- __wt_cell_unpack(cell, unpack);
- stop_recno += __wt_cell_rle(unpack);
- }
+ WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) {
+ stop_recno += __wt_cell_rle(&unpack);
+ } WT_CELL_FOREACH_END;
trk->col_start = dsk->recno;
trk->col_stop = stop_recno - 1;
@@ -661,23 +657,20 @@ __slvg_trk_leaf_ovfl(
WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_TRACK *trk)
{
WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
- uint32_t i, ovfl_cnt;
+ WT_CELL_UNPACK unpack;
+ uint32_t ovfl_cnt;
btree = S2BT(session);
- unpack = &_unpack;
/*
* Two passes: count the overflow items, then copy them into an
* allocated array.
*/
ovfl_cnt = 0;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
- __wt_cell_unpack(cell, unpack);
- if (unpack->ovfl)
+ WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) {
+ if (unpack.ovfl)
++ovfl_cnt;
- }
+ } WT_CELL_FOREACH_END;
if (ovfl_cnt == 0)
return (0);
@@ -686,25 +679,24 @@ __slvg_trk_leaf_ovfl(
trk->trk_ovfl_cnt = ovfl_cnt;
ovfl_cnt = 0;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
- __wt_cell_unpack(cell, unpack);
- if (unpack->ovfl) {
- WT_RET(__wt_memdup(session, unpack->data,
- unpack->size, &trk->trk_ovfl_addr[ovfl_cnt].addr));
+ WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) {
+ if (unpack.ovfl) {
+ WT_RET(__wt_memdup(session, unpack.data,
+ unpack.size, &trk->trk_ovfl_addr[ovfl_cnt].addr));
trk->trk_ovfl_addr[ovfl_cnt].size =
- (uint8_t)unpack->size;
+ (uint8_t)unpack.size;
__wt_verbose(session, WT_VERB_SALVAGE,
"%s overflow reference %s",
__wt_addr_string(session,
trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1),
__wt_addr_string(session,
- unpack->data, unpack->size, trk->ss->tmp2));
+ unpack.data, unpack.size, trk->ss->tmp2));
if (++ovfl_cnt == trk->trk_ovfl_cnt)
break;
}
- }
+ } WT_CELL_FOREACH_END;
return (0);
}
@@ -1360,7 +1352,7 @@ __slvg_col_ovfl(WT_SESSION_IMPL *session, WT_TRACK *trk,
WT_COL_FOREACH(page, cip, i) {
cell = WT_COL_PTR(page, cip);
- __wt_cell_unpack(cell, &unpack);
+ __wt_cell_unpack(page, cell, &unpack);
recno += __wt_cell_rle(&unpack);
/*
@@ -2083,7 +2075,7 @@ __slvg_row_ovfl(WT_SESSION_IMPL *session,
(void)__wt_row_leaf_key_info(
page, copy, NULL, &cell, NULL, NULL);
if (cell != NULL) {
- __wt_cell_unpack(cell, &unpack);
+ __wt_cell_unpack(page, cell, &unpack);
WT_RET(__slvg_row_ovfl_single(session, trk, &unpack));
}
__wt_row_leaf_value_cell(page, rip, NULL, &unpack);
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index b2e0883010d..5de4029ebc0 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -185,9 +185,9 @@ __split_ovfl_key_cleanup(WT_SESSION_IMPL *session, WT_PAGE *page, WT_REF *ref)
ikey->cell_offset = 0;
cell = WT_PAGE_REF_OFFSET(page, cell_offset);
- __wt_cell_unpack(cell, &kpack);
+ __wt_cell_unpack(page, cell, &kpack);
if (kpack.ovfl && kpack.raw != WT_CELL_KEY_OVFL_RM)
- WT_RET(__wt_ovfl_discard(session, cell));
+ WT_RET(__wt_ovfl_discard(session, page, cell));
return (0);
}
@@ -260,7 +260,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
*/
WT_ORDERED_READ(ref_addr, ref->addr);
if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) {
- __wt_cell_unpack((WT_CELL *)ref_addr, &unpack);
+ __wt_cell_unpack(from_home, (WT_CELL *)ref_addr, &unpack);
WT_RET(__wt_calloc_one(session, &addr));
WT_ERR(__wt_memdup(
session, unpack.data, unpack.size, &addr->addr));
diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c
index 88efe4e2e24..5fdce5edf96 100644
--- a/src/third_party/wiredtiger/src/btree/bt_stat.c
+++ b/src/third_party/wiredtiger/src/btree/bt_stat.c
@@ -165,7 +165,7 @@ __stat_page_col_var(
++deleted_cnt;
} else {
orig_deleted = false;
- __wt_cell_unpack(cell, unpack);
+ __wt_cell_unpack(page, cell, unpack);
if (unpack->type == WT_CELL_DEL)
orig_deleted = true;
else {
@@ -230,9 +230,8 @@ __stat_page_row_int(
WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
{
WT_BTREE *btree;
- WT_CELL *cell;
WT_CELL_UNPACK unpack;
- uint32_t i, ovfl_cnt;
+ uint32_t ovfl_cnt;
btree = S2BT(session);
ovfl_cnt = 0;
@@ -245,11 +244,10 @@ __stat_page_row_int(
* a reference to the original cell.
*/
if (page->dsk != NULL)
- WT_CELL_FOREACH(btree, page->dsk, cell, &unpack, i) {
- __wt_cell_unpack(cell, &unpack);
- if (__wt_cell_type(cell) == WT_CELL_KEY_OVFL)
+ WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, false) {
+ if (__wt_cell_type(unpack.cell) == WT_CELL_KEY_OVFL)
++ovfl_cnt;
- }
+ } WT_CELL_FOREACH_END;
WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt);
}
@@ -263,15 +261,15 @@ __stat_page_row_leaf(
WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats)
{
WT_BTREE *btree;
- WT_CELL *cell;
WT_CELL_UNPACK unpack;
WT_INSERT *ins;
WT_ROW *rip;
WT_UPDATE *upd;
- uint32_t entry_cnt, i, ovfl_cnt;
+ uint32_t empty_values, entry_cnt, i, ovfl_cnt;
+ bool key;
btree = S2BT(session);
- entry_cnt = ovfl_cnt = 0;
+ empty_values = entry_cnt = ovfl_cnt = 0;
WT_STAT_INCR(session, stats, btree_row_leaf);
@@ -311,14 +309,33 @@ __stat_page_row_leaf(
* Overflow keys are hard: we have to walk the disk image to count them,
* the in-memory representation of the page doesn't necessarily contain
* a reference to the original cell.
+ *
+ * Zero-length values are the same, we have to look at the disk image to
+ * know. They aren't stored but we know they exist if there are two keys
+ * in a row, or a key as the last item.
*/
- if (page->dsk != NULL)
- WT_CELL_FOREACH(btree, page->dsk, cell, &unpack, i) {
- __wt_cell_unpack(cell, &unpack);
- if (__wt_cell_type(cell) == WT_CELL_KEY_OVFL)
+ if (page->dsk != NULL) {
+ key = false;
+ WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, false) {
+ switch (__wt_cell_type(unpack.cell)) {
+ case WT_CELL_KEY_OVFL:
++ovfl_cnt;
- }
+ /* FALLTHROUGH */
+ case WT_CELL_KEY:
+ if (key)
+ ++empty_values;
+ key = true;
+ break;
+ default:
+ key = false;
+ break;
+ }
+ } WT_CELL_FOREACH_END;
+ if (key)
+ ++empty_values;
+ }
+ WT_STAT_INCRV(session, stats, btree_row_empty_values, empty_values);
WT_STAT_INCRV(session, stats, btree_entries, entry_cnt);
WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 7f711be3480..dccfd97f322 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -416,7 +416,7 @@ recno_chk: if (recno != vs->record_total + 1)
if ((cell = WT_COL_PTR(page, cip)) == NULL)
++recno;
else {
- __wt_cell_unpack(cell, unpack);
+ __wt_cell_unpack(page, cell, unpack);
recno += __wt_cell_rle(unpack);
}
vs->record_total += recno;
@@ -436,7 +436,7 @@ recno_chk: if (recno != vs->record_total + 1)
/* If it's not the root page, unpack the parent cell. */
if (!__wt_ref_is_root(ref)) {
- __wt_cell_unpack(ref->addr, unpack);
+ __wt_cell_unpack(ref->home, ref->addr, unpack);
/* Compare the parent cell against the page type. */
switch (page->type) {
@@ -533,7 +533,8 @@ celltype_err: WT_RET_MSG(session, WT_ERROR,
--vs->depth;
WT_RET(ret);
- __wt_cell_unpack(child_ref->addr, unpack);
+ __wt_cell_unpack(
+ child_ref->home, child_ref->addr, unpack);
WT_RET(bm->verify_addr(
bm, session, unpack->data, unpack->size));
} WT_INTL_FOREACH_END;
@@ -563,7 +564,8 @@ celltype_err: WT_RET_MSG(session, WT_ERROR,
--vs->depth;
WT_RET(ret);
- __wt_cell_unpack(child_ref->addr, unpack);
+ __wt_cell_unpack(
+ child_ref->home, child_ref->addr, unpack);
WT_RET(bm->verify_addr(
bm, session, unpack->data, unpack->size));
} WT_INTL_FOREACH_END;
@@ -690,16 +692,14 @@ __verify_overflow_cell(
WT_SESSION_IMPL *session, WT_REF *ref, bool *found, WT_VSTUFF *vs)
{
WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *unpack, _unpack;
+ WT_CELL_UNPACK unpack;
WT_DECL_RET;
const WT_PAGE_HEADER *dsk;
- uint32_t cell_num, i;
+ uint32_t cell_num;
*found = false;
btree = S2BT(session);
- unpack = &_unpack;
/*
* If a tree is empty (just created), it won't have a disk image;
@@ -710,18 +710,17 @@ __verify_overflow_cell(
/* Walk the disk page, verifying pages referenced by overflow cells. */
cell_num = 0;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
+ WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, false) {
++cell_num;
- __wt_cell_unpack(cell, unpack);
- switch (unpack->type) {
+ switch (unpack.type) {
case WT_CELL_KEY_OVFL:
case WT_CELL_VALUE_OVFL:
*found = true;
WT_ERR(__verify_overflow(
- session, unpack->data, unpack->size, vs));
+ session, unpack.data, unpack.size, vs));
break;
}
- }
+ } WT_CELL_FOREACH_END;
return (0);
@@ -730,7 +729,7 @@ err: WT_RET_MSG(session, ret,
"that failed verification",
cell_num - 1,
__wt_page_addr_string(session, ref, vs->tmp1),
- __wt_addr_string(session, unpack->data, unpack->size, vs->tmp2));
+ __wt_addr_string(session, unpack.data, unpack.size, vs->tmp2));
}
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
index 39f4a041ea9..8db215bd162 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
@@ -39,6 +39,18 @@ static int __verify_dsk_row(
} while (0)
/*
+ * WT_CELL_FOREACH_VRFY --
+ * Iterate through each cell on a page. Verify-specific version of the
+ * WT_CELL_FOREACH macro, created because the loop can't simply unpack cells,
+ * verify has to do additional work to ensure that unpack is safe.
+ */
+#define WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) \
+ for ((cell) = \
+ WT_PAGE_HEADER_BYTE(btree, dsk), (i) = (dsk)->u.entries; \
+ (i) > 0; \
+ (cell) = (WT_CELL *)((uint8_t *)(cell) + (unpack)->__len), --(i))
+
+/*
* __wt_verify_dsk_image --
* Verify a single block as read from disk.
*/
@@ -48,7 +60,6 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session,
{
uint8_t flags;
const uint8_t *p, *end;
- u_int i;
/* Check the page type. */
switch (dsk->type) {
@@ -114,12 +125,22 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session,
"page at %s has invalid flags set: 0x%" PRIx8,
tag, flags);
- /* Unused bytes */
- for (p = dsk->unused, i = sizeof(dsk->unused); i > 0; --i)
- if (*p != '\0')
- WT_RET_VRFY(session,
- "page at %s has non-zero unused page header bytes",
- tag);
+ /* Check the unused byte. */
+ if (dsk->unused != 0)
+ WT_RET_VRFY(session,
+ "page at %s has non-zero unused page header bytes",
+ tag);
+
+ /* Check the page version. */
+ switch (dsk->version) {
+ case WT_PAGE_VERSION_ORIG:
+ case WT_PAGE_VERSION_TS:
+ break;
+ default:
+ WT_RET_VRFY(session,
+ "page at %s has an invalid version of %" PRIu8,
+ tag, dsk->version);
+ }
/*
* Any bytes after the data chunk should be nul bytes; ignore if the
@@ -226,11 +247,11 @@ __verify_dsk_row(
last_cell_type = FIRST;
cell_num = 0;
key_cnt = 0;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
+ WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) {
++cell_num;
/* Carefully unpack the cell. */
- if (__wt_cell_unpack_safe(cell, unpack, dsk, end) != 0) {
+ if (__wt_cell_unpack_safe(dsk, cell, unpack, end) != 0) {
ret = __err_cell_corrupt(session, cell_num, tag);
goto err;
}
@@ -499,11 +520,11 @@ __verify_dsk_col_int(
end = (uint8_t *)dsk + dsk->mem_size;
cell_num = 0;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
+ WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) {
++cell_num;
/* Carefully unpack the cell. */
- if (__wt_cell_unpack_safe(cell, unpack, dsk, end) != 0)
+ if (__wt_cell_unpack_safe(dsk, cell, unpack, end) != 0)
return (__err_cell_corrupt(session, cell_num, tag));
/* Check the raw and collapsed cell types. */
@@ -570,11 +591,11 @@ __verify_dsk_col_var(
last_deleted = false;
cell_num = 0;
- WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
+ WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) {
++cell_num;
/* Carefully unpack the cell. */
- if (__wt_cell_unpack_safe(cell, unpack, dsk, end) != 0)
+ if (__wt_cell_unpack_safe(dsk, cell, unpack, end) != 0)
return (__err_cell_corrupt(session, cell_num, tag));
/* Check the raw and collapsed cell types. */
diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c
index 39eddb6e0cc..b7406d47329 100644
--- a/src/third_party/wiredtiger/src/btree/row_key.c
+++ b/src/third_party/wiredtiger/src/btree/row_key.c
@@ -262,7 +262,7 @@ switch_and_jump: /* Switching to a forward roll. */
/*
* It must be an on-page cell, unpack it.
*/
- __wt_cell_unpack(cell, unpack);
+ __wt_cell_unpack(page, cell, unpack);
/* 3: the test for an on-page reference to an overflow key. */
if (unpack->type == WT_CELL_KEY_OVFL) {
@@ -286,7 +286,7 @@ switch_and_jump: /* Switching to a forward roll. */
copy = WT_ROW_KEY_COPY(rip);
if (!__wt_row_leaf_key_info(page, copy,
NULL, &cell, &keyb->data, &keyb->size)) {
- __wt_cell_unpack(cell, unpack);
+ __wt_cell_unpack(page, cell, unpack);
ret = __wt_dsk_cell_data_ref(session,
WT_PAGE_ROW_LEAF, unpack, keyb);
}
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index b9b66bf0c7b..3f5472ac66a 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -80,12 +80,12 @@ struct __wt_page_header {
#define WT_PAGE_LAS_UPDATE 0x10u /* Page updates in lookaside store */
uint8_t flags; /* 25: flags */
- /*
- * End the structure with 2 bytes of padding: it wastes space, but it
- * leaves the structure 32-bit aligned and having a few bytes to play
- * with in the future can't hurt.
- */
- uint8_t unused[2]; /* 26-27: unused padding */
+ /* A byte of padding, positioned to be added to the flags. */
+ uint8_t unused; /* 26: unused padding */
+
+#define WT_PAGE_VERSION_ORIG 0 /* Original version */
+#define WT_PAGE_VERSION_TS 1 /* Timestamps added */
+ uint8_t version; /* 27: version */
};
/*
* WT_PAGE_HEADER_SIZE is the number of bytes we allocate for the structure: if
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index c9004138370..856a761ca4f 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1048,13 +1048,13 @@ __wt_row_leaf_value_cell(
page, copy, NULL, &kcell, &key, &size) && kcell == NULL)
vcell = (WT_CELL *)((uint8_t *)key + size);
else {
- __wt_cell_unpack(kcell, &unpack);
+ __wt_cell_unpack(page, kcell, &unpack);
vcell = (WT_CELL *)((uint8_t *)
unpack.cell + __wt_cell_total_len(&unpack));
}
}
- __wt_cell_unpack(__wt_cell_leaf_value_parse(page, vcell), vpack);
+ __wt_cell_unpack(page, __wt_cell_leaf_value_parse(page, vcell), vpack);
}
/*
@@ -1091,9 +1091,11 @@ __wt_ref_info(WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep)
{
WT_ADDR *addr;
WT_CELL_UNPACK *unpack, _unpack;
+ WT_PAGE *page;
addr = ref->addr;
unpack = &_unpack;
+ page = ref->home;
/*
* If NULL, there is no location.
@@ -1107,7 +1109,7 @@ __wt_ref_info(WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep)
*sizep = 0;
if (typep != NULL)
*typep = 0;
- } else if (__wt_off_page(ref->home, addr)) {
+ } else if (__wt_off_page(page, addr)) {
*addrp = addr->addr;
*sizep = addr->size;
if (typep != NULL)
@@ -1126,7 +1128,7 @@ __wt_ref_info(WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep)
break;
}
} else {
- __wt_cell_unpack((WT_CELL *)addr, unpack);
+ __wt_cell_unpack(page, (WT_CELL *)addr, unpack);
*addrp = unpack->data;
*sizep = unpack->size;
if (typep != NULL)
@@ -1364,7 +1366,7 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
mod->last_eviction_id != __wt_txn_oldest_id(session))
return (true);
- if (mod->last_eviction_timestamp == 0)
+ if (mod->last_eviction_timestamp == WT_TS_NONE)
return (true);
__wt_txn_pinned_timestamp(session, &pinned_ts);
diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i
index cea27e1b26f..94e50ae7917 100644
--- a/src/third_party/wiredtiger/src/include/cell.i
+++ b/src/third_party/wiredtiger/src/include/cell.i
@@ -16,8 +16,9 @@
*
* There are 4 basic cell types: keys and data (each of which has an overflow
* form), deleted cells and off-page references. The cell is usually followed
- * by additional data, varying by type: a key or data cell is followed by a set
- * of bytes, an address cookie follows overflow or off-page cells.
+ * by additional data, varying by type: keys are followed by a chunk of data,
+ * data is followed by a pair of timestamps and a chunk of data, overflow and
+ * off-page cells are followed by an address cookie.
*
* Deleted cells are place-holders for column-store files, where entries cannot
* be removed in order to preserve the record count.
@@ -51,7 +52,8 @@
* 0x01 Short key cell
* 0x10 Short key cell, with a following prefix-compression byte
* 0x11 Short value cell
- * In these cases, the other 6 bits of the descriptor byte are the data length.
+ * In the "short" variants, the other 6 bits of the descriptor byte are the
+ * data length.
*
* Bit 3 marks an 8B packed, uint64_t value following the cell description byte.
* (A run-length counter or a record number for variable-length column store.)
@@ -88,7 +90,7 @@
*
* WT_CELL_VALUE_COPY is a reference to a previous cell on the page, supporting
* value dictionaries: if the two values are the same, we only store them once
- * and have the second and subsequent use reference the original.
+ * and have any second and subsequent uses reference the original.
*/
#define WT_CELL_ADDR_DEL (0) /* Address: deleted */
#define WT_CELL_ADDR_INT (1 << 4) /* Address: internal */
@@ -122,16 +124,20 @@
*/
struct __wt_cell {
/*
- * Maximum of 16 bytes:
+ * Maximum of 34 bytes:
* 1: cell descriptor byte
* 1: prefix compression count
+ * 9: start timestamp (uint64_t encoding, max 9 bytes)
+ * 9: stop timestamp (uint64_t encoding, max 9 bytes)
* 9: associated 64-bit value (uint64_t encoding, max 9 bytes)
* 5: data length (uint32_t encoding, max 5 bytes)
*
* This calculation is pessimistic: the prefix compression count and
- * 64V value overlap, the 64V value and data length are optional.
+ * 64V value overlap, the 64V value and data length are optional, and
+ * timestamps only appear in values.
*/
- uint8_t __chunk[1 + 1 + WT_INTPACK64_MAXSIZE + WT_INTPACK32_MAXSIZE];
+ uint8_t __chunk[1 + 1 +
+ 3 * WT_INTPACK64_MAXSIZE + WT_INTPACK32_MAXSIZE];
};
/*
@@ -143,6 +149,9 @@ struct __wt_cell_unpack {
uint64_t v; /* RLE count or recno */
+ /* start/stop timestamps */
+ wt_timestamp_t start_ts, stop_ts;
+
/*
* !!!
* The size and __len fields are reasonably type size_t; don't change
@@ -162,16 +171,6 @@ struct __wt_cell_unpack {
};
/*
- * WT_CELL_FOREACH --
- * Walk the cells on a page.
- */
-#define WT_CELL_FOREACH(btree, dsk, cell, unpack, i) \
- for ((cell) = \
- WT_PAGE_HEADER_BYTE(btree, dsk), (i) = (dsk)->u.entries; \
- (i) > 0; \
- (cell) = (WT_CELL *)((uint8_t *)(cell) + (unpack)->__len), --(i))
-
-/*
* __wt_cell_pack_addr --
* Pack an address cell.
*/
@@ -193,14 +192,33 @@ __wt_cell_pack_addr(WT_CELL *cell, u_int cell_type, uint64_t recno, size_t size)
}
/*
+ * __cell_pack_timestamp_pair --
+ * Pack a start, stop timestamp pair.
+ */
+static inline void
+__cell_pack_timestamp_pair(
+ uint8_t **pp, wt_timestamp_t start_ts, wt_timestamp_t stop_ts)
+{
+ if (__wt_process.page_version_ts) {
+ /* Start timestamp, stop timestamp difference. */
+ (void)__wt_vpack_uint(pp, 0, start_ts);
+ (void)__wt_vpack_uint(pp, 0, stop_ts - start_ts);
+ }
+}
+
+/*
* __wt_cell_pack_data --
* Set a data item's WT_CELL contents.
*/
static inline size_t
-__wt_cell_pack_data(WT_CELL *cell, uint64_t rle, size_t size)
+__wt_cell_pack_data(WT_CELL *cell,
+ wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, size_t size)
{
uint8_t byte, *p;
+ p = cell->__chunk + 1;
+ __cell_pack_timestamp_pair(&p, start_ts, stop_ts);
+
/*
* Short data cells without run-length encoding have 6 bits of data
* length in the descriptor byte.
@@ -209,72 +227,86 @@ __wt_cell_pack_data(WT_CELL *cell, uint64_t rle, size_t size)
byte = (uint8_t)size; /* Type + length */
cell->__chunk[0] = (uint8_t)
((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_VALUE_SHORT);
- return (1);
- }
-
- p = cell->__chunk + 1;
- if (rle < 2) {
- size -= WT_CELL_SIZE_ADJUST;
- cell->__chunk[0] = WT_CELL_VALUE; /* Type */
} else {
- cell->__chunk[0] = WT_CELL_VALUE | WT_CELL_64V;
- (void)__wt_vpack_uint(&p, 0, rle); /* RLE */
+ if (rle < 2) {
+ size -= WT_CELL_SIZE_ADJUST;
+ cell->__chunk[0] = WT_CELL_VALUE; /* Type */
+ } else {
+ cell->__chunk[0] = WT_CELL_VALUE | WT_CELL_64V;
+ (void)__wt_vpack_uint(&p, 0, rle); /* RLE */
+ }
+ (void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */
}
- (void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */
return (WT_PTRDIFF(p, cell));
}
/*
* __wt_cell_pack_data_match --
- * Return if two items would have identical WT_CELLs (except for any RLE).
+ * Return if two items would have identical WT_CELLs (except for timestamps
+ * and any RLE).
*/
static inline int
-__wt_cell_pack_data_match(
- WT_CELL *page_cell, WT_CELL *val_cell, const uint8_t *val_data,
- bool *matchp)
+__wt_cell_pack_data_match(WT_CELL *page_cell,
+ WT_CELL *val_cell, const uint8_t *val_data, bool *matchp)
{
- uint64_t av, bv;
+ uint64_t alen, blen, v;
const uint8_t *a, *b;
bool rle;
- *matchp = 0; /* Default to no-match */
+ *matchp = false; /* Default to no-match */
/*
* This is a special-purpose function used by reconciliation to support
* dictionary lookups. We're passed an on-page cell and a created cell
* plus a chunk of data we're about to write on the page, and we return
* if they would match on the page. The column-store comparison ignores
- * the RLE because the copied cell will have its own RLE.
+ * the timestamps and the RLE because the copied cell will have its own.
*/
a = (uint8_t *)page_cell;
b = (uint8_t *)val_cell;
if (WT_CELL_SHORT_TYPE(a[0]) == WT_CELL_VALUE_SHORT) {
- av = a[0] >> WT_CELL_SHORT_SHIFT;
+ alen = a[0] >> WT_CELL_SHORT_SHIFT;
++a;
+ if (__wt_process.page_version_ts) {
+ WT_RET(__wt_vunpack_uint(&a, 0, &v)); /* Skip TS */
+ WT_RET(__wt_vunpack_uint(&a, 0, &v));
+ }
} else if (WT_CELL_TYPE(a[0]) == WT_CELL_VALUE) {
- rle = (a[0] & WT_CELL_64V) != 0; /* Skip any RLE */
+ rle = (a[0] & WT_CELL_64V) != 0;
++a;
- if (rle)
- WT_RET(__wt_vunpack_uint(&a, 0, &av));
- WT_RET(__wt_vunpack_uint(&a, 0, &av)); /* Length */
+ if (__wt_process.page_version_ts) {
+ WT_RET(__wt_vunpack_uint(&a, 0, &v)); /* Skip TS */
+ WT_RET(__wt_vunpack_uint(&a, 0, &v));
+ }
+ if (rle) /* Skip RLE */
+ WT_RET(__wt_vunpack_uint(&a, 0, &v));
+ WT_RET(__wt_vunpack_uint(&a, 0, &alen)); /* Length */
} else
return (0);
if (WT_CELL_SHORT_TYPE(b[0]) == WT_CELL_VALUE_SHORT) {
- bv = b[0] >> WT_CELL_SHORT_SHIFT;
+ blen = b[0] >> WT_CELL_SHORT_SHIFT;
++b;
+ if (__wt_process.page_version_ts) {
+ WT_RET(__wt_vunpack_uint(&b, 0, &v)); /* Skip TS */
+ WT_RET(__wt_vunpack_uint(&b, 0, &v));
+ }
} else if (WT_CELL_TYPE(b[0]) == WT_CELL_VALUE) {
- rle = (b[0] & WT_CELL_64V) != 0; /* Skip any RLE */
+ rle = (b[0] & WT_CELL_64V) != 0;
++b;
- if (rle)
- WT_RET(__wt_vunpack_uint(&b, 0, &bv));
- WT_RET(__wt_vunpack_uint(&b, 0, &bv)); /* Length */
+ if (__wt_process.page_version_ts) {
+ WT_RET(__wt_vunpack_uint(&b, 0, &v)); /* Skip TS */
+ WT_RET(__wt_vunpack_uint(&b, 0, &v));
+ }
+ if (rle) /* Skip RLE */
+ WT_RET(__wt_vunpack_uint(&b, 0, &v));
+ WT_RET(__wt_vunpack_uint(&b, 0, &blen)); /* Length */
} else
return (0);
- if (av == bv)
- *matchp = memcmp(a, val_data, av) == 0;
+ if (alen == blen)
+ *matchp = memcmp(a, val_data, alen) == 0;
return (0);
}
@@ -283,16 +315,19 @@ __wt_cell_pack_data_match(
* Write a copy value cell.
*/
static inline size_t
-__wt_cell_pack_copy(WT_CELL *cell, uint64_t rle, uint64_t v)
+__wt_cell_pack_copy(WT_CELL *cell,
+ wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, uint64_t v)
{
uint8_t *p;
p = cell->__chunk + 1;
+ __cell_pack_timestamp_pair(&p, start_ts, stop_ts);
- if (rle < 2) /* Type */
- cell->__chunk[0] = WT_CELL_VALUE_COPY;
- else { /* Type */
- cell->__chunk[0] = WT_CELL_VALUE_COPY | WT_CELL_64V;
+ if (rle < 2)
+ cell->__chunk[0] = WT_CELL_VALUE_COPY; /* Type */
+ else {
+ cell->__chunk[0] = /* Type */
+ WT_CELL_VALUE_COPY | WT_CELL_64V;
(void)__wt_vpack_uint(&p, 0, rle); /* RLE */
}
(void)__wt_vpack_uint(&p, 0, v); /* Copy offset */
@@ -304,18 +339,21 @@ __wt_cell_pack_copy(WT_CELL *cell, uint64_t rle, uint64_t v)
* Write a deleted value cell.
*/
static inline size_t
-__wt_cell_pack_del(WT_CELL *cell, uint64_t rle)
+__wt_cell_pack_del(WT_CELL *cell,
+ wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle)
{
uint8_t *p;
p = cell->__chunk + 1;
- if (rle < 2) { /* Type */
- cell->__chunk[0] = WT_CELL_DEL;
- return (1);
+ __cell_pack_timestamp_pair(&p, start_ts, stop_ts);
+
+ if (rle < 2)
+ cell->__chunk[0] = WT_CELL_DEL; /* Type */
+ else {
+ cell->__chunk[0] = /* Type */
+ WT_CELL_DEL | WT_CELL_64V;
+ (void)__wt_vpack_uint(&p, 0, rle); /* RLE */
}
- /* Type */
- cell->__chunk[0] = WT_CELL_DEL | WT_CELL_64V;
- (void)__wt_vpack_uint(&p, 0, rle); /* RLE */
return (WT_PTRDIFF(p, cell));
}
@@ -341,7 +379,6 @@ __wt_cell_pack_int_key(WT_CELL *cell, size_t size)
size -= WT_CELL_SIZE_ADJUST;
(void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */
-
return (WT_PTRDIFF(p, cell));
}
@@ -380,7 +417,6 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size)
size -= WT_CELL_SIZE_ADJUST;
(void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */
-
return (WT_PTRDIFF(p, cell));
}
@@ -389,15 +425,26 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size)
* Pack an overflow cell.
*/
static inline size_t
-__wt_cell_pack_ovfl(WT_CELL *cell, uint8_t type, uint64_t rle, size_t size)
+__wt_cell_pack_ovfl(WT_CELL *cell, uint8_t type,
+ wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, size_t size)
{
uint8_t *p;
p = cell->__chunk + 1;
- if (rle < 2) /* Type */
- cell->__chunk[0] = type;
+ switch (type) {
+ case WT_CELL_KEY_OVFL:
+ case WT_CELL_KEY_OVFL_RM:
+ break;
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_OVFL_RM:
+ __cell_pack_timestamp_pair(&p, start_ts, stop_ts);
+ break;
+ }
+
+ if (rle < 2)
+ cell->__chunk[0] = type; /* Type */
else {
- cell->__chunk[0] = type | WT_CELL_64V;
+ cell->__chunk[0] = type | WT_CELL_64V; /* Type */
(void)__wt_vpack_uint(&p, 0, rle); /* RLE */
}
(void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */
@@ -543,31 +590,34 @@ __wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell)
/*
* __wt_cell_unpack_safe --
- * Unpack a WT_CELL into a structure during verification.
+ * Unpack a WT_CELL into a structure, with optional boundary checks.
*/
static inline int
-__wt_cell_unpack_safe(
- WT_CELL *cell, WT_CELL_UNPACK *unpack, const void *start, const void *end)
+__wt_cell_unpack_safe(const WT_PAGE_HEADER *dsk,
+ WT_CELL *cell, WT_CELL_UNPACK *unpack, const void *end)
{
struct {
- uint32_t len;
uint64_t v;
+ wt_timestamp_t start_ts, stop_ts;
+ uint32_t len;
} copy;
uint64_t v;
const uint8_t *p;
- copy.len = 0;
copy.v = 0; /* -Werror=maybe-uninitialized */
+ copy.start_ts = WT_TS_NONE;
+ copy.stop_ts = WT_TS_MAX;
+ copy.len = 0;
/*
- * The verification code specifies start/end arguments, pointers to the
- * start of the page and to 1 past the end-of-page. In which case, make
- * sure all reads are inside the page image. If an error occurs, return
- * an error code but don't output messages, our caller handles that.
+ * The verification code specifies an end argument, a pointer to 1B past
+ * the end-of-page. In which case, make sure all reads are inside the
+ * page image. If an error occurs, return an error code but don't output
+ * messages, our caller handles that.
*/
#define WT_CELL_LEN_CHK(t, len) do { \
- if (start != NULL && \
- ((uint8_t *)(t) < (uint8_t *)start || \
+ if (end != NULL && \
+ ((uint8_t *)(t) < (uint8_t *)dsk || \
(((uint8_t *)(t)) + (len)) > (uint8_t *)end)) \
return (WT_ERROR); \
} while (0)
@@ -583,13 +633,16 @@ restart:
WT_CELL_LEN_CHK(cell, 0);
unpack->cell = cell;
unpack->v = 0;
+ unpack->start_ts = WT_TS_NONE;
+ unpack->stop_ts = WT_TS_MAX;
unpack->raw = (uint8_t)__wt_cell_type_raw(cell);
unpack->type = (uint8_t)__wt_cell_type(cell);
unpack->ovfl = 0;
/*
- * Handle cells with neither an RLE count or data length: short key/data
- * cells have 6 bits of data length in the descriptor byte.
+ * Handle cells with no RLE counts, timestamps or data length:
+ * short key cells have 6 bits of data length in the descriptor byte
+ * and nothing else.
*/
switch (unpack->raw) {
case WT_CELL_KEY_SHORT_PFX:
@@ -600,7 +653,6 @@ restart:
unpack->__len = 2 + unpack->size;
goto done;
case WT_CELL_KEY_SHORT:
- case WT_CELL_VALUE_SHORT:
unpack->prefix = 0;
unpack->data = cell->__chunk + 1;
unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT;
@@ -625,6 +677,38 @@ restart:
unpack->prefix = cell->__chunk[1];
}
+ /* Check for start/stop timestamps. */
+ switch (unpack->raw) {
+ case WT_CELL_DEL:
+ case WT_CELL_VALUE:
+ case WT_CELL_VALUE_COPY:
+ case WT_CELL_VALUE_OVFL:
+ case WT_CELL_VALUE_OVFL_RM:
+ case WT_CELL_VALUE_SHORT:
+ if (dsk->version < WT_PAGE_VERSION_TS)
+ break;
+
+ WT_RET(__wt_vunpack_uint(&p,
+ end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->start_ts));
+ WT_ASSERT(NULL,
+ unpack->start_ts == WT_TS_NONE ||
+ unpack->start_ts == WT_TS_FIXME);
+ WT_RET(__wt_vunpack_uint(&p,
+ end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->stop_ts));
+ unpack->stop_ts += unpack->start_ts;
+ WT_ASSERT(NULL,
+ unpack->stop_ts == WT_TS_FIXME ||
+ unpack->stop_ts == WT_TS_MAX);
+ break;
+ }
+
+ if (unpack->raw == WT_CELL_VALUE_SHORT) {
+ unpack->data = p;
+ unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT;
+ unpack->__len = WT_PTRDIFF32(p, cell) + unpack->size;
+ goto done;
+ }
+
/*
* Check for an RLE count or record number that optionally follows the
* cell descriptor byte on column-store variable-length pages.
@@ -649,8 +733,10 @@ restart:
*/
WT_RET(__wt_vunpack_uint(
&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v));
- copy.len = WT_PTRDIFF32(p, cell);
copy.v = unpack->v;
+ copy.start_ts = unpack->start_ts;
+ copy.stop_ts = unpack->stop_ts;
+ copy.len = WT_PTRDIFF32(p, cell);
cell = (WT_CELL *)((uint8_t *)cell - v);
goto restart;
@@ -703,27 +789,33 @@ restart:
done: WT_CELL_LEN_CHK(cell, unpack->__len);
if (copy.len != 0) {
unpack->raw = WT_CELL_VALUE_COPY;
- unpack->__len = copy.len;
unpack->v = copy.v;
+ unpack->start_ts = copy.start_ts;
+ unpack->stop_ts = copy.stop_ts;
+ unpack->__len = copy.len;
}
return (0);
}
/*
- * __wt_cell_unpack --
+ * __wt_cell_unpack_dsk --
* Unpack a WT_CELL into a structure.
*/
static inline void
-__wt_cell_unpack(WT_CELL *cell, WT_CELL_UNPACK *unpack)
+__wt_cell_unpack_dsk(
+ const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack)
{
/*
* Row-store doesn't store zero-length values on pages, but this allows
- * us to pretend.
+ * us to pretend. If there aren't any timestamps (which is what it will
+ * take to get to a zero-length item), the value must be stable.
*/
if (cell == NULL) {
unpack->cell = NULL;
unpack->v = 0;
+ unpack->start_ts = WT_TS_NONE;
+ unpack->stop_ts = WT_TS_MAX;
unpack->data = "";
unpack->size = 0;
unpack->__len = 0;
@@ -733,7 +825,17 @@ __wt_cell_unpack(WT_CELL *cell, WT_CELL_UNPACK *unpack)
return;
}
- (void)__wt_cell_unpack_safe(cell, unpack, NULL, NULL);
+ (void)__wt_cell_unpack_safe(dsk, cell, unpack, NULL);
+}
+
+/*
+ * __wt_cell_unpack --
+ * Unpack a WT_CELL into a structure, using the page's disk image.
+ */
+static inline void
+__wt_cell_unpack(WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack)
+{
+ __wt_cell_unpack_dsk(page->dsk, cell, unpack);
}
/*
@@ -818,3 +920,27 @@ __wt_page_cell_data_ref(WT_SESSION_IMPL *session,
{
return (__cell_data_ref(session, page, page->type, unpack, store));
}
+
+/*
+ * WT_CELL_FOREACH_BEGIN, WT_CELL_FOREACH_END --
+ * Walk the cells on a page.
+ */
+#define WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, skip_ts) do { \
+ uint32_t __i; \
+ uint8_t *__cell; \
+ for (__cell = WT_PAGE_HEADER_BYTE(btree, dsk), \
+ __i = (dsk)->u.entries; \
+ __i > 0; __cell += (unpack).__len, --__i) { \
+ __wt_cell_unpack_dsk(dsk, (WT_CELL *)__cell, &(unpack));\
+ /* \
+ * Optionally skip unstable page entries after downgrade\
+ * to a release without page timestamps. Check for cells\
+ * with unstable timestamps when we're not writing such \
+ * cells ourselves. \
+ */ \
+ if ((skip_ts) && \
+ (unpack).stop_ts != WT_TS_MAX && \
+ !__wt_process.page_version_ts) \
+ continue;
+#define WT_CELL_FOREACH_END \
+ } } while (0)
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index 5fb0cee2b91..557f8117f17 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -18,14 +18,18 @@ struct __wt_process {
/* Locked: connection queue */
TAILQ_HEAD(__wt_connection_impl_qh, __wt_connection_impl) connqh;
- WT_CACHE_POOL *cache_pool;
-#define WT_TSC_DEFAULT_RATIO 1.0
- double tsc_nsec_ratio; /* rdtsc ticks to nanoseconds */
- bool use_epochtime; /* use expensive time */
+
+ bool page_version_ts; /* timestamp version page formats */
/* Checksum function */
#define __wt_checksum(chunk, len) __wt_process.checksum(chunk, len)
uint32_t (*checksum)(const void *, size_t);
+
+#define WT_TSC_DEFAULT_RATIO 1.0
+ double tsc_nsec_ratio; /* rdtsc ticks to nanoseconds */
+ bool use_epochtime; /* use expensive time */
+
+ WT_CACHE_POOL *cache_pool; /* shared cache information */
};
extern WT_PROCESS __wt_process;
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index 1b301a3d28a..0c5182de528 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -425,7 +425,7 @@ __cursor_row_slot_return(WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_UPDATE *upd)
*/
kpack = &_kpack;
memset(kpack, 0, sizeof(*kpack));
- __wt_cell_unpack(cell, kpack);
+ __wt_cell_unpack(page, cell, kpack);
if (kpack->type == WT_CELL_KEY &&
cbt->rip_saved != NULL && cbt->rip_saved == rip - 1) {
WT_ASSERT(session, cbt->row_key->size >= kpack->prefix);
@@ -495,8 +495,23 @@ __cursor_check_prepared_update(WT_CURSOR_BTREE *cbt, bool *visiblep)
/* The update that returned prepared conflict is now visible. */
F_CLR(cbt, WT_CBT_ITERATE_RETRY_NEXT | WT_CBT_ITERATE_RETRY_PREV);
- if (*visiblep)
- WT_RET(__cursor_kv_return(session, cbt, upd));
+ if (*visiblep) {
+ /*
+ * The underlying key-return function uses a comparison value
+ * of 0 to indicate the search function has pre-built the key
+ * we want to return. That's not the case, don't take that path.
+ */
+ cbt->compare = 1;
+ /*
+ * If a prepared delete operation is resolved, it will be
+ * visible, but key is not valid. The update will be null in
+ * that case and we continue with cursor navigation.
+ */
+ if (upd != NULL)
+ WT_RET(__cursor_kv_return(session, cbt, upd));
+ else
+ *visiblep = false;
+ }
return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 50eaa16e847..deb21e86d38 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -155,7 +155,7 @@ extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *add
extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store, bool *decoded) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_ovfl_discard_remove(WT_SESSION_IMPL *session, WT_PAGE *page);
extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, bool evicting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, uint32_t flags, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 35bd30419da..6c3cab88923 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -736,6 +736,7 @@ struct __wt_dsrc_stats {
int64_t btree_entries;
int64_t btree_overflow;
int64_t btree_compact_rewrite;
+ int64_t btree_row_empty_values;
int64_t btree_row_internal;
int64_t btree_row_leaf;
int64_t cache_bytes_inuse;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 8d093a81588..aebfce5e0ad 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -41,7 +41,10 @@
#define WT_SESSION_IS_CHECKPOINT(s) \
((s)->id != 0 && (s)->id == S2C(s)->txn_global.checkpoint_id)
-#define WT_TS_NONE 0 /* No timestamp */
+#define WT_TS_NONE 0 /* No (or earliest) timestamp */
+#define WT_TS_FIXME 37 /* Fake timestamp */
+#define WT_TS_MAX UINT64_MAX /* Valid after start time */
+
/* Bytes to hold a hex timestamp */
#define WT_TS_HEX_SIZE (2 * sizeof(wt_timestamp_t) + 1)
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index a297db2cf9e..d0acacae5f3 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -441,7 +441,7 @@ __wt_txn_op_set_timestamp(WT_SESSION_IMPL *session, WT_TXN_OP *op)
*/
timestamp = op->type == WT_TXN_OP_REF_DELETE ?
&op->u.ref->page_del->timestamp : &op->u.op_upd->timestamp;
- if (*timestamp == 0)
+ if (*timestamp == WT_TS_NONE)
*timestamp = txn->commit_timestamp;
}
}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 33d0b1ed74d..6fdd1c6408d 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -5827,291 +5827,296 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
/*! btree: pages rewritten by compaction */
#define WT_STAT_DSRC_BTREE_COMPACT_REWRITE 2037
/*!
+ * btree: row-store empty values, only reported if tree_walk or all
+ * statistics are enabled
+ */
+#define WT_STAT_DSRC_BTREE_ROW_EMPTY_VALUES 2038
+/*!
* btree: row-store internal pages, only reported if tree_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038
+#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2039
/*!
* btree: row-store leaf pages, only reported if tree_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_BTREE_ROW_LEAF 2039
+#define WT_STAT_DSRC_BTREE_ROW_LEAF 2040
/*! cache: bytes currently in the cache */
-#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2040
+#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2041
/*! cache: bytes dirty in the cache cumulative */
-#define WT_STAT_DSRC_CACHE_BYTES_DIRTY_TOTAL 2041
+#define WT_STAT_DSRC_CACHE_BYTES_DIRTY_TOTAL 2042
/*! cache: bytes read into cache */
-#define WT_STAT_DSRC_CACHE_BYTES_READ 2042
+#define WT_STAT_DSRC_CACHE_BYTES_READ 2043
/*! cache: bytes written from cache */
-#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2043
+#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2044
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2044
+#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2045
/*! cache: data source pages selected for eviction unable to be evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2045
+#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2046
/*! cache: eviction walk passes of a file */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALK_PASSES 2046
+#define WT_STAT_DSRC_CACHE_EVICTION_WALK_PASSES 2047
/*! cache: eviction walk target pages histogram - 0-9 */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT10 2047
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT10 2048
/*! cache: eviction walk target pages histogram - 10-31 */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT32 2048
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT32 2049
/*! cache: eviction walk target pages histogram - 128 and higher */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_GE128 2049
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_GE128 2050
/*! cache: eviction walk target pages histogram - 32-63 */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT64 2050
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT64 2051
/*! cache: eviction walk target pages histogram - 64-128 */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT128 2051
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT128 2052
/*! cache: eviction walks abandoned */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ABANDONED 2052
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ABANDONED 2053
/*! cache: eviction walks gave up because they restarted their walk twice */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_STOPPED 2053
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_STOPPED 2054
/*!
* cache: eviction walks gave up because they saw too many pages and
* found no candidates
*/
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 2054
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 2055
/*!
* cache: eviction walks gave up because they saw too many pages and
* found too few candidates
*/
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 2055
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 2056
/*! cache: eviction walks reached end of tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ENDED 2056
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ENDED 2057
/*! cache: eviction walks started from root of tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALK_FROM_ROOT 2057
+#define WT_STAT_DSRC_CACHE_EVICTION_WALK_FROM_ROOT 2058
/*! cache: eviction walks started from saved location in tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALK_SAVED_POS 2058
+#define WT_STAT_DSRC_CACHE_EVICTION_WALK_SAVED_POS 2059
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2059
+#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2060
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2060
+#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2061
/*! cache: in-memory page splits */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2061
+#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2062
/*! cache: internal pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2062
+#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2063
/*! cache: internal pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2063
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2064
/*! cache: leaf pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2064
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2065
/*! cache: modified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2065
+#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2066
/*! cache: overflow pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2066
+#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2067
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2067
+#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2068
/*! cache: page written requiring cache overflow records */
-#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2068
+#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2069
/*! cache: pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ 2069
+#define WT_STAT_DSRC_CACHE_READ 2070
/*! cache: pages read into cache after truncate */
-#define WT_STAT_DSRC_CACHE_READ_DELETED 2070
+#define WT_STAT_DSRC_CACHE_READ_DELETED 2071
/*! cache: pages read into cache after truncate in prepare state */
-#define WT_STAT_DSRC_CACHE_READ_DELETED_PREPARED 2071
+#define WT_STAT_DSRC_CACHE_READ_DELETED_PREPARED 2072
/*! cache: pages read into cache requiring cache overflow entries */
-#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2072
+#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2073
/*! cache: pages requested from the cache */
-#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2073
+#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2074
/*! cache: pages seen by eviction walk */
-#define WT_STAT_DSRC_CACHE_EVICTION_PAGES_SEEN 2074
+#define WT_STAT_DSRC_CACHE_EVICTION_PAGES_SEEN 2075
/*! cache: pages written from cache */
-#define WT_STAT_DSRC_CACHE_WRITE 2075
+#define WT_STAT_DSRC_CACHE_WRITE 2076
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2076
+#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2077
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_DSRC_CACHE_BYTES_DIRTY 2077
+#define WT_STAT_DSRC_CACHE_BYTES_DIRTY 2078
/*! cache: unmodified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2078
+#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2079
/*!
* cache_walk: Average difference between current eviction generation
* when the page was last considered, only reported if cache_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_GEN_AVG_GAP 2079
+#define WT_STAT_DSRC_CACHE_STATE_GEN_AVG_GAP 2080
/*!
* cache_walk: Average on-disk page image size seen, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_AVG_WRITTEN_SIZE 2080
+#define WT_STAT_DSRC_CACHE_STATE_AVG_WRITTEN_SIZE 2081
/*!
* cache_walk: Average time in cache for pages that have been visited by
* the eviction server, only reported if cache_walk or all statistics are
* enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_AVG_VISITED_AGE 2081
+#define WT_STAT_DSRC_CACHE_STATE_AVG_VISITED_AGE 2082
/*!
* cache_walk: Average time in cache for pages that have not been visited
* by the eviction server, only reported if cache_walk or all statistics
* are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_AVG_UNVISITED_AGE 2082
+#define WT_STAT_DSRC_CACHE_STATE_AVG_UNVISITED_AGE 2083
/*!
* cache_walk: Clean pages currently in cache, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES_CLEAN 2083
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_CLEAN 2084
/*!
* cache_walk: Current eviction generation, only reported if cache_walk
* or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_GEN_CURRENT 2084
+#define WT_STAT_DSRC_CACHE_STATE_GEN_CURRENT 2085
/*!
* cache_walk: Dirty pages currently in cache, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES_DIRTY 2085
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_DIRTY 2086
/*!
* cache_walk: Entries in the root page, only reported if cache_walk or
* all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_ROOT_ENTRIES 2086
+#define WT_STAT_DSRC_CACHE_STATE_ROOT_ENTRIES 2087
/*!
* cache_walk: Internal pages currently in cache, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES_INTERNAL 2087
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_INTERNAL 2088
/*!
* cache_walk: Leaf pages currently in cache, only reported if cache_walk
* or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES_LEAF 2088
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_LEAF 2089
/*!
* cache_walk: Maximum difference between current eviction generation
* when the page was last considered, only reported if cache_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_GEN_MAX_GAP 2089
+#define WT_STAT_DSRC_CACHE_STATE_GEN_MAX_GAP 2090
/*!
* cache_walk: Maximum page size seen, only reported if cache_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_MAX_PAGESIZE 2090
+#define WT_STAT_DSRC_CACHE_STATE_MAX_PAGESIZE 2091
/*!
* cache_walk: Minimum on-disk page image size seen, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_MIN_WRITTEN_SIZE 2091
+#define WT_STAT_DSRC_CACHE_STATE_MIN_WRITTEN_SIZE 2092
/*!
* cache_walk: Number of pages never visited by eviction server, only
* reported if cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_UNVISITED_COUNT 2092
+#define WT_STAT_DSRC_CACHE_STATE_UNVISITED_COUNT 2093
/*!
* cache_walk: On-disk page image sizes smaller than a single allocation
* unit, only reported if cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_SMALLER_ALLOC_SIZE 2093
+#define WT_STAT_DSRC_CACHE_STATE_SMALLER_ALLOC_SIZE 2094
/*!
* cache_walk: Pages created in memory and never written, only reported
* if cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_MEMORY 2094
+#define WT_STAT_DSRC_CACHE_STATE_MEMORY 2095
/*!
* cache_walk: Pages currently queued for eviction, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_QUEUED 2095
+#define WT_STAT_DSRC_CACHE_STATE_QUEUED 2096
/*!
* cache_walk: Pages that could not be queued for eviction, only reported
* if cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_NOT_QUEUEABLE 2096
+#define WT_STAT_DSRC_CACHE_STATE_NOT_QUEUEABLE 2097
/*!
* cache_walk: Refs skipped during cache traversal, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_REFS_SKIPPED 2097
+#define WT_STAT_DSRC_CACHE_STATE_REFS_SKIPPED 2098
/*!
* cache_walk: Size of the root page, only reported if cache_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_ROOT_SIZE 2098
+#define WT_STAT_DSRC_CACHE_STATE_ROOT_SIZE 2099
/*!
* cache_walk: Total number of pages currently in cache, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES 2099
+#define WT_STAT_DSRC_CACHE_STATE_PAGES 2100
/*! compression: compressed pages read */
-#define WT_STAT_DSRC_COMPRESS_READ 2100
+#define WT_STAT_DSRC_COMPRESS_READ 2101
/*! compression: compressed pages written */
-#define WT_STAT_DSRC_COMPRESS_WRITE 2101
+#define WT_STAT_DSRC_COMPRESS_WRITE 2102
/*! compression: page written failed to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2102
+#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2103
/*! compression: page written was too small to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2103
+#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2104
/*! cursor: bulk-loaded cursor-insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2104
+#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2105
/*! cursor: close calls that result in cache */
-#define WT_STAT_DSRC_CURSOR_CACHE 2105
+#define WT_STAT_DSRC_CURSOR_CACHE 2106
/*! cursor: create calls */
-#define WT_STAT_DSRC_CURSOR_CREATE 2106
+#define WT_STAT_DSRC_CURSOR_CREATE 2107
/*! cursor: cursor operation restarted */
-#define WT_STAT_DSRC_CURSOR_RESTART 2107
+#define WT_STAT_DSRC_CURSOR_RESTART 2108
/*! cursor: cursor-insert key and value bytes inserted */
-#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2108
+#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2109
/*! cursor: cursor-remove key bytes removed */
-#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2109
+#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2110
/*! cursor: cursor-update value bytes updated */
-#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2110
+#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2111
/*! cursor: cursors reused from cache */
-#define WT_STAT_DSRC_CURSOR_REOPEN 2111
+#define WT_STAT_DSRC_CURSOR_REOPEN 2112
/*! cursor: insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT 2112
+#define WT_STAT_DSRC_CURSOR_INSERT 2113
/*! cursor: modify calls */
-#define WT_STAT_DSRC_CURSOR_MODIFY 2113
+#define WT_STAT_DSRC_CURSOR_MODIFY 2114
/*! cursor: next calls */
-#define WT_STAT_DSRC_CURSOR_NEXT 2114
+#define WT_STAT_DSRC_CURSOR_NEXT 2115
/*! cursor: open cursor count */
-#define WT_STAT_DSRC_CURSOR_OPEN_COUNT 2115
+#define WT_STAT_DSRC_CURSOR_OPEN_COUNT 2116
/*! cursor: prev calls */
-#define WT_STAT_DSRC_CURSOR_PREV 2116
+#define WT_STAT_DSRC_CURSOR_PREV 2117
/*! cursor: remove calls */
-#define WT_STAT_DSRC_CURSOR_REMOVE 2117
+#define WT_STAT_DSRC_CURSOR_REMOVE 2118
/*! cursor: reserve calls */
-#define WT_STAT_DSRC_CURSOR_RESERVE 2118
+#define WT_STAT_DSRC_CURSOR_RESERVE 2119
/*! cursor: reset calls */
-#define WT_STAT_DSRC_CURSOR_RESET 2119
+#define WT_STAT_DSRC_CURSOR_RESET 2120
/*! cursor: search calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH 2120
+#define WT_STAT_DSRC_CURSOR_SEARCH 2121
/*! cursor: search near calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2121
+#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2122
/*! cursor: truncate calls */
-#define WT_STAT_DSRC_CURSOR_TRUNCATE 2122
+#define WT_STAT_DSRC_CURSOR_TRUNCATE 2123
/*! cursor: update calls */
-#define WT_STAT_DSRC_CURSOR_UPDATE 2123
+#define WT_STAT_DSRC_CURSOR_UPDATE 2124
/*! reconciliation: dictionary matches */
-#define WT_STAT_DSRC_REC_DICTIONARY 2124
+#define WT_STAT_DSRC_REC_DICTIONARY 2125
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2125
+#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2126
/*!
* reconciliation: internal page key bytes discarded using suffix
* compression
*/
-#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2126
+#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2127
/*! reconciliation: internal page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2127
+#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2128
/*! reconciliation: internal-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2128
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2129
/*! reconciliation: leaf page key bytes discarded using prefix compression */
-#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2129
+#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2130
/*! reconciliation: leaf page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2130
+#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2131
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2131
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2132
/*! reconciliation: maximum blocks required for a page */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2132
+#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2133
/*! reconciliation: overflow values written */
-#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2133
+#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2134
/*! reconciliation: page checksum matches */
-#define WT_STAT_DSRC_REC_PAGE_MATCH 2134
+#define WT_STAT_DSRC_REC_PAGE_MATCH 2135
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2135
+#define WT_STAT_DSRC_REC_PAGES 2136
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2136
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2137
/*! reconciliation: pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2137
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2138
/*! session: object compaction */
-#define WT_STAT_DSRC_SESSION_COMPACT 2138
+#define WT_STAT_DSRC_SESSION_COMPACT 2139
/*! transaction: update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2139
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2140
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_track.c b/src/third_party/wiredtiger/src/reconcile/rec_track.c
index 0a5eb7eacc2..e33c5c29293 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_track.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_track.c
@@ -39,7 +39,7 @@ __ovfl_discard_verbose(
WT_RET(__wt_scr_alloc(session, 512, &tmp));
unpack = &_unpack;
- __wt_cell_unpack(cell, unpack);
+ __wt_cell_unpack(page, cell, unpack);
__wt_verbose(session, WT_VERB_OVERFLOW,
"discard: %s%s%p %s",
@@ -93,7 +93,7 @@ __ovfl_discard_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
session, page, *cellp, "free"));
/* Discard each cell's overflow item. */
- WT_RET(__wt_ovfl_discard(session, *cellp));
+ WT_RET(__wt_ovfl_discard(session, page, *cellp));
}
__wt_free(session, track->discard);
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 2b2026f87cc..c55b7970a68 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -278,10 +278,10 @@ static int __rec_cell_build_int_key(WT_SESSION_IMPL *,
WT_RECONCILE *, const void *, size_t, bool *);
static int __rec_cell_build_leaf_key(WT_SESSION_IMPL *,
WT_RECONCILE *, const void *, size_t, bool *);
-static int __rec_cell_build_ovfl(WT_SESSION_IMPL *,
- WT_RECONCILE *, WT_KV *, uint8_t, uint64_t);
-static int __rec_cell_build_val(WT_SESSION_IMPL *,
- WT_RECONCILE *, const void *, size_t, uint64_t);
+static int __rec_cell_build_ovfl(WT_SESSION_IMPL *, WT_RECONCILE *,
+ WT_KV *, uint8_t, wt_timestamp_t, wt_timestamp_t, uint64_t);
+static int __rec_cell_build_val(WT_SESSION_IMPL *, WT_RECONCILE *,
+ const void *, size_t, wt_timestamp_t, wt_timestamp_t, uint64_t);
static void __rec_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *);
static int __rec_col_fix(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *);
static int __rec_col_fix_slvg(WT_SESSION_IMPL *,
@@ -290,8 +290,6 @@ static int __rec_col_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *);
static int __rec_col_merge(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
static int __rec_col_var(WT_SESSION_IMPL *,
WT_RECONCILE *, WT_REF *, WT_SALVAGE_COOKIE *);
-static int __rec_col_var_helper(WT_SESSION_IMPL *, WT_RECONCILE *,
- WT_SALVAGE_COOKIE *, WT_ITEM *, bool, uint8_t, uint64_t);
static int __rec_destroy_session(WT_SESSION_IMPL *);
static int __rec_init(WT_SESSION_IMPL *,
WT_REF *, uint32_t, WT_SALVAGE_COOKIE *, void *);
@@ -1246,7 +1244,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
}
/* Track the first update with non-zero timestamp. */
- if (first_ts_upd == NULL && upd->timestamp != 0)
+ if (first_ts_upd == NULL && upd->timestamp != WT_TS_NONE)
first_ts_upd = upd;
/*
@@ -1876,8 +1874,8 @@ __rec_copy_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_KV *kv)
* Check for a dictionary match.
*/
static int
-__rec_dict_replace(
- WT_SESSION_IMPL *session, WT_RECONCILE *r, uint64_t rle, WT_KV *val)
+__rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r,
+ wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, WT_KV *val)
{
WT_DICTIONARY *dp;
uint64_t offset;
@@ -1919,8 +1917,8 @@ __rec_dict_replace(
*/
offset = (uint64_t)WT_PTRDIFF(r->first_free,
(uint8_t *)r->cur_ptr->image.mem + dp->offset);
- val->len = val->cell_len =
- __wt_cell_pack_copy(&val->cell, rle, offset);
+ val->len = val->cell_len = __wt_cell_pack_copy(
+ &val->cell, start_ts, stop_ts, rle, offset);
val->buf.data = NULL;
val->buf.size = 0;
}
@@ -2881,6 +2879,8 @@ __rec_split_write_header(WT_SESSION_IMPL *session,
dsk->u.entries = chunk->entries;
dsk->type = page->type;
+ dsk->flags = 0;
+
/* Set the zero-length value flag in the page header. */
if (page->type == WT_PAGE_ROW_LEAF) {
F_CLR(dsk, WT_PAGE_EMPTY_V_ALL | WT_PAGE_EMPTY_V_NONE);
@@ -2899,13 +2899,12 @@ __rec_split_write_header(WT_SESSION_IMPL *session,
if (F_ISSET(r, WT_REC_LOOKASIDE) && multi->supd != NULL)
F_SET(dsk, WT_PAGE_LAS_UPDATE);
- dsk->unused[0] = dsk->unused[1] = 0;
+ dsk->unused = 0;
- /*
- * There are page header fields which need to be cleared for consistent
- * checksums: specifically, the write generation and the memory owned by
- * the block manager.
- */
+ dsk->version = __wt_process.page_version_ts ?
+ WT_PAGE_VERSION_TS : WT_PAGE_VERSION_ORIG;
+
+ /* Clear the memory owned by the block manager. */
memset(WT_BLOCK_HEADER_REF(dsk), 0, btree->block_header);
}
@@ -3366,7 +3365,7 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
WT_RET(__rec_cell_build_leaf_key(session, r, /* Build key cell */
cursor->key.data, cursor->key.size, &ovfl_key));
WT_RET(__rec_cell_build_val(session, r, /* Build value cell */
- cursor->value.data, cursor->value.size, (uint64_t)0));
+ cursor->value.data, cursor->value.size, WT_TS_NONE, WT_TS_MAX, 0));
/* Boundary: split or write the page. */
if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) {
@@ -3392,7 +3391,8 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
else {
r->all_empty_value = false;
if (btree->dictionary)
- WT_RET(__rec_dict_replace(session, r, 0, val));
+ WT_RET(__rec_dict_replace(
+ session, r, WT_TS_NONE, WT_TS_MAX, 0, val));
__rec_copy_incr(session, r, val);
}
@@ -3518,7 +3518,8 @@ __wt_bulk_insert_var(
val = &r->v;
if (deleted) {
- val->cell_len = __wt_cell_pack_del(&val->cell, cbulk->rle);
+ val->cell_len = __wt_cell_pack_del(
+ &val->cell, WT_TS_NONE, WT_TS_MAX, cbulk->rle);
val->buf.data = NULL;
val->buf.size = 0;
val->len = val->cell_len;
@@ -3529,7 +3530,8 @@ __wt_bulk_insert_var(
* value seen, not the current value.
*/
WT_RET(__rec_cell_build_val(session,
- r, cbulk->last.data, cbulk->last.size, cbulk->rle));
+ r, cbulk->last.data, cbulk->last.size,
+ WT_TS_NONE, WT_TS_MAX, cbulk->rle));
/* Boundary: split or write the page. */
if (WT_CROSSING_SPLIT_BND(r, val->len))
@@ -3537,7 +3539,8 @@ __wt_bulk_insert_var(
/* Copy the value onto the page. */
if (btree->dictionary)
- WT_RET(__rec_dict_replace(session, r, cbulk->rle, val));
+ WT_RET(__rec_dict_replace(
+ session, r, WT_TS_NONE, WT_TS_MAX, cbulk->rle, val));
__rec_copy_incr(session, r, val);
/* Update the starting record number in case we split. */
@@ -3657,7 +3660,7 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
if (addr == NULL && __wt_off_page(page, ref->addr))
addr = ref->addr;
if (addr == NULL) {
- __wt_cell_unpack(ref->addr, vpack);
+ __wt_cell_unpack(page, ref->addr, vpack);
val->buf.data = ref->addr;
val->buf.size = __wt_cell_total_len(vpack);
val->cell_len = 0;
@@ -3924,7 +3927,8 @@ __rec_col_fix_slvg(WT_SESSION_IMPL *session,
static int
__rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
WT_SALVAGE_COOKIE *salvage,
- WT_ITEM *value, bool deleted, uint8_t overflow_type, uint64_t rle)
+ WT_ITEM *value, bool deleted, uint8_t overflow_type,
+ wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle)
{
WT_BTREE *btree;
WT_KV *val;
@@ -3965,19 +3969,20 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
}
if (deleted) {
- val->cell_len = __wt_cell_pack_del(&val->cell, rle);
+ val->cell_len =
+ __wt_cell_pack_del(&val->cell, start_ts, stop_ts, rle);
val->buf.data = NULL;
val->buf.size = 0;
val->len = val->cell_len;
} else if (overflow_type) {
- val->cell_len = __wt_cell_pack_ovfl(
- &val->cell, overflow_type, rle, value->size);
+ val->cell_len = __wt_cell_pack_ovfl(&val->cell,
+ overflow_type, start_ts, stop_ts, rle, value->size);
val->buf.data = value->data;
val->buf.size = value->size;
val->len = val->cell_len + value->size;
} else
- WT_RET(__rec_cell_build_val(
- session, r, value->data, value->size, rle));
+ WT_RET(__rec_cell_build_val(session,
+ r, value->data, value->size, start_ts, stop_ts, rle));
/* Boundary: split or write the page. */
if (__rec_need_split(r, val->len))
@@ -3985,7 +3990,8 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
/* Copy the value onto the page. */
if (!deleted && !overflow_type && btree->dictionary)
- WT_RET(__rec_dict_replace(session, r, rle, val));
+ WT_RET(__rec_dict_replace(
+ session, r, start_ts, stop_ts, rle, val));
__rec_copy_incr(session, r, val);
/* Update the starting record number in case we split. */
@@ -4058,7 +4064,8 @@ __rec_col_var(WT_SESSION_IMPL *session,
salvage->take += salvage->missing;
} else
WT_ERR(__rec_col_var_helper(session,
- r, NULL, NULL, true, false, salvage->missing));
+ r, NULL, NULL, true, false,
+ WT_TS_NONE, WT_TS_MAX, salvage->missing));
}
/*
@@ -4082,7 +4089,7 @@ __rec_col_var(WT_SESSION_IMPL *session,
ins = NULL;
orig_deleted = true;
} else {
- __wt_cell_unpack(cell, vpack);
+ __wt_cell_unpack(page, cell, vpack);
nrepeat = __wt_cell_rle(vpack);
ins = WT_SKIP_FIRST(WT_COL_UPDATE(page, cip));
@@ -4228,7 +4235,9 @@ record_loop: /*
if (rle != 0) {
WT_ERR(__rec_col_var_helper(
session, r, salvage, last,
- last_deleted, 0, rle));
+ last_deleted, 0,
+ WT_TS_FIXME, WT_TS_FIXME,
+ rle));
rle = 0;
}
@@ -4236,7 +4245,9 @@ record_loop: /*
last->size = vpack->size;
WT_ERR(__rec_col_var_helper(
session, r, salvage, last, false,
- WT_CELL_VALUE_OVFL, repeat_count));
+ WT_CELL_VALUE_OVFL,
+ WT_TS_FIXME, WT_TS_FIXME,
+ repeat_count));
/* Track if page has overflow items. */
r->ovfl_items = true;
@@ -4284,7 +4295,8 @@ compare: /*
continue;
}
WT_ERR(__rec_col_var_helper(session, r,
- salvage, last, last_deleted, 0, rle));
+ salvage, last, last_deleted, 0,
+ WT_TS_FIXME, WT_TS_FIXME, rle));
}
/*
@@ -4426,7 +4438,8 @@ compare: /*
goto next;
}
WT_ERR(__rec_col_var_helper(session, r,
- salvage, last, last_deleted, 0, rle));
+ salvage, last, last_deleted, 0,
+ WT_TS_FIXME, WT_TS_FIXME, rle));
}
/*
@@ -4471,8 +4484,8 @@ next: if (src_recno == UINT64_MAX)
/* If we were tracking a record, write it. */
if (rle != 0)
- WT_ERR(__rec_col_var_helper(
- session, r, salvage, last, last_deleted, 0, rle));
+ WT_ERR(__rec_col_var_helper(session, r, salvage,
+ last, last_deleted, 0, WT_TS_FIXME, WT_TS_FIXME, rle));
/* Write the remnant page. */
ret = __rec_split_finish(session, r);
@@ -4558,7 +4571,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
if (ikey != NULL && ikey->cell_offset != 0) {
cell =
WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
- __wt_cell_unpack(cell, kpack);
+ __wt_cell_unpack(page, cell, kpack);
key_onpage_ovfl = kpack->ovfl &&
kpack->raw != WT_CELL_KEY_OVFL_RM;
}
@@ -4654,7 +4667,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
vtype = state == WT_CHILD_PROXY ?
WT_CELL_ADDR_DEL : __rec_vtype(addr);
} else {
- __wt_cell_unpack(ref->addr, vpack);
+ __wt_cell_unpack(page, ref->addr, vpack);
p = vpack->data;
size = vpack->size;
vtype = state == WT_CHILD_PROXY ?
@@ -4840,7 +4853,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
kpack = NULL;
else {
kpack = &_kpack;
- __wt_cell_unpack(cell, kpack);
+ __wt_cell_unpack(page, cell, kpack);
}
/* Unpack the on-page value cell, and look for an update. */
@@ -4873,8 +4886,8 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
p = tmpval->data;
size = tmpval->size;
}
- WT_ERR(__rec_cell_build_val(
- session, r, p, size, (uint64_t)0));
+ WT_ERR(__rec_cell_build_val(session, r,
+ p, size, WT_TS_FIXME, WT_TS_FIXME, 0));
dictionary = true;
} else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) {
/*
@@ -4920,7 +4933,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
*/
WT_ERR(__rec_cell_build_val(session, r,
"ovfl-unused", strlen("ovfl-unused"),
- (uint64_t)0));
+ WT_TS_FIXME, WT_TS_FIXME, 0));
} else {
val->buf.data = vpack->cell;
val->buf.size = __wt_cell_total_len(vpack);
@@ -4947,7 +4960,8 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
F_ISSET(r, WT_REC_VISIBLE_ALL)));
WT_ERR(__rec_cell_build_val(session, r,
cbt->iface.value.data,
- cbt->iface.value.size, (uint64_t)0));
+ cbt->iface.value.size,
+ WT_TS_FIXME, WT_TS_FIXME, 0));
dictionary = true;
break;
case WT_UPDATE_STANDARD:
@@ -4963,7 +4977,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
} else {
WT_ERR(__rec_cell_build_val(session, r,
upd->data, upd->size,
- (uint64_t)0));
+ WT_TS_FIXME, WT_TS_FIXME, 0));
dictionary = true;
}
break;
@@ -5045,7 +5059,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
goto build;
kpack = &_kpack;
- __wt_cell_unpack(cell, kpack);
+ __wt_cell_unpack(page, cell, kpack);
if (btree->huffman_key == NULL &&
kpack->type == WT_CELL_KEY &&
tmpkey->size >= kpack->prefix) {
@@ -5117,7 +5131,8 @@ build:
else {
r->all_empty_value = false;
if (dictionary && btree->dictionary)
- WT_ERR(__rec_dict_replace(session, r, 0, val));
+ WT_ERR(__rec_dict_replace(session, r,
+ WT_TS_FIXME, WT_TS_FIXME, 0, val));
__rec_copy_incr(session, r, val);
}
@@ -5194,16 +5209,16 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
WT_RET(__wt_value_return_upd(
session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL)));
WT_RET(__rec_cell_build_val(session, r,
- cbt->iface.value.data,
- cbt->iface.value.size, (uint64_t)0));
+ cbt->iface.value.data, cbt->iface.value.size,
+ WT_TS_FIXME, WT_TS_FIXME, 0));
break;
case WT_UPDATE_STANDARD:
if (upd->size == 0)
val->len = 0;
else
- WT_RET(__rec_cell_build_val(session,
- r, upd->data, upd->size,
- (uint64_t)0));
+ WT_RET(__rec_cell_build_val(session, r,
+ upd->data, upd->size,
+ WT_TS_FIXME, WT_TS_FIXME, 0));
break;
case WT_UPDATE_TOMBSTONE:
continue;
@@ -5239,7 +5254,8 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
else {
r->all_empty_value = false;
if (btree->dictionary)
- WT_RET(__rec_dict_replace(session, r, 0, val));
+ WT_RET(__rec_dict_replace(session, r,
+ WT_TS_FIXME, WT_TS_FIXME, 0, val));
__rec_copy_incr(session, r, val);
}
@@ -5666,8 +5682,8 @@ __rec_cell_build_int_key(WT_SESSION_IMPL *session,
WT_STAT_DATA_INCR(session, rec_overflow_key_internal);
*is_ovflp = true;
- return (__rec_cell_build_ovfl(
- session, r, key, WT_CELL_KEY_OVFL, (uint64_t)0));
+ return (__rec_cell_build_ovfl(session, r,
+ key, WT_CELL_KEY_OVFL, WT_TS_NONE, WT_TS_NONE, 0));
}
key->cell_len = __wt_cell_pack_int_key(&key->cell, key->buf.size);
@@ -5766,8 +5782,8 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session,
WT_STAT_DATA_INCR(session, rec_overflow_key_leaf);
*is_ovflp = true;
- return (__rec_cell_build_ovfl(
- session, r, key, WT_CELL_KEY_OVFL, (uint64_t)0));
+ return (__rec_cell_build_ovfl(session, r, key,
+ WT_CELL_KEY_OVFL, WT_TS_NONE, WT_TS_NONE, 0));
}
return (
__rec_cell_build_leaf_key(session, r, NULL, 0, is_ovflp));
@@ -5820,8 +5836,9 @@ __rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* be stored on the page.
*/
static int
-__rec_cell_build_val(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, const void *data, size_t size, uint64_t rle)
+__rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r,
+ const void *data, size_t size,
+ wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle)
{
WT_BTREE *btree;
WT_KV *val;
@@ -5849,11 +5866,12 @@ __rec_cell_build_val(WT_SESSION_IMPL *session,
if (val->buf.size > btree->maxleafvalue) {
WT_STAT_DATA_INCR(session, rec_overflow_value);
- return (__rec_cell_build_ovfl(
- session, r, val, WT_CELL_VALUE_OVFL, rle));
+ return (__rec_cell_build_ovfl(session, r,
+ val, WT_CELL_VALUE_OVFL, start_ts, stop_ts, rle));
}
}
- val->cell_len = __wt_cell_pack_data(&val->cell, rle, val->buf.size);
+ val->cell_len = __wt_cell_pack_data(
+ &val->cell, start_ts, stop_ts, rle, val->buf.size);
val->len = val->cell_len + val->buf.size;
return (0);
@@ -5865,7 +5883,8 @@ __rec_cell_build_val(WT_SESSION_IMPL *session,
*/
static int
__rec_cell_build_ovfl(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_KV *kv, uint8_t type, uint64_t rle)
+ WT_RECONCILE *r, WT_KV *kv, uint8_t type,
+ wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle)
{
WT_BM *bm;
WT_BTREE *btree;
@@ -5924,7 +5943,8 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session,
WT_ERR(__wt_buf_set(session, &kv->buf, addr, size));
/* Build the cell and return. */
- kv->cell_len = __wt_cell_pack_ovfl(&kv->cell, type, rle, kv->buf.size);
+ kv->cell_len = __wt_cell_pack_ovfl(
+ &kv->cell, type, start_ts, stop_ts, rle, kv->buf.size);
kv->len = kv->cell_len + kv->buf.size;
err: __wt_scr_free(session, &tmp);
diff --git a/src/third_party/wiredtiger/src/support/global.c b/src/third_party/wiredtiger/src/support/global.c
index f71f91a4daa..3b1cfbbf936 100644
--- a/src/third_party/wiredtiger/src/support/global.c
+++ b/src/third_party/wiredtiger/src/support/global.c
@@ -12,11 +12,11 @@ WT_PROCESS __wt_process; /* Per-process structure */
static int __wt_pthread_once_failed; /* If initialization failed */
/*
- * __wt_endian_check --
+ * __endian_check --
* Check the build matches the machine.
*/
static int
-__wt_endian_check(void)
+__endian_check(void)
{
uint64_t v;
const char *e;
@@ -103,11 +103,11 @@ __global_calibrate_ticks(void)
}
/*
- * __wt_global_once --
+ * __global_once --
* Global initialization, run once.
*/
static void
-__wt_global_once(void)
+__global_once(void)
{
WT_DECL_RET;
@@ -117,10 +117,17 @@ __wt_global_once(void)
return;
}
+ TAILQ_INIT(&__wt_process.connqh);
+
+#if defined(HAVE_PAGE_VERSION_TS)
+ __wt_process.page_version_ts = true;
+#else
+ __wt_process.page_version_ts = false;
+#endif
+
__wt_process.checksum = wiredtiger_crc32c_func();
- __global_calibrate_ticks();
- TAILQ_INIT(&__wt_process.connqh);
+ __global_calibrate_ticks();
}
/*
@@ -134,7 +141,7 @@ __wt_library_init(void)
WT_DECL_RET;
/* Check the build matches the machine. */
- WT_RET(__wt_endian_check());
+ WT_RET(__endian_check());
/*
* Do per-process initialization once, before anything else, but only
@@ -143,7 +150,7 @@ __wt_library_init(void)
* static and only using that function to avoid a race.
*/
if (first) {
- if ((ret = __wt_once(__wt_global_once)) != 0)
+ if ((ret = __wt_once(__global_once)) != 0)
__wt_pthread_once_failed = ret;
first = false;
}
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index cee1f270d6d..562c00fe146 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -41,6 +41,7 @@ static const char * const __stats_dsrc_desc[] = {
"btree: number of key/value pairs",
"btree: overflow pages",
"btree: pages rewritten by compaction",
+ "btree: row-store empty values",
"btree: row-store internal pages",
"btree: row-store leaf pages",
"cache: bytes currently in the cache",
@@ -223,6 +224,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->btree_entries = 0;
stats->btree_overflow = 0;
stats->btree_compact_rewrite = 0;
+ stats->btree_row_empty_values = 0;
stats->btree_row_internal = 0;
stats->btree_row_leaf = 0;
/* not clearing cache_bytes_inuse */
@@ -390,6 +392,7 @@ __wt_stat_dsrc_aggregate_single(
to->btree_entries += from->btree_entries;
to->btree_overflow += from->btree_overflow;
to->btree_compact_rewrite += from->btree_compact_rewrite;
+ to->btree_row_empty_values += from->btree_row_empty_values;
to->btree_row_internal += from->btree_row_internal;
to->btree_row_leaf += from->btree_row_leaf;
to->cache_bytes_inuse += from->cache_bytes_inuse;
@@ -578,6 +581,8 @@ __wt_stat_dsrc_aggregate(
to->btree_overflow += WT_STAT_READ(from, btree_overflow);
to->btree_compact_rewrite +=
WT_STAT_READ(from, btree_compact_rewrite);
+ to->btree_row_empty_values +=
+ WT_STAT_READ(from, btree_row_empty_values);
to->btree_row_internal += WT_STAT_READ(from, btree_row_internal);
to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf);
to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 4ddc75afe6c..17044be4c34 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -643,7 +643,7 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session)
* Check timestamps are used in order.
*/
op_zero_ts = !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
- upd_zero_ts = upd->timestamp == 0;
+ upd_zero_ts = upd->timestamp == WT_TS_NONE;
if (op_zero_ts != upd_zero_ts)
WT_RET_MSG(session, EINVAL,
"per-key timestamps used inconsistently");
@@ -660,7 +660,7 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session)
* Only if the update structure doesn't have a timestamp
* then use the one in the transaction structure.
*/
- if (op_timestamp == 0)
+ if (op_timestamp == WT_TS_NONE)
op_timestamp = txn->commit_timestamp;
if (op_timestamp < upd->timestamp)
WT_RET_MSG(session, EINVAL,
@@ -1177,7 +1177,7 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session)
checkpoint_timestamp = txn_global->checkpoint_timestamp;
commit_timestamp = txn_global->commit_timestamp;
pinned_timestamp = txn_global->pinned_timestamp;
- if (checkpoint_timestamp != 0 &&
+ if (checkpoint_timestamp != WT_TS_NONE &&
checkpoint_timestamp < pinned_timestamp)
pinned_timestamp = checkpoint_timestamp;
WT_STAT_SET(session, stats, txn_pinned_timestamp,
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 3e377043fa7..67a0ca91dae 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -765,7 +765,7 @@ __txn_checkpoint_can_skip(WT_SESSION_IMPL *session,
* more that could be written.
*/
if (use_timestamp && txn_global->has_stable_timestamp &&
- txn_global->last_ckpt_timestamp != 0 &&
+ txn_global->last_ckpt_timestamp != WT_TS_NONE &&
txn_global->last_ckpt_timestamp == txn_global->stable_timestamp) {
*can_skipp = true;
return (0);
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 7b8e2ca06ae..d8d08c73863 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -104,7 +104,8 @@ __txn_abort_newer_update(WT_SESSION_IMPL *session,
* strict timestamp checking, assert that all more recent
* updates were also rolled back.
*/
- if (upd->txnid == WT_TXN_ABORTED || upd->timestamp == 0) {
+ if (upd->txnid == WT_TXN_ABORTED ||
+ upd->timestamp == WT_TS_NONE) {
if (upd == first_upd)
first_upd = upd->next;
} else if (rollback_timestamp < upd->timestamp) {
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index b58152f8599..adb52772eea 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -123,7 +123,7 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name,
wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval)
{
WT_RET(__wt_txn_parse_timestamp_raw(session, name, timestamp, cval));
- if (cval->len != 0 && *timestamp == 0)
+ if (cval->len != 0 && *timestamp == WT_TS_NONE)
WT_RET_MSG(session, EINVAL,
"Failed to parse %s timestamp '%.*s': zero not permitted",
name, (int)cval->len, cval->str);
@@ -160,7 +160,7 @@ __txn_get_pinned_timestamp(
/* Check for a running checkpoint */
if (LF_ISSET(WT_TXN_TS_INCLUDE_CKPT) &&
- txn_global->checkpoint_timestamp != 0 &&
+ txn_global->checkpoint_timestamp != WT_TS_NONE &&
(tmp_ts == 0 || txn_global->checkpoint_timestamp < tmp_ts))
tmp_ts = txn_global->checkpoint_timestamp;
if (!txn_has_write_lock)
diff --git a/src/third_party/wiredtiger/test/checkpoint/smoke.sh b/src/third_party/wiredtiger/test/checkpoint/smoke.sh
index 2f1d4345ad7..8db6fc1ebc4 100755
--- a/src/third_party/wiredtiger/test/checkpoint/smoke.sh
+++ b/src/third_party/wiredtiger/test/checkpoint/smoke.sh
@@ -2,6 +2,9 @@
set -e
+# Bypass this test for valgrind
+test "$TESTUTIL_BYPASS_VALGRIND" = "1" && exit 0
+
# Smoke-test checkpoints as part of running "make check".
echo "checkpoint: 3 mixed tables"
$TEST_WRAPPER ./t -T 3 -t m
diff --git a/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c b/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c
index 2757f991c2a..6df68da932d 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c
@@ -98,6 +98,10 @@ main(int argc, char *argv[])
uint64_t i, id;
char buf[100];
+ /* Bypass this test for valgrind */
+ if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND"))
+ return (EXIT_SUCCESS);
+
opts = &_opts;
memset(opts, 0, sizeof(*opts));
opts->table_type = TABLE_ROW;
diff --git a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c
index 0b99df76cf3..6cd94ba7572 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c
@@ -92,6 +92,10 @@ main(int argc, char *argv[])
TEST_OPTS *opts, _opts;
const char *tablename;
+ /* Bypass this test for valgrind */
+ if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND"))
+ return (EXIT_SUCCESS);
+
opts = &_opts;
sharedopts = &_sharedopts;
memset(opts, 0, sizeof(*opts));
diff --git a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c
index a8d44bf3dab..521e67b2439 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c
@@ -163,9 +163,7 @@ main(int argc, char *argv[])
testutil_assert(count == 0);
testutil_progress(opts, "cleanup starting");
-#if 0
testutil_cleanup(opts);
-#endif
return (EXIT_SUCCESS);
}
diff --git a/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c b/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c
index 80911ddfd2d..2021ff1849e 100644
--- a/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c
@@ -186,10 +186,7 @@ main(int argc, char *argv[])
testutil_assert(nfail == 0);
testutil_progress(opts, "cleanup starting");
-#if 0
testutil_cleanup(opts);
-#endif
-
return (0);
}
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index e9063674476..4b8eadeea1d 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -377,6 +377,13 @@ mmrand(WT_RAND_STATE *rnd, u_int min, u_int max)
uint32_t v;
u_int range;
+ /*
+ * Test runs with small row counts can easily pass a max of 0 (for
+ * example, "g.rows / 20"). Return min when the range is empty.
+ */
+ if (max <= min)
+ return (min);
+
v = rng(rnd);
range = (max - min) + 1;
v %= range;
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index f92f438a4f1..d5ed0320761 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -276,6 +276,44 @@ wts_ops(int lastrun)
free(tinfo_list);
}
+typedef enum { NEXT, PREV, SEARCH, SEARCH_NEAR } read_operation;
+
+/*
+ * read_op --
+ * Perform a read operation, waiting out prepare conflicts.
+ */
+static inline int
+read_op(WT_CURSOR *cursor, read_operation op, int *exactp)
+{
+ WT_DECL_RET;
+
+ /*
+ * Read operations wait out prepare-conflicts. (As part of the snapshot
+ * isolation checks, we repeat reads that succeeded before, they should
+ * be repeatable.)
+ */
+ switch (op) {
+ case NEXT:
+ while ((ret = cursor->next(cursor)) == WT_PREPARE_CONFLICT)
+ __wt_yield();
+ break;
+ case PREV:
+ while ((ret = cursor->prev(cursor)) == WT_PREPARE_CONFLICT)
+ __wt_yield();
+ break;
+ case SEARCH:
+ while ((ret = cursor->search(cursor)) == WT_PREPARE_CONFLICT)
+ __wt_yield();
+ break;
+ case SEARCH_NEAR:
+ while ((ret =
+ cursor->search_near(cursor, exactp)) == WT_PREPARE_CONFLICT)
+ __wt_yield();
+ break;
+ }
+ return (ret);
+}
+
typedef enum { INSERT, MODIFY, READ, REMOVE, TRUNCATE, UPDATE } thread_op;
typedef struct {
thread_op op; /* Operation */
@@ -401,7 +439,7 @@ snap_check(WT_CURSOR *cursor,
}
}
- switch (ret = cursor->search(cursor)) {
+ switch (ret = read_op(cursor, SEARCH, NULL)) {
case 0:
if (g.type == FIX) {
testutil_check(
@@ -634,12 +672,22 @@ prepare_transaction(TINFO *tinfo, WT_SESSION *session)
*/
#define OP_FAILED(notfound_ok) do { \
positioned = false; \
- if (intxn && (ret == WT_CACHE_FULL || \
- ret == WT_PREPARE_CONFLICT || ret == WT_ROLLBACK)) \
+ if (intxn && (ret == WT_CACHE_FULL || ret == WT_ROLLBACK)) \
goto rollback; \
testutil_assert((notfound_ok && ret == WT_NOTFOUND) || \
- ret == WT_CACHE_FULL || \
- ret == WT_PREPARE_CONFLICT || ret == WT_ROLLBACK); \
+ ret == WT_CACHE_FULL || ret == WT_ROLLBACK); \
+} while (0)
+
+/*
+ * Roll back updates returning prepare-conflict; they're unlikely to succeed
+ * unless the prepare aborts. Reads wait out the error, so a read failing with it is unexpected.
+ */
+#define READ_OP_FAILED(notfound_ok) \
+ OP_FAILED(notfound_ok)
+#define WRITE_OP_FAILED(notfound_ok) do { \
+ if (ret == WT_PREPARE_CONFLICT) \
+ ret = WT_ROLLBACK; \
+ OP_FAILED(notfound_ok); \
} while (0)
/*
@@ -826,7 +874,7 @@ ops(void *arg)
positioned = true;
SNAP_TRACK(READ, tinfo);
} else
- OP_FAILED(true);
+ READ_OP_FAILED(true);
}
/* Optionally reserve a row. */
@@ -845,7 +893,7 @@ ops(void *arg)
__wt_yield(); /* Let other threads proceed. */
} else
- OP_FAILED(true);
+ WRITE_OP_FAILED(true);
}
/* Perform the operation. */
@@ -875,7 +923,7 @@ ops(void *arg)
++tinfo->insert;
SNAP_TRACK(INSERT, tinfo);
} else
- OP_FAILED(false);
+ WRITE_OP_FAILED(false);
break;
case MODIFY:
/*
@@ -899,7 +947,7 @@ ops(void *arg)
positioned = true;
SNAP_TRACK(MODIFY, tinfo);
} else
- OP_FAILED(true);
+ WRITE_OP_FAILED(true);
break;
case READ:
++tinfo->search;
@@ -908,7 +956,7 @@ ops(void *arg)
positioned = true;
SNAP_TRACK(READ, tinfo);
} else
- OP_FAILED(true);
+ READ_OP_FAILED(true);
break;
case REMOVE:
remove_instead_of_truncate:
@@ -929,7 +977,7 @@ remove_instead_of_truncate:
*/
SNAP_TRACK(REMOVE, tinfo);
} else
- OP_FAILED(true);
+ WRITE_OP_FAILED(true);
break;
case TRUNCATE:
/*
@@ -958,7 +1006,8 @@ remove_instead_of_truncate:
* vice-versa).
*/
greater_than = mmrand(&tinfo->rnd, 0, 1) == 1;
- range = mmrand(&tinfo->rnd, 1, (u_int)g.rows / 20);
+ range = g.rows < 20 ?
+ 1 : mmrand(&tinfo->rnd, 1, (u_int)g.rows / 20);
tinfo->last = tinfo->keyno;
if (greater_than) {
if (g.c_reverse) {
@@ -992,14 +1041,15 @@ remove_instead_of_truncate:
ret = col_truncate(tinfo, cursor);
break;
}
- positioned = false;
(void)__wt_atomic_subv64(&g.truncate_cnt, 1);
+ /* Truncate never leaves the cursor positioned. */
+ positioned = false;
if (ret == 0) {
++tinfo->truncate;
SNAP_TRACK(TRUNCATE, tinfo);
} else
- OP_FAILED(false);
+ WRITE_OP_FAILED(false);
break;
case UPDATE:
update_instead_of_chosen_op:
@@ -1017,7 +1067,7 @@ update_instead_of_chosen_op:
positioned = true;
SNAP_TRACK(UPDATE, tinfo);
} else
- OP_FAILED(false);
+ WRITE_OP_FAILED(false);
break;
}
@@ -1033,7 +1083,7 @@ update_instead_of_chosen_op:
if ((ret = nextprev(tinfo, cursor, next)) == 0)
continue;
- OP_FAILED(true);
+ READ_OP_FAILED(true);
break;
}
}
@@ -1066,9 +1116,8 @@ update_instead_of_chosen_op:
*/
if (g.c_prepare && mmrand(&tinfo->rnd, 1, 10) == 1) {
ret = prepare_transaction(tinfo, session);
- testutil_assert(ret == 0 || ret == WT_PREPARE_CONFLICT);
- if (ret == WT_PREPARE_CONFLICT)
- goto rollback;
+ if (ret != 0)
+ WRITE_OP_FAILED(false);
__wt_yield(); /* Let other threads proceed. */
}
@@ -1193,11 +1242,11 @@ read_row_worker(
}
if (sn) {
- ret = cursor->search_near(cursor, &exact);
+ ret = read_op(cursor, SEARCH_NEAR, &exact);
if (ret == 0 && exact != 0)
ret = WT_NOTFOUND;
} else
- ret = cursor->search(cursor);
+ ret = read_op(cursor, SEARCH, NULL);
switch (ret) {
case 0:
if (g.type == FIX) {
@@ -1288,7 +1337,7 @@ nextprev(TINFO *tinfo, WT_CURSOR *cursor, bool next)
keyno = 0;
which = next ? "WT_CURSOR.next" : "WT_CURSOR.prev";
- switch (ret = (next ? cursor->next(cursor) : cursor->prev(cursor))) {
+ switch (ret = read_op(cursor, next ? NEXT : PREV, NULL)) {
case 0:
switch (g.type) {
case FIX:
@@ -2019,7 +2068,7 @@ row_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
}
/* We use the cursor in overwrite mode, check for existence. */
- if ((ret = cursor->search(cursor)) == 0)
+ if ((ret = read_op(cursor, SEARCH, NULL)) == 0)
ret = cursor->remove(cursor);
if (ret != 0 && ret != WT_NOTFOUND)
@@ -2053,7 +2102,7 @@ col_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
cursor->set_key(cursor, tinfo->keyno);
/* We use the cursor in overwrite mode, check for existence. */
- if ((ret = cursor->search(cursor)) == 0)
+ if ((ret = read_op(cursor, SEARCH, NULL)) == 0)
ret = cursor->remove(cursor);
if (ret != 0 && ret != WT_NOTFOUND)
diff --git a/src/third_party/wiredtiger/test/suite/test_dictionary.py b/src/third_party/wiredtiger/test/suite/test_dictionary.py
new file mode 100644
index 00000000000..f624e1ade35
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_dictionary.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_dictionary.py
+# Smoke test dictionary compression.
+
+from wtscenario import make_scenarios
+from wtdataset import simple_key
+from wiredtiger import stat
+import wiredtiger, wttest
+
+# Smoke test dictionary compression.
+class test_dictionary(wttest.WiredTigerTestCase):
+ conn_config = 'statistics=(all)'
+ scenarios = make_scenarios([
+ ('row', dict(key_format='S', value_format='S')),
+ ('var', dict(key_format='r', value_format='S')),
+ ])
+
+ # Smoke test dictionary compression.
+ def test_dictionary(self):
+ nentries = 25000
+ uri = 'file:test_dictionary' # This is a btree layer test.
+
+ # Create the object, open the cursor, insert some records with identical values. Use
+ # alternating values, otherwise column-store will RLE compress them into a single item.
+ self.session.create(uri, 'dictionary=100,value_format=S,key_format=' + self.key_format)
+ cursor = self.session.open_cursor(uri, None)
+ i = 0
+ while i < nentries:
+ i = i + 1
+ cursor[simple_key(cursor, i)] = "the same value as the odd items"
+ i = i + 1
+ cursor[simple_key(cursor, i)] = "the same value as the even items"
+ cursor.close()
+
+ # Checkpoint to force the pages through reconciliation.
+ self.session.checkpoint()
+
+ # Confirm the dictionary was effective.
+ cursor = self.session.open_cursor('statistics:' + uri, None, None)
+ self.assertGreater(cursor[stat.dsrc.rec_dictionary][2], nentries - 100)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_empty_value.py b/src/third_party/wiredtiger/test/suite/test_empty_value.py
new file mode 100644
index 00000000000..b40eaaef3d1
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_empty_value.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_empty_value.py
+# Smoke test empty row-store values.
+
+from wtdataset import simple_key
+from wiredtiger import stat
+import wiredtiger, wttest
+
+# Smoke test empty row-store values.
+class test_row_store_empty_values(wttest.WiredTigerTestCase):
+ conn_config = 'statistics=(all)'
+
+ # Smoke test empty row-store values.
+ def test_row_store_empty_values(self):
+ nentries = 25000
+ uri = 'file:test_empty_values' # This is a btree layer test.
+
+ # Create the object, open the cursor, insert some records with zero-length values.
+ self.session.create(uri, 'value_format=u,key_format=S')
+ cursor = self.session.open_cursor(uri, None)
+ for i in xrange(1, nentries + 1):
+ cursor[simple_key(cursor, i)] = ""
+ cursor.close()
+
+ # Reopen to force the object to disk.
+ self.reopen_conn()
+
+ # Confirm the values weren't stored.
+ cursor = self.session.open_cursor('statistics:' + uri, None, 'statistics=(tree_walk)')
+ self.assertEqual(cursor[stat.dsrc.btree_row_empty_values][2], nentries)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c
index 8d5605208cf..b66ae497707 100644
--- a/src/third_party/wiredtiger/test/utility/misc.c
+++ b/src/third_party/wiredtiger/test/utility/misc.c
@@ -199,7 +199,7 @@ bool
testutil_is_flag_set(const char *flag)
{
const char *res;
- bool enable_long_tests;
+ bool flag_being_set;
if (__wt_getenv(NULL, flag, &res) != 0 || res == NULL)
return (false);
@@ -208,11 +208,11 @@ testutil_is_flag_set(const char *flag)
* This is a boolean test. So if the environment variable is set to any
* value other than 0, we return success.
*/
- enable_long_tests = res[0] != '0';
+ flag_being_set = res[0] != '0';
free((void *)res);
- return (enable_long_tests);
+ return (flag_being_set);
}
/*