diff options
47 files changed, 904 insertions, 497 deletions
diff --git a/src/third_party/wiredtiger/build_posix/aclocal/options.m4 b/src/third_party/wiredtiger/build_posix/aclocal/options.m4 index 47655ba82cb..ff23ab4148c 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/options.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/options.m4 @@ -221,6 +221,20 @@ pthread_adaptive|pthreads_adaptive) esac AC_MSG_RESULT($with_spinlock) +AH_TEMPLATE(HAVE_PAGE_VERSION_TS, + [Define to 1 to enable writing timestamp version page formats.]) +AC_MSG_CHECKING(if --enable-page-version-ts option specified) +AC_ARG_ENABLE(page-version-ts, + [AS_HELP_STRING([--enable-page-version-ts], + [Configure for timestamp version page formats])], + r=$enableval, r=no) +case "$r" in +no) wt_cv_enable_page_version_ts=no;; +*) AC_DEFINE(HAVE_PAGE_VERSION_TS) + wt_cv_enable_page_version_ts=yes;; +esac +AC_MSG_RESULT($wt_cv_enable_page_version_ts) + AC_MSG_CHECKING(if --enable-strict option specified) AC_ARG_ENABLE(strict, [AS_HELP_STRING([--enable-strict], diff --git a/src/third_party/wiredtiger/build_win/wiredtiger_config.h b/src/third_party/wiredtiger/build_win/wiredtiger_config.h index 6c803748f7b..48a41ba2360 100644 --- a/src/third_party/wiredtiger/build_win/wiredtiger_config.h +++ b/src/third_party/wiredtiger/build_win/wiredtiger_config.h @@ -79,6 +79,9 @@ /* Define to 1 to disable any crc32 hardware support. */ /* #undef HAVE_NO_CRC32_HARDWARE */ +/* Define to 1 to enable writing timestamp version page formats. */ +/* #undef HAVE_PAGE_VERSION_TS */ + /* Define to 1 if pthread condition variables support monotonic clocks. 
*/ /* #undef HAVE_PTHREAD_COND_MONOTONIC */ diff --git a/src/third_party/wiredtiger/dist/function.py b/src/third_party/wiredtiger/dist/function.py index 7c632b4e227..22c1d2928b9 100644 --- a/src/third_party/wiredtiger/dist/function.py +++ b/src/third_party/wiredtiger/dist/function.py @@ -90,8 +90,10 @@ def function_args(name, line): line = re.sub("^static ", "", line) line = re.sub("^volatile ", "", line) - # Let WT_UNUSED terminate the parse. It often appears at the beginning - # of the function and looks like a WT_XXX variable declaration. + # Let WT_ASSERT and WT_UNUSED terminate the parse. They often appear at the + # beginning of the function and look like a WT_XXX variable declaration. + if re.search('^WT_ASSERT', line): + return False,0 if re.search('^WT_UNUSED', line): return False,0 diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index c5546aed751..e6892f63c72 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -96,6 +96,7 @@ DIRECTIO DNE DOI DONTNEED +DSK DUPLICATEV DataSet DbCursor @@ -378,6 +379,7 @@ ThreadList ThreadListWrapper Timespec Timestamp +Timestamps TryCV TxnID UDF @@ -398,10 +400,12 @@ UnmapViewOfFile Unmarshall Unordered Uryyb +VALGRIND VARCHAR VLDB VMSG VR +VRFY VX Vc Vfprintf diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 50e7be0039f..feee57aa476 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -616,6 +616,7 @@ dsrc_stats = [ BtreeStat('btree_maxleafpage', 'maximum leaf page size', 'max_aggregate,no_scale,size'), BtreeStat('btree_maxleafvalue', 'maximum leaf page value size', 'max_aggregate,no_scale,size'), BtreeStat('btree_overflow', 'overflow pages', 'no_scale,tree_walk'), + BtreeStat('btree_row_empty_values', 'row-store empty values', 'no_scale,tree_walk'), BtreeStat('btree_row_internal', 'row-store 
internal pages', 'no_scale,tree_walk'), BtreeStat('btree_row_leaf', 'row-store leaf pages', 'no_scale,tree_walk'), diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 53289581e95..a08bdd38a69 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "d5793d4dd57bb763079e3f79821444e7e666ff44", + "commit": "e6c1b9724ed6ed2879a36d7e140f4fa9daceb261", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-4.2" diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c index 05dd7c70cc0..59331b1978a 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curnext.c +++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c @@ -224,7 +224,7 @@ new_page: /* Find the matching WT_COL slot. */ if (cbt->cip_saved != cip) { if ((cell = WT_COL_PTR(page, cip)) == NULL) continue; - __wt_cell_unpack(cell, &unpack); + __wt_cell_unpack(page, cell, &unpack); if (unpack.type == WT_CELL_DEL) { if ((rle = __wt_cell_rle(&unpack)) == 1) continue; diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c index 553c7ca81c2..2451bd97324 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curprev.c +++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c @@ -371,7 +371,7 @@ new_page: if (cbt->recno < cbt->ref->ref_recno) if (cbt->cip_saved != cip) { if ((cell = WT_COL_PTR(page, cip)) == NULL) continue; - __wt_cell_unpack(cell, &unpack); + __wt_cell_unpack(page, cell, &unpack); if (unpack.type == WT_CELL_DEL) { if (__wt_cell_rle(&unpack) == 1) continue; diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index bedcf37d9f8..7a5d78f8941 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -521,19 +521,15 @@ static int 
__debug_dsk_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk) { WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *unpack, _unpack; - uint32_t i; + WT_CELL_UNPACK unpack; WT_ASSERT(ds->session, S2BT_SAFE(ds->session) != NULL); btree = S2BT(ds->session); - unpack = &_unpack; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { - __wt_cell_unpack(cell, unpack); - WT_RET(__debug_cell(ds, dsk, unpack)); - } + WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, false) { + WT_RET(__debug_cell(ds, dsk, &unpack)); + } WT_CELL_FOREACH_END; return (0); } @@ -1001,7 +997,7 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref) unpack = NULL; rle = 1; } else { - __wt_cell_unpack(cell, unpack); + __wt_cell_unpack(page, cell, unpack); rle = __wt_cell_rle(unpack); } WT_RET(__wt_snprintf( @@ -1209,7 +1205,7 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte) else WT_RET(ds->f(ds, "\t" "txn id %" PRIu64, upd->txnid)); - if (upd->timestamp != 0) { + if (upd->timestamp != WT_TS_NONE) { __wt_timestamp_to_hex_string( hex_timestamp, upd->timestamp); WT_RET(ds->f(ds, ", stamp %s", hex_timestamp)); @@ -1275,6 +1271,7 @@ __debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack) WT_DECL_ITEM(buf); WT_DECL_RET; WT_SESSION_IMPL *session; + char hex_ts_start[WT_TS_HEX_SIZE], hex_ts_stop[WT_TS_HEX_SIZE]; const char *type; session = ds->session; @@ -1313,6 +1310,12 @@ __debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack) break; } + if (unpack->start_ts != WT_TS_NONE || unpack->stop_ts != WT_TS_NONE) { + __wt_timestamp_to_hex_string(hex_ts_start, unpack->start_ts); + __wt_timestamp_to_hex_string(hex_ts_stop, unpack->stop_ts); + WT_RET(ds->f(ds, ", ts %s-%s", hex_ts_start, hex_ts_stop)); + } + /* Dump addresses. */ switch (unpack->raw) { case WT_CELL_ADDR_DEL: @@ -1367,6 +1370,12 @@ __debug_cell_data(WT_DBG *ds, if (unpack == NULL) return (__debug_item(ds, tag, "deleted", strlen("deleted"))); + /* + * Row-store references to empty cells return a NULL on-page reference. 
+ */ + if (unpack->cell == NULL) + return (__debug_item(ds, tag, "", 0)); + switch (unpack->raw) { case WT_CELL_ADDR_DEL: case WT_CELL_ADDR_INT: diff --git a/src/third_party/wiredtiger/src/btree/bt_ovfl.c b/src/third_party/wiredtiger/src/btree/bt_ovfl.c index 6032364fff7..0ac05e59d16 100644 --- a/src/third_party/wiredtiger/src/btree/bt_ovfl.c +++ b/src/third_party/wiredtiger/src/btree/bt_ovfl.c @@ -207,7 +207,7 @@ __wt_ovfl_remove(WT_SESSION_IMPL *session, * Discard an on-page overflow value, and reset the page's cell. */ int -__wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) +__wt_ovfl_discard(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell) { WT_BM *bm; WT_BTREE *btree; @@ -217,7 +217,7 @@ __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) bm = btree->bm; unpack = &_unpack; - __wt_cell_unpack(cell, unpack); + __wt_cell_unpack(page, cell, unpack); /* * Finally remove overflow key/value objects, called when reconciliation diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c index 940acbfe3e6..a5ce5878da3 100644 --- a/src/third_party/wiredtiger/src/btree/bt_page.c +++ b/src/third_party/wiredtiger/src/btree/bt_page.c @@ -272,16 +272,12 @@ static void __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *unpack, _unpack; - const WT_PAGE_HEADER *dsk; + WT_CELL_UNPACK unpack; WT_PAGE_INDEX *pindex; WT_REF **refp, *ref; - uint32_t hint, i; + uint32_t hint; btree = S2BT(session); - dsk = page->dsk; - unpack = &_unpack; /* * Walk the page, building references: the page contains value items. 
@@ -290,15 +286,13 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page) pindex = WT_INTL_INDEX_GET_SAFE(page); refp = pindex->index; hint = 0; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { + WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) { ref = *refp++; ref->home = page; ref->pindex_hint = hint++; - - __wt_cell_unpack(cell, unpack); - ref->addr = cell; - ref->ref_recno = unpack->v; - } + ref->addr = unpack.cell; + ref->ref_recno = unpack.v; + } WT_CELL_FOREACH_END; } /* @@ -309,23 +303,17 @@ static void __inmem_col_var_repeats(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t *np) { WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *unpack, _unpack; - const WT_PAGE_HEADER *dsk; - uint32_t i; + WT_CELL_UNPACK unpack; *np = 0; btree = S2BT(session); - dsk = page->dsk; - unpack = &_unpack; /* Walk the page, counting entries for the repeats array. */ - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { - __wt_cell_unpack(cell, unpack); - if (__wt_cell_rle(unpack) > 1) + WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) { + if (__wt_cell_rle(&unpack) > 1) ++*np; - } + } WT_CELL_FOREACH_END; } /* @@ -338,22 +326,18 @@ __inmem_col_var( WT_SESSION_IMPL *session, WT_PAGE *page, uint64_t recno, size_t *sizep) { WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *unpack, _unpack; + WT_CELL_UNPACK unpack; WT_COL *cip; WT_COL_RLE *repeats; - const WT_PAGE_HEADER *dsk; size_t size; uint64_t rle; - uint32_t i, indx, n, repeat_off; + uint32_t indx, n, repeat_off; void *p; btree = S2BT(session); - dsk = page->dsk; repeats = NULL; repeat_off = 0; - unpack = &_unpack; /* * Walk the page, building references: the page contains unsorted value @@ -362,9 +346,8 @@ __inmem_col_var( */ indx = 0; cip = page->pg_var; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { - __wt_cell_unpack(cell, unpack); - WT_COL_PTR_SET(cip, WT_PAGE_DISK_OFFSET(page, cell)); + WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) { + WT_COL_PTR_SET(cip, WT_PAGE_DISK_OFFSET(page, unpack.cell)); 
cip++; /* @@ -373,7 +356,7 @@ __inmem_col_var( * repeats array triggers a re-walk from the start of the page * to determine the size of the array. */ - rle = __wt_cell_rle(unpack); + rle = __wt_cell_rle(&unpack); if (rle > 1) { if (repeats == NULL) { __inmem_col_var_repeats(session, page, &n); @@ -392,7 +375,7 @@ __inmem_col_var( } indx++; recno += rle; - } + } WT_CELL_FOREACH_END; return (0); } @@ -405,19 +388,15 @@ static int __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep) { WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *unpack, _unpack; + WT_CELL_UNPACK unpack; WT_DECL_ITEM(current); WT_DECL_RET; - const WT_PAGE_HEADER *dsk; WT_PAGE_INDEX *pindex; WT_REF *ref, **refp; - uint32_t hint, i; + uint32_t hint; bool overflow_keys; btree = S2BT(session); - unpack = &_unpack; - dsk = page->dsk; WT_RET(__wt_scr_alloc(session, 0, &current)); @@ -430,19 +409,18 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep) refp = pindex->index; overflow_keys = false; hint = 0; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { + WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) { ref = *refp; ref->home = page; ref->pindex_hint = hint++; - __wt_cell_unpack(cell, unpack); - switch (unpack->type) { + switch (unpack.type) { case WT_CELL_KEY: /* * Note: we don't Huffman encode internal page keys, * there's no decoding work to do. */ - __wt_ref_key_onpage_set(page, ref, unpack); + __wt_ref_key_onpage_set(page, ref, &unpack); break; case WT_CELL_KEY_OVFL: /* @@ -452,10 +430,10 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep) * items. 
*/ WT_ERR(__wt_dsk_cell_data_ref( - session, page->type, unpack, current)); + session, page->type, &unpack, current)); WT_ERR(__wt_row_ikey_incr(session, page, - WT_PAGE_DISK_OFFSET(page, cell), + WT_PAGE_DISK_OFFSET(page, unpack.cell), current->data, current->size, ref)); *sizep += sizeof(WT_IKEY) + current->size; @@ -481,7 +459,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep) * * Re-create the state of a deleted page. */ - ref->addr = cell; + ref->addr = unpack.cell; WT_REF_SET_STATE(ref, WT_REF_DELETED); ++refp; @@ -500,12 +478,12 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep) case WT_CELL_ADDR_INT: case WT_CELL_ADDR_LEAF: case WT_CELL_ADDR_LEAF_NO: - ref->addr = cell; + ref->addr = unpack.cell; ++refp; break; - WT_ILLEGAL_VALUE_ERR(session, unpack->type); + WT_ILLEGAL_VALUE_ERR(session, unpack.type); } - } + } WT_CELL_FOREACH_END; /* * We track if an internal page has backing overflow keys, as overflow @@ -527,12 +505,10 @@ __inmem_row_leaf_entries( WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint32_t *nindxp) { WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *unpack, _unpack; - uint32_t i, nindx; + WT_CELL_UNPACK unpack; + uint32_t nindx; btree = S2BT(session); - unpack = &_unpack; /* * Leaf row-store page entries map to a maximum of one-to-one to the @@ -546,9 +522,8 @@ __inmem_row_leaf_entries( * single on-page (WT_CELL_VALUE) or overflow (WT_CELL_VALUE_OVFL) item. 
*/ nindx = 0; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { - __wt_cell_unpack(cell, unpack); - switch (unpack->type) { + WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) { + switch (unpack.type) { case WT_CELL_KEY: case WT_CELL_KEY_OVFL: ++nindx; @@ -556,9 +531,9 @@ __inmem_row_leaf_entries( case WT_CELL_VALUE: case WT_CELL_VALUE_OVFL: break; - WT_ILLEGAL_VALUE(session, unpack->type); + WT_ILLEGAL_VALUE(session, unpack.type); } - } + } WT_CELL_FOREACH_END; *nindxp = nindx; return (0); @@ -572,23 +547,17 @@ static int __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *unpack, _unpack; - const WT_PAGE_HEADER *dsk; + WT_CELL_UNPACK unpack; WT_ROW *rip; - uint32_t i; btree = S2BT(session); - dsk = page->dsk; - unpack = &_unpack; /* Walk the page, building indices. */ rip = page->pg_row; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { - __wt_cell_unpack(cell, unpack); - switch (unpack->type) { + WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, true) { + switch (unpack.type) { case WT_CELL_KEY_OVFL: - __wt_row_leaf_key_set_cell(page, rip, cell); + __wt_row_leaf_key_set_cell(page, rip, unpack.cell); ++rip; break; case WT_CELL_KEY: @@ -597,10 +566,11 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) * or prefix compressed), can be directly referenced on * the page to avoid repeatedly unpacking their cells. */ - if (!btree->huffman_key && unpack->prefix == 0) - __wt_row_leaf_key_set(page, rip, unpack); + if (!btree->huffman_key && unpack.prefix == 0) + __wt_row_leaf_key_set(page, rip, &unpack); else - __wt_row_leaf_key_set_cell(page, rip, cell); + __wt_row_leaf_key_set_cell( + page, rip, unpack.cell); ++rip; break; case WT_CELL_VALUE: @@ -610,13 +580,13 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) * their cells. 
*/ if (!btree->huffman_value) - __wt_row_leaf_value_set(page, rip - 1, unpack); + __wt_row_leaf_value_set(page, rip - 1, &unpack); break; case WT_CELL_VALUE_OVFL: break; - WT_ILLEGAL_VALUE(session, unpack->type); + WT_ILLEGAL_VALUE(session, unpack.type); } - } + } WT_CELL_FOREACH_END; /* * We do not currently instantiate keys on leaf pages when the page is diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c index a509bbb88bc..365c852a717 100644 --- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c +++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c @@ -194,11 +194,9 @@ __rebalance_col_walk( WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs) { WT_BTREE *btree; - WT_CELL *cell; WT_CELL_UNPACK unpack; WT_DECL_ITEM(buf); WT_DECL_RET; - uint32_t i; btree = S2BT(session); @@ -213,8 +211,7 @@ __rebalance_col_walk( * location cookie pairs. Keys are on-page/overflow items and location * cookies are WT_CELL_ADDR_XXX items. */ - WT_CELL_FOREACH(btree, dsk, cell, &unpack, i) { - __wt_cell_unpack(cell, &unpack); + WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) { switch (unpack.type) { case WT_CELL_ADDR_INT: /* An internal page: read it and recursively walk it. */ @@ -237,7 +234,7 @@ __rebalance_col_walk( break; WT_ILLEGAL_VALUE_ERR(session, unpack.type); } - } + } WT_CELL_FOREACH_END; err: __wt_scr_free(session, &buf); return (ret); @@ -278,13 +275,11 @@ __rebalance_row_walk( WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs) { WT_BTREE *btree; - WT_CELL *cell; WT_CELL_UNPACK key, unpack; WT_DECL_ITEM(buf); WT_DECL_ITEM(leafkey); WT_DECL_RET; size_t len; - uint32_t i; bool first_cell; const void *p; @@ -304,8 +299,7 @@ __rebalance_row_walk( * cookies are WT_CELL_ADDR_XXX items. 
*/ first_cell = true; - WT_CELL_FOREACH(btree, dsk, cell, &unpack, i) { - __wt_cell_unpack(cell, &unpack); + WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) { switch (unpack.type) { case WT_CELL_KEY: key = unpack; @@ -388,7 +382,7 @@ __rebalance_row_walk( break; WT_ILLEGAL_VALUE_ERR(session, unpack.type); } - } + } WT_CELL_FOREACH_END; err: __wt_scr_free(session, &buf); __wt_scr_free(session, &leafkey); diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c index 95ba4114345..6d08901539e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_ret.c +++ b/src/third_party/wiredtiger/src/btree/bt_ret.c @@ -110,7 +110,7 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) if (page->type == WT_PAGE_COL_VAR) { /* Take the value from the original page cell. */ cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]); - __wt_cell_unpack(cell, &unpack); + __wt_cell_unpack(page, cell, &unpack); return (__wt_page_cell_data_ref( session, page, &unpack, &cursor->value)); } diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c index a719af982ec..ea46bc8a72c 100644 --- a/src/third_party/wiredtiger/src/btree/bt_slvg.c +++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c @@ -521,16 +521,13 @@ __slvg_trk_leaf(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, uint8_t *addr, size_t addr_size, WT_STUFF *ss) { WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *unpack, _unpack; + WT_CELL_UNPACK unpack; WT_DECL_RET; WT_PAGE *page; WT_TRACK *trk; uint64_t stop_recno; - uint32_t i; btree = S2BT(session); - unpack = &_unpack; page = NULL; trk = NULL; @@ -565,10 +562,9 @@ __slvg_trk_leaf(WT_SESSION_IMPL *session, * the page. 
*/ stop_recno = dsk->recno; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { - __wt_cell_unpack(cell, unpack); - stop_recno += __wt_cell_rle(unpack); - } + WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) { + stop_recno += __wt_cell_rle(&unpack); + } WT_CELL_FOREACH_END; trk->col_start = dsk->recno; trk->col_stop = stop_recno - 1; @@ -661,23 +657,20 @@ __slvg_trk_leaf_ovfl( WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_TRACK *trk) { WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *unpack, _unpack; - uint32_t i, ovfl_cnt; + WT_CELL_UNPACK unpack; + uint32_t ovfl_cnt; btree = S2BT(session); - unpack = &_unpack; /* * Two passes: count the overflow items, then copy them into an * allocated array. */ ovfl_cnt = 0; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { - __wt_cell_unpack(cell, unpack); - if (unpack->ovfl) + WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) { + if (unpack.ovfl) ++ovfl_cnt; - } + } WT_CELL_FOREACH_END; if (ovfl_cnt == 0) return (0); @@ -686,25 +679,24 @@ __slvg_trk_leaf_ovfl( trk->trk_ovfl_cnt = ovfl_cnt; ovfl_cnt = 0; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { - __wt_cell_unpack(cell, unpack); - if (unpack->ovfl) { - WT_RET(__wt_memdup(session, unpack->data, - unpack->size, &trk->trk_ovfl_addr[ovfl_cnt].addr)); + WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, true) { + if (unpack.ovfl) { + WT_RET(__wt_memdup(session, unpack.data, + unpack.size, &trk->trk_ovfl_addr[ovfl_cnt].addr)); trk->trk_ovfl_addr[ovfl_cnt].size = - (uint8_t)unpack->size; + (uint8_t)unpack.size; __wt_verbose(session, WT_VERB_SALVAGE, "%s overflow reference %s", __wt_addr_string(session, trk->trk_addr, trk->trk_addr_size, trk->ss->tmp1), __wt_addr_string(session, - unpack->data, unpack->size, trk->ss->tmp2)); + unpack.data, unpack.size, trk->ss->tmp2)); if (++ovfl_cnt == trk->trk_ovfl_cnt) break; } - } + } WT_CELL_FOREACH_END; return (0); } @@ -1360,7 +1352,7 @@ __slvg_col_ovfl(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_COL_FOREACH(page, cip, i) { cell = 
WT_COL_PTR(page, cip); - __wt_cell_unpack(cell, &unpack); + __wt_cell_unpack(page, cell, &unpack); recno += __wt_cell_rle(&unpack); /* @@ -2083,7 +2075,7 @@ __slvg_row_ovfl(WT_SESSION_IMPL *session, (void)__wt_row_leaf_key_info( page, copy, NULL, &cell, NULL, NULL); if (cell != NULL) { - __wt_cell_unpack(cell, &unpack); + __wt_cell_unpack(page, cell, &unpack); WT_RET(__slvg_row_ovfl_single(session, trk, &unpack)); } __wt_row_leaf_value_cell(page, rip, NULL, &unpack); diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index b2e0883010d..5de4029ebc0 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -185,9 +185,9 @@ __split_ovfl_key_cleanup(WT_SESSION_IMPL *session, WT_PAGE *page, WT_REF *ref) ikey->cell_offset = 0; cell = WT_PAGE_REF_OFFSET(page, cell_offset); - __wt_cell_unpack(cell, &kpack); + __wt_cell_unpack(page, cell, &kpack); if (kpack.ovfl && kpack.raw != WT_CELL_KEY_OVFL_RM) - WT_RET(__wt_ovfl_discard(session, cell)); + WT_RET(__wt_ovfl_discard(session, page, cell)); return (0); } @@ -260,7 +260,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, */ WT_ORDERED_READ(ref_addr, ref->addr); if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) { - __wt_cell_unpack((WT_CELL *)ref_addr, &unpack); + __wt_cell_unpack(from_home, (WT_CELL *)ref_addr, &unpack); WT_RET(__wt_calloc_one(session, &addr)); WT_ERR(__wt_memdup( session, unpack.data, unpack.size, &addr->addr)); diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c index 88efe4e2e24..5fdce5edf96 100644 --- a/src/third_party/wiredtiger/src/btree/bt_stat.c +++ b/src/third_party/wiredtiger/src/btree/bt_stat.c @@ -165,7 +165,7 @@ __stat_page_col_var( ++deleted_cnt; } else { orig_deleted = false; - __wt_cell_unpack(cell, unpack); + __wt_cell_unpack(page, cell, unpack); if (unpack->type == WT_CELL_DEL) orig_deleted 
= true; else { @@ -230,9 +230,8 @@ __stat_page_row_int( WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats) { WT_BTREE *btree; - WT_CELL *cell; WT_CELL_UNPACK unpack; - uint32_t i, ovfl_cnt; + uint32_t ovfl_cnt; btree = S2BT(session); ovfl_cnt = 0; @@ -245,11 +244,10 @@ __stat_page_row_int( * a reference to the original cell. */ if (page->dsk != NULL) - WT_CELL_FOREACH(btree, page->dsk, cell, &unpack, i) { - __wt_cell_unpack(cell, &unpack); - if (__wt_cell_type(cell) == WT_CELL_KEY_OVFL) + WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, false) { + if (__wt_cell_type(unpack.cell) == WT_CELL_KEY_OVFL) ++ovfl_cnt; - } + } WT_CELL_FOREACH_END; WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt); } @@ -263,15 +261,15 @@ __stat_page_row_leaf( WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats) { WT_BTREE *btree; - WT_CELL *cell; WT_CELL_UNPACK unpack; WT_INSERT *ins; WT_ROW *rip; WT_UPDATE *upd; - uint32_t entry_cnt, i, ovfl_cnt; + uint32_t empty_values, entry_cnt, i, ovfl_cnt; + bool key; btree = S2BT(session); - entry_cnt = ovfl_cnt = 0; + empty_values = entry_cnt = ovfl_cnt = 0; WT_STAT_INCR(session, stats, btree_row_leaf); @@ -311,14 +309,33 @@ __stat_page_row_leaf( * Overflow keys are hard: we have to walk the disk image to count them, * the in-memory representation of the page doesn't necessarily contain * a reference to the original cell. + * + * Zero-length values are the same, we have to look at the disk image to + * know. They aren't stored but we know they exist if there are two keys + * in a row, or a key as the last item. 
*/ - if (page->dsk != NULL) - WT_CELL_FOREACH(btree, page->dsk, cell, &unpack, i) { - __wt_cell_unpack(cell, &unpack); - if (__wt_cell_type(cell) == WT_CELL_KEY_OVFL) + if (page->dsk != NULL) { + key = false; + WT_CELL_FOREACH_BEGIN(btree, page->dsk, unpack, false) { + switch (__wt_cell_type(unpack.cell)) { + case WT_CELL_KEY_OVFL: ++ovfl_cnt; - } + /* FALLTHROUGH */ + case WT_CELL_KEY: + if (key) + ++empty_values; + key = true; + break; + default: + key = false; + break; + } + } WT_CELL_FOREACH_END; + if (key) + ++empty_values; + } + WT_STAT_INCRV(session, stats, btree_row_empty_values, empty_values); WT_STAT_INCRV(session, stats, btree_entries, entry_cnt); WT_STAT_INCRV(session, stats, btree_overflow, ovfl_cnt); } diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c index 7f711be3480..dccfd97f322 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c @@ -416,7 +416,7 @@ recno_chk: if (recno != vs->record_total + 1) if ((cell = WT_COL_PTR(page, cip)) == NULL) ++recno; else { - __wt_cell_unpack(cell, unpack); + __wt_cell_unpack(page, cell, unpack); recno += __wt_cell_rle(unpack); } vs->record_total += recno; @@ -436,7 +436,7 @@ recno_chk: if (recno != vs->record_total + 1) /* If it's not the root page, unpack the parent cell. */ if (!__wt_ref_is_root(ref)) { - __wt_cell_unpack(ref->addr, unpack); + __wt_cell_unpack(ref->home, ref->addr, unpack); /* Compare the parent cell against the page type. 
*/ switch (page->type) { @@ -533,7 +533,8 @@ celltype_err: WT_RET_MSG(session, WT_ERROR, --vs->depth; WT_RET(ret); - __wt_cell_unpack(child_ref->addr, unpack); + __wt_cell_unpack( + child_ref->home, child_ref->addr, unpack); WT_RET(bm->verify_addr( bm, session, unpack->data, unpack->size)); } WT_INTL_FOREACH_END; @@ -563,7 +564,8 @@ celltype_err: WT_RET_MSG(session, WT_ERROR, --vs->depth; WT_RET(ret); - __wt_cell_unpack(child_ref->addr, unpack); + __wt_cell_unpack( + child_ref->home, child_ref->addr, unpack); WT_RET(bm->verify_addr( bm, session, unpack->data, unpack->size)); } WT_INTL_FOREACH_END; @@ -690,16 +692,14 @@ __verify_overflow_cell( WT_SESSION_IMPL *session, WT_REF *ref, bool *found, WT_VSTUFF *vs) { WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK *unpack, _unpack; + WT_CELL_UNPACK unpack; WT_DECL_RET; const WT_PAGE_HEADER *dsk; - uint32_t cell_num, i; + uint32_t cell_num; *found = false; btree = S2BT(session); - unpack = &_unpack; /* * If a tree is empty (just created), it won't have a disk image; @@ -710,18 +710,17 @@ __verify_overflow_cell( /* Walk the disk page, verifying pages referenced by overflow cells. 
*/ cell_num = 0; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { + WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, false) { ++cell_num; - __wt_cell_unpack(cell, unpack); - switch (unpack->type) { + switch (unpack.type) { case WT_CELL_KEY_OVFL: case WT_CELL_VALUE_OVFL: *found = true; WT_ERR(__verify_overflow( - session, unpack->data, unpack->size, vs)); + session, unpack.data, unpack.size, vs)); break; } - } + } WT_CELL_FOREACH_END; return (0); @@ -730,7 +729,7 @@ err: WT_RET_MSG(session, ret, "that failed verification", cell_num - 1, __wt_page_addr_string(session, ref, vs->tmp1), - __wt_addr_string(session, unpack->data, unpack->size, vs->tmp2)); + __wt_addr_string(session, unpack.data, unpack.size, vs->tmp2)); } /* diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c index 39f4a041ea9..8db215bd162 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c @@ -39,6 +39,18 @@ static int __verify_dsk_row( } while (0) /* + * WT_CELL_FOREACH_VRFY -- + * Iterate through each cell on a page. Verify-specific version of the + * WT_CELL_FOREACH macro, created because the loop can't simply unpack cells, + * verify has to do additional work to ensure that unpack is safe. + */ +#define WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) \ + for ((cell) = \ + WT_PAGE_HEADER_BYTE(btree, dsk), (i) = (dsk)->u.entries; \ + (i) > 0; \ + (cell) = (WT_CELL *)((uint8_t *)(cell) + (unpack)->__len), --(i)) + +/* * __wt_verify_dsk_image -- * Verify a single block as read from disk. */ @@ -48,7 +60,6 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session, { uint8_t flags; const uint8_t *p, *end; - u_int i; /* Check the page type. 
*/ switch (dsk->type) { @@ -114,12 +125,22 @@ __wt_verify_dsk_image(WT_SESSION_IMPL *session, "page at %s has invalid flags set: 0x%" PRIx8, tag, flags); - /* Unused bytes */ - for (p = dsk->unused, i = sizeof(dsk->unused); i > 0; --i) - if (*p != '\0') - WT_RET_VRFY(session, - "page at %s has non-zero unused page header bytes", - tag); + /* Check the unused byte. */ + if (dsk->unused != 0) + WT_RET_VRFY(session, + "page at %s has non-zero unused page header bytes", + tag); + + /* Check the page version. */ + switch (dsk->version) { + case WT_PAGE_VERSION_ORIG: + case WT_PAGE_VERSION_TS: + break; + default: + WT_RET_VRFY(session, + "page at %s has an invalid version of %" PRIu8, + tag, dsk->version); + } /* * Any bytes after the data chunk should be nul bytes; ignore if the @@ -226,11 +247,11 @@ __verify_dsk_row( last_cell_type = FIRST; cell_num = 0; key_cnt = 0; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { + WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) { ++cell_num; /* Carefully unpack the cell. */ - if (__wt_cell_unpack_safe(cell, unpack, dsk, end) != 0) { + if (__wt_cell_unpack_safe(dsk, cell, unpack, end) != 0) { ret = __err_cell_corrupt(session, cell_num, tag); goto err; } @@ -499,11 +520,11 @@ __verify_dsk_col_int( end = (uint8_t *)dsk + dsk->mem_size; cell_num = 0; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { + WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) { ++cell_num; /* Carefully unpack the cell. */ - if (__wt_cell_unpack_safe(cell, unpack, dsk, end) != 0) + if (__wt_cell_unpack_safe(dsk, cell, unpack, end) != 0) return (__err_cell_corrupt(session, cell_num, tag)); /* Check the raw and collapsed cell types. */ @@ -570,11 +591,11 @@ __verify_dsk_col_var( last_deleted = false; cell_num = 0; - WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { + WT_CELL_FOREACH_VRFY(btree, dsk, cell, unpack, i) { ++cell_num; /* Carefully unpack the cell. 
*/ - if (__wt_cell_unpack_safe(cell, unpack, dsk, end) != 0) + if (__wt_cell_unpack_safe(dsk, cell, unpack, end) != 0) return (__err_cell_corrupt(session, cell_num, tag)); /* Check the raw and collapsed cell types. */ diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c index 39eddb6e0cc..b7406d47329 100644 --- a/src/third_party/wiredtiger/src/btree/row_key.c +++ b/src/third_party/wiredtiger/src/btree/row_key.c @@ -262,7 +262,7 @@ switch_and_jump: /* Switching to a forward roll. */ /* * It must be an on-page cell, unpack it. */ - __wt_cell_unpack(cell, unpack); + __wt_cell_unpack(page, cell, unpack); /* 3: the test for an on-page reference to an overflow key. */ if (unpack->type == WT_CELL_KEY_OVFL) { @@ -286,7 +286,7 @@ switch_and_jump: /* Switching to a forward roll. */ copy = WT_ROW_KEY_COPY(rip); if (!__wt_row_leaf_key_info(page, copy, NULL, &cell, &keyb->data, &keyb->size)) { - __wt_cell_unpack(cell, unpack); + __wt_cell_unpack(page, cell, unpack); ret = __wt_dsk_cell_data_ref(session, WT_PAGE_ROW_LEAF, unpack, keyb); } diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index b9b66bf0c7b..3f5472ac66a 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -80,12 +80,12 @@ struct __wt_page_header { #define WT_PAGE_LAS_UPDATE 0x10u /* Page updates in lookaside store */ uint8_t flags; /* 25: flags */ - /* - * End the structure with 2 bytes of padding: it wastes space, but it - * leaves the structure 32-bit aligned and having a few bytes to play - * with in the future can't hurt. - */ - uint8_t unused[2]; /* 26-27: unused padding */ + /* A byte of padding, positioned to be added to the flags. 
*/ + uint8_t unused; /* 26: unused padding */ + +#define WT_PAGE_VERSION_ORIG 0 /* Original version */ +#define WT_PAGE_VERSION_TS 1 /* Timestamps added */ + uint8_t version; /* 27: version */ }; /* * WT_PAGE_HEADER_SIZE is the number of bytes we allocate for the structure: if diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index c9004138370..856a761ca4f 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -1048,13 +1048,13 @@ __wt_row_leaf_value_cell( page, copy, NULL, &kcell, &key, &size) && kcell == NULL) vcell = (WT_CELL *)((uint8_t *)key + size); else { - __wt_cell_unpack(kcell, &unpack); + __wt_cell_unpack(page, kcell, &unpack); vcell = (WT_CELL *)((uint8_t *) unpack.cell + __wt_cell_total_len(&unpack)); } } - __wt_cell_unpack(__wt_cell_leaf_value_parse(page, vcell), vpack); + __wt_cell_unpack(page, __wt_cell_leaf_value_parse(page, vcell), vpack); } /* @@ -1091,9 +1091,11 @@ __wt_ref_info(WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep) { WT_ADDR *addr; WT_CELL_UNPACK *unpack, _unpack; + WT_PAGE *page; addr = ref->addr; unpack = &_unpack; + page = ref->home; /* * If NULL, there is no location. 
@@ -1107,7 +1109,7 @@ __wt_ref_info(WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep) *sizep = 0; if (typep != NULL) *typep = 0; - } else if (__wt_off_page(ref->home, addr)) { + } else if (__wt_off_page(page, addr)) { *addrp = addr->addr; *sizep = addr->size; if (typep != NULL) @@ -1126,7 +1128,7 @@ __wt_ref_info(WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep) break; } } else { - __wt_cell_unpack((WT_CELL *)addr, unpack); + __wt_cell_unpack(page, (WT_CELL *)addr, unpack); *addrp = unpack->data; *sizep = unpack->size; if (typep != NULL) @@ -1364,7 +1366,7 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page) mod->last_eviction_id != __wt_txn_oldest_id(session)) return (true); - if (mod->last_eviction_timestamp == 0) + if (mod->last_eviction_timestamp == WT_TS_NONE) return (true); __wt_txn_pinned_timestamp(session, &pinned_ts); diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i index cea27e1b26f..94e50ae7917 100644 --- a/src/third_party/wiredtiger/src/include/cell.i +++ b/src/third_party/wiredtiger/src/include/cell.i @@ -16,8 +16,9 @@ * * There are 4 basic cell types: keys and data (each of which has an overflow * form), deleted cells and off-page references. The cell is usually followed - * by additional data, varying by type: a key or data cell is followed by a set - * of bytes, an address cookie follows overflow or off-page cells. + * by additional data, varying by type: keys are followed by a chunk of data, + * data is followed by a pair of timestamps and a chunk of data, overflow and + * off-page cells are followed by an address cookie. * * Deleted cells are place-holders for column-store files, where entries cannot * be removed in order to preserve the record count. 
@@ -51,7 +52,8 @@ * 0x01 Short key cell * 0x10 Short key cell, with a following prefix-compression byte * 0x11 Short value cell - * In these cases, the other 6 bits of the descriptor byte are the data length. + * In the "short" variants, the other 6 bits of the descriptor byte are the + * data length. * * Bit 3 marks an 8B packed, uint64_t value following the cell description byte. * (A run-length counter or a record number for variable-length column store.) @@ -88,7 +90,7 @@ * * WT_CELL_VALUE_COPY is a reference to a previous cell on the page, supporting * value dictionaries: if the two values are the same, we only store them once - * and have the second and subsequent use reference the original. + * and have any second and subsequent uses reference the original. */ #define WT_CELL_ADDR_DEL (0) /* Address: deleted */ #define WT_CELL_ADDR_INT (1 << 4) /* Address: internal */ @@ -122,16 +124,20 @@ */ struct __wt_cell { /* - * Maximum of 16 bytes: + * Maximum of 34 bytes: * 1: cell descriptor byte * 1: prefix compression count + * 9: start timestamp (uint64_t encoding, max 9 bytes) + * 9: stop timestamp (uint64_t encoding, max 9 bytes) * 9: associated 64-bit value (uint64_t encoding, max 9 bytes) * 5: data length (uint32_t encoding, max 5 bytes) * * This calculation is pessimistic: the prefix compression count and - * 64V value overlap, the 64V value and data length are optional. + * 64V value overlap, the 64V value and data length are optional, and + * timestamps only appear in values. */ - uint8_t __chunk[1 + 1 + WT_INTPACK64_MAXSIZE + WT_INTPACK32_MAXSIZE]; + uint8_t __chunk[1 + 1 + + 3 * WT_INTPACK64_MAXSIZE + WT_INTPACK32_MAXSIZE]; }; /* @@ -143,6 +149,9 @@ struct __wt_cell_unpack { uint64_t v; /* RLE count or recno */ + /* start/stop timestamps */ + wt_timestamp_t start_ts, stop_ts; + /* * !!! 
* The size and __len fields are reasonably type size_t; don't change @@ -162,16 +171,6 @@ struct __wt_cell_unpack { }; /* - * WT_CELL_FOREACH -- - * Walk the cells on a page. - */ -#define WT_CELL_FOREACH(btree, dsk, cell, unpack, i) \ - for ((cell) = \ - WT_PAGE_HEADER_BYTE(btree, dsk), (i) = (dsk)->u.entries; \ - (i) > 0; \ - (cell) = (WT_CELL *)((uint8_t *)(cell) + (unpack)->__len), --(i)) - -/* * __wt_cell_pack_addr -- * Pack an address cell. */ @@ -193,14 +192,33 @@ __wt_cell_pack_addr(WT_CELL *cell, u_int cell_type, uint64_t recno, size_t size) } /* + * __cell_pack_timestamp_pair -- + * Pack a start, stop timestamp pair. + */ +static inline void +__cell_pack_timestamp_pair( + uint8_t **pp, wt_timestamp_t start_ts, wt_timestamp_t stop_ts) +{ + if (__wt_process.page_version_ts) { + /* Start timestamp, stop timestamp difference. */ + (void)__wt_vpack_uint(pp, 0, start_ts); + (void)__wt_vpack_uint(pp, 0, stop_ts - start_ts); + } +} + +/* * __wt_cell_pack_data -- * Set a data item's WT_CELL contents. */ static inline size_t -__wt_cell_pack_data(WT_CELL *cell, uint64_t rle, size_t size) +__wt_cell_pack_data(WT_CELL *cell, + wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, size_t size) { uint8_t byte, *p; + p = cell->__chunk + 1; + __cell_pack_timestamp_pair(&p, start_ts, stop_ts); + /* * Short data cells without run-length encoding have 6 bits of data * length in the descriptor byte. 
@@ -209,72 +227,86 @@ __wt_cell_pack_data(WT_CELL *cell, uint64_t rle, size_t size) byte = (uint8_t)size; /* Type + length */ cell->__chunk[0] = (uint8_t) ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_VALUE_SHORT); - return (1); - } - - p = cell->__chunk + 1; - if (rle < 2) { - size -= WT_CELL_SIZE_ADJUST; - cell->__chunk[0] = WT_CELL_VALUE; /* Type */ } else { - cell->__chunk[0] = WT_CELL_VALUE | WT_CELL_64V; - (void)__wt_vpack_uint(&p, 0, rle); /* RLE */ + if (rle < 2) { + size -= WT_CELL_SIZE_ADJUST; + cell->__chunk[0] = WT_CELL_VALUE; /* Type */ + } else { + cell->__chunk[0] = WT_CELL_VALUE | WT_CELL_64V; + (void)__wt_vpack_uint(&p, 0, rle); /* RLE */ + } + (void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */ } - (void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */ return (WT_PTRDIFF(p, cell)); } /* * __wt_cell_pack_data_match -- - * Return if two items would have identical WT_CELLs (except for any RLE). + * Return if two items would have identical WT_CELLs (except for timestamps + * and any RLE). */ static inline int -__wt_cell_pack_data_match( - WT_CELL *page_cell, WT_CELL *val_cell, const uint8_t *val_data, - bool *matchp) +__wt_cell_pack_data_match(WT_CELL *page_cell, + WT_CELL *val_cell, const uint8_t *val_data, bool *matchp) { - uint64_t av, bv; + uint64_t alen, blen, v; const uint8_t *a, *b; bool rle; - *matchp = 0; /* Default to no-match */ + *matchp = false; /* Default to no-match */ /* * This is a special-purpose function used by reconciliation to support * dictionary lookups. We're passed an on-page cell and a created cell * plus a chunk of data we're about to write on the page, and we return * if they would match on the page. The column-store comparison ignores - * the RLE because the copied cell will have its own RLE. + * the timestamps and the RLE because the copied cell will have its own. 
*/ a = (uint8_t *)page_cell; b = (uint8_t *)val_cell; if (WT_CELL_SHORT_TYPE(a[0]) == WT_CELL_VALUE_SHORT) { - av = a[0] >> WT_CELL_SHORT_SHIFT; + alen = a[0] >> WT_CELL_SHORT_SHIFT; ++a; + if (__wt_process.page_version_ts) { + WT_RET(__wt_vunpack_uint(&a, 0, &v)); /* Skip TS */ + WT_RET(__wt_vunpack_uint(&a, 0, &v)); + } } else if (WT_CELL_TYPE(a[0]) == WT_CELL_VALUE) { - rle = (a[0] & WT_CELL_64V) != 0; /* Skip any RLE */ + rle = (a[0] & WT_CELL_64V) != 0; ++a; - if (rle) - WT_RET(__wt_vunpack_uint(&a, 0, &av)); - WT_RET(__wt_vunpack_uint(&a, 0, &av)); /* Length */ + if (__wt_process.page_version_ts) { + WT_RET(__wt_vunpack_uint(&a, 0, &v)); /* Skip TS */ + WT_RET(__wt_vunpack_uint(&a, 0, &v)); + } + if (rle) /* Skip RLE */ + WT_RET(__wt_vunpack_uint(&a, 0, &v)); + WT_RET(__wt_vunpack_uint(&a, 0, &alen)); /* Length */ } else return (0); if (WT_CELL_SHORT_TYPE(b[0]) == WT_CELL_VALUE_SHORT) { - bv = b[0] >> WT_CELL_SHORT_SHIFT; + blen = b[0] >> WT_CELL_SHORT_SHIFT; ++b; + if (__wt_process.page_version_ts) { + WT_RET(__wt_vunpack_uint(&b, 0, &v)); /* Skip TS */ + WT_RET(__wt_vunpack_uint(&b, 0, &v)); + } } else if (WT_CELL_TYPE(b[0]) == WT_CELL_VALUE) { - rle = (b[0] & WT_CELL_64V) != 0; /* Skip any RLE */ + rle = (b[0] & WT_CELL_64V) != 0; ++b; - if (rle) - WT_RET(__wt_vunpack_uint(&b, 0, &bv)); - WT_RET(__wt_vunpack_uint(&b, 0, &bv)); /* Length */ + if (__wt_process.page_version_ts) { + WT_RET(__wt_vunpack_uint(&b, 0, &v)); /* Skip TS */ + WT_RET(__wt_vunpack_uint(&b, 0, &v)); + } + if (rle) /* Skip RLE */ + WT_RET(__wt_vunpack_uint(&b, 0, &v)); + WT_RET(__wt_vunpack_uint(&b, 0, &blen)); /* Length */ } else return (0); - if (av == bv) - *matchp = memcmp(a, val_data, av) == 0; + if (alen == blen) + *matchp = memcmp(a, val_data, alen) == 0; return (0); } @@ -283,16 +315,19 @@ __wt_cell_pack_data_match( * Write a copy value cell. 
*/ static inline size_t -__wt_cell_pack_copy(WT_CELL *cell, uint64_t rle, uint64_t v) +__wt_cell_pack_copy(WT_CELL *cell, + wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, uint64_t v) { uint8_t *p; p = cell->__chunk + 1; + __cell_pack_timestamp_pair(&p, start_ts, stop_ts); - if (rle < 2) /* Type */ - cell->__chunk[0] = WT_CELL_VALUE_COPY; - else { /* Type */ - cell->__chunk[0] = WT_CELL_VALUE_COPY | WT_CELL_64V; + if (rle < 2) + cell->__chunk[0] = WT_CELL_VALUE_COPY; /* Type */ + else { + cell->__chunk[0] = /* Type */ + WT_CELL_VALUE_COPY | WT_CELL_64V; (void)__wt_vpack_uint(&p, 0, rle); /* RLE */ } (void)__wt_vpack_uint(&p, 0, v); /* Copy offset */ @@ -304,18 +339,21 @@ __wt_cell_pack_copy(WT_CELL *cell, uint64_t rle, uint64_t v) * Write a deleted value cell. */ static inline size_t -__wt_cell_pack_del(WT_CELL *cell, uint64_t rle) +__wt_cell_pack_del(WT_CELL *cell, + wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle) { uint8_t *p; p = cell->__chunk + 1; - if (rle < 2) { /* Type */ - cell->__chunk[0] = WT_CELL_DEL; - return (1); + __cell_pack_timestamp_pair(&p, start_ts, stop_ts); + + if (rle < 2) + cell->__chunk[0] = WT_CELL_DEL; /* Type */ + else { + cell->__chunk[0] = /* Type */ + WT_CELL_DEL | WT_CELL_64V; + (void)__wt_vpack_uint(&p, 0, rle); /* RLE */ } - /* Type */ - cell->__chunk[0] = WT_CELL_DEL | WT_CELL_64V; - (void)__wt_vpack_uint(&p, 0, rle); /* RLE */ return (WT_PTRDIFF(p, cell)); } @@ -341,7 +379,6 @@ __wt_cell_pack_int_key(WT_CELL *cell, size_t size) size -= WT_CELL_SIZE_ADJUST; (void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */ - return (WT_PTRDIFF(p, cell)); } @@ -380,7 +417,6 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) size -= WT_CELL_SIZE_ADJUST; (void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */ - return (WT_PTRDIFF(p, cell)); } @@ -389,15 +425,26 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) * Pack an overflow cell. 
*/ static inline size_t -__wt_cell_pack_ovfl(WT_CELL *cell, uint8_t type, uint64_t rle, size_t size) +__wt_cell_pack_ovfl(WT_CELL *cell, uint8_t type, + wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, size_t size) { uint8_t *p; p = cell->__chunk + 1; - if (rle < 2) /* Type */ - cell->__chunk[0] = type; + switch (type) { + case WT_CELL_KEY_OVFL: + case WT_CELL_KEY_OVFL_RM: + break; + case WT_CELL_VALUE_OVFL: + case WT_CELL_VALUE_OVFL_RM: + __cell_pack_timestamp_pair(&p, start_ts, stop_ts); + break; + } + + if (rle < 2) + cell->__chunk[0] = type; /* Type */ else { - cell->__chunk[0] = type | WT_CELL_64V; + cell->__chunk[0] = type | WT_CELL_64V; /* Type */ (void)__wt_vpack_uint(&p, 0, rle); /* RLE */ } (void)__wt_vpack_uint(&p, 0, (uint64_t)size); /* Length */ @@ -543,31 +590,34 @@ __wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell) /* * __wt_cell_unpack_safe -- - * Unpack a WT_CELL into a structure during verification. + * Unpack a WT_CELL into a structure, with optional boundary checks. */ static inline int -__wt_cell_unpack_safe( - WT_CELL *cell, WT_CELL_UNPACK *unpack, const void *start, const void *end) +__wt_cell_unpack_safe(const WT_PAGE_HEADER *dsk, + WT_CELL *cell, WT_CELL_UNPACK *unpack, const void *end) { struct { - uint32_t len; uint64_t v; + wt_timestamp_t start_ts, stop_ts; + uint32_t len; } copy; uint64_t v; const uint8_t *p; - copy.len = 0; copy.v = 0; /* -Werror=maybe-uninitialized */ + copy.start_ts = WT_TS_NONE; + copy.stop_ts = WT_TS_MAX; + copy.len = 0; /* - * The verification code specifies start/end arguments, pointers to the - * start of the page and to 1 past the end-of-page. In which case, make - * sure all reads are inside the page image. If an error occurs, return - * an error code but don't output messages, our caller handles that. + * The verification code specifies an end argument, a pointer to 1B past + * the end-of-page. In which case, make sure all reads are inside the + * page image. 
If an error occurs, return an error code but don't output + * messages, our caller handles that. */ #define WT_CELL_LEN_CHK(t, len) do { \ - if (start != NULL && \ - ((uint8_t *)(t) < (uint8_t *)start || \ + if (end != NULL && \ + ((uint8_t *)(t) < (uint8_t *)dsk || \ (((uint8_t *)(t)) + (len)) > (uint8_t *)end)) \ return (WT_ERROR); \ } while (0) @@ -583,13 +633,16 @@ restart: WT_CELL_LEN_CHK(cell, 0); unpack->cell = cell; unpack->v = 0; + unpack->start_ts = WT_TS_NONE; + unpack->stop_ts = WT_TS_MAX; unpack->raw = (uint8_t)__wt_cell_type_raw(cell); unpack->type = (uint8_t)__wt_cell_type(cell); unpack->ovfl = 0; /* - * Handle cells with neither an RLE count or data length: short key/data - * cells have 6 bits of data length in the descriptor byte. + * Handle cells with neither RLE counts, timestamps or a data length: + * short key cells have 6 bits of data length in the descriptor byte + * and nothing else. */ switch (unpack->raw) { case WT_CELL_KEY_SHORT_PFX: @@ -600,7 +653,6 @@ restart: unpack->__len = 2 + unpack->size; goto done; case WT_CELL_KEY_SHORT: - case WT_CELL_VALUE_SHORT: unpack->prefix = 0; unpack->data = cell->__chunk + 1; unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT; @@ -625,6 +677,38 @@ restart: unpack->prefix = cell->__chunk[1]; } + /* Check for start/stop timestamps. */ + switch (unpack->raw) { + case WT_CELL_DEL: + case WT_CELL_VALUE: + case WT_CELL_VALUE_COPY: + case WT_CELL_VALUE_OVFL: + case WT_CELL_VALUE_OVFL_RM: + case WT_CELL_VALUE_SHORT: + if (dsk->version < WT_PAGE_VERSION_TS) + break; + + WT_RET(__wt_vunpack_uint(&p, + end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->start_ts)); + WT_ASSERT(NULL, + unpack->start_ts == WT_TS_NONE || + unpack->start_ts == WT_TS_FIXME); + WT_RET(__wt_vunpack_uint(&p, + end == NULL ? 
0 : WT_PTRDIFF(end, p), &unpack->stop_ts)); + unpack->stop_ts += unpack->start_ts; + WT_ASSERT(NULL, + unpack->stop_ts == WT_TS_FIXME || + unpack->stop_ts == WT_TS_MAX); + break; + } + + if (unpack->raw == WT_CELL_VALUE_SHORT) { + unpack->data = p; + unpack->size = cell->__chunk[0] >> WT_CELL_SHORT_SHIFT; + unpack->__len = WT_PTRDIFF32(p, cell) + unpack->size; + goto done; + } + /* * Check for an RLE count or record number that optionally follows the * cell descriptor byte on column-store variable-length pages. @@ -649,8 +733,10 @@ restart: */ WT_RET(__wt_vunpack_uint( &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v)); - copy.len = WT_PTRDIFF32(p, cell); copy.v = unpack->v; + copy.start_ts = unpack->start_ts; + copy.stop_ts = unpack->stop_ts; + copy.len = WT_PTRDIFF32(p, cell); cell = (WT_CELL *)((uint8_t *)cell - v); goto restart; @@ -703,27 +789,33 @@ restart: done: WT_CELL_LEN_CHK(cell, unpack->__len); if (copy.len != 0) { unpack->raw = WT_CELL_VALUE_COPY; - unpack->__len = copy.len; unpack->v = copy.v; + unpack->start_ts = copy.start_ts; + unpack->stop_ts = copy.stop_ts; + unpack->__len = copy.len; } return (0); } /* - * __wt_cell_unpack -- + * __wt_cell_unpack_dsk -- * Unpack a WT_CELL into a structure. */ static inline void -__wt_cell_unpack(WT_CELL *cell, WT_CELL_UNPACK *unpack) +__wt_cell_unpack_dsk( + const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack) { /* * Row-store doesn't store zero-length values on pages, but this allows - * us to pretend. + * us to pretend. If there aren't any timestamps (which is what it will + * take to get to a zero-length item), the value must be stable. 
*/ if (cell == NULL) { unpack->cell = NULL; unpack->v = 0; + unpack->start_ts = WT_TS_NONE; + unpack->stop_ts = WT_TS_MAX; unpack->data = ""; unpack->size = 0; unpack->__len = 0; @@ -733,7 +825,17 @@ __wt_cell_unpack(WT_CELL *cell, WT_CELL_UNPACK *unpack) return; } - (void)__wt_cell_unpack_safe(cell, unpack, NULL, NULL); + (void)__wt_cell_unpack_safe(dsk, cell, unpack, NULL); +} + +/* + * __wt_cell_unpack -- + * Unpack a WT_CELL into a structure. + */ +static inline void +__wt_cell_unpack(WT_PAGE *page, WT_CELL *cell, WT_CELL_UNPACK *unpack) +{ + __wt_cell_unpack_dsk(page->dsk, cell, unpack); } /* @@ -818,3 +920,27 @@ __wt_page_cell_data_ref(WT_SESSION_IMPL *session, { return (__cell_data_ref(session, page, page->type, unpack, store)); } + +/* + * WT_CELL_FOREACH -- + * Walk the cells on a page. + */ +#define WT_CELL_FOREACH_BEGIN(btree, dsk, unpack, skip_ts) do { \ + uint32_t __i; \ + uint8_t *__cell; \ + for (__cell = WT_PAGE_HEADER_BYTE(btree, dsk), \ + __i = (dsk)->u.entries; \ + __i > 0; __cell += (unpack).__len, --__i) { \ + __wt_cell_unpack_dsk(dsk, (WT_CELL *)__cell, &(unpack));\ + /* \ + * Optionally skip unstable page entries after downgrade\ + * to a release without page timestamps. Check for cells\ + * with unstable timestamps when we're not writing such \ + * cells ourselves. 
\ + */ \ + if ((skip_ts) && \ + (unpack).stop_ts != WT_TS_MAX && \ + !__wt_process.page_version_ts) \ + continue; +#define WT_CELL_FOREACH_END \ + } } while (0) diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 5fb0cee2b91..557f8117f17 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -18,14 +18,18 @@ struct __wt_process { /* Locked: connection queue */ TAILQ_HEAD(__wt_connection_impl_qh, __wt_connection_impl) connqh; - WT_CACHE_POOL *cache_pool; -#define WT_TSC_DEFAULT_RATIO 1.0 - double tsc_nsec_ratio; /* rdtsc ticks to nanoseconds */ - bool use_epochtime; /* use expensive time */ + + bool page_version_ts; /* timestamp version page formats */ /* Checksum function */ #define __wt_checksum(chunk, len) __wt_process.checksum(chunk, len) uint32_t (*checksum)(const void *, size_t); + +#define WT_TSC_DEFAULT_RATIO 1.0 + double tsc_nsec_ratio; /* rdtsc ticks to nanoseconds */ + bool use_epochtime; /* use expensive time */ + + WT_CACHE_POOL *cache_pool; /* shared cache information */ }; extern WT_PROCESS __wt_process; diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i index 1b301a3d28a..0c5182de528 100644 --- a/src/third_party/wiredtiger/src/include/cursor.i +++ b/src/third_party/wiredtiger/src/include/cursor.i @@ -425,7 +425,7 @@ __cursor_row_slot_return(WT_CURSOR_BTREE *cbt, WT_ROW *rip, WT_UPDATE *upd) */ kpack = &_kpack; memset(kpack, 0, sizeof(*kpack)); - __wt_cell_unpack(cell, kpack); + __wt_cell_unpack(page, cell, kpack); if (kpack->type == WT_CELL_KEY && cbt->rip_saved != NULL && cbt->rip_saved == rip - 1) { WT_ASSERT(session, cbt->row_key->size >= kpack->prefix); @@ -495,8 +495,23 @@ __cursor_check_prepared_update(WT_CURSOR_BTREE *cbt, bool *visiblep) /* The update that returned prepared conflict is now visible. 
*/ F_CLR(cbt, WT_CBT_ITERATE_RETRY_NEXT | WT_CBT_ITERATE_RETRY_PREV); - if (*visiblep) - WT_RET(__cursor_kv_return(session, cbt, upd)); + if (*visiblep) { + /* + * The underlying key-return function uses a comparison value + * of 0 to indicate the search function has pre-built the key + * we want to return. That's not the case, don't take that path. + */ + cbt->compare = 1; + /* + * If a prepared delete operation is resolved, it will be + * visible, but key is not valid. The update will be null in + * that case and we continue with cursor navigation. + */ + if (upd != NULL) + WT_RET(__cursor_kv_return(session, cbt, upd)); + else + *visiblep = false; + } return (0); } diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 50eaa16e847..deb21e86d38 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -155,7 +155,7 @@ extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *add extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store, bool *decoded) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_ovfl_discard_remove(WT_SESSION_IMPL *session, WT_PAGE *page); extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, bool evicting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, uint32_t flags, WT_PAGE **pagep) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 35bd30419da..6c3cab88923 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -736,6 +736,7 @@ struct __wt_dsrc_stats { int64_t btree_entries; int64_t btree_overflow; int64_t btree_compact_rewrite; + int64_t btree_row_empty_values; int64_t btree_row_internal; int64_t btree_row_leaf; int64_t cache_bytes_inuse; diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h index 8d093a81588..aebfce5e0ad 100644 --- a/src/third_party/wiredtiger/src/include/txn.h +++ b/src/third_party/wiredtiger/src/include/txn.h @@ -41,7 +41,10 @@ #define WT_SESSION_IS_CHECKPOINT(s) \ ((s)->id != 0 && (s)->id == S2C(s)->txn_global.checkpoint_id) -#define WT_TS_NONE 0 /* No timestamp */ +#define WT_TS_NONE 0 /* No (or earliest) timestamp) */ +#define WT_TS_FIXME 37 /* Fake timestamp */ +#define WT_TS_MAX UINT64_MAX /* Valid after start time */ + /* Bytes to hold a hex timestamp */ #define WT_TS_HEX_SIZE (2 * sizeof(wt_timestamp_t) + 1) diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index a297db2cf9e..d0acacae5f3 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -441,7 +441,7 @@ __wt_txn_op_set_timestamp(WT_SESSION_IMPL *session, WT_TXN_OP *op) */ timestamp = op->type == WT_TXN_OP_REF_DELETE ? 
&op->u.ref->page_del->timestamp : &op->u.op_upd->timestamp; - if (*timestamp == 0) + if (*timestamp == WT_TS_NONE) *timestamp = txn->commit_timestamp; } } diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 33d0b1ed74d..6fdd1c6408d 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -5827,291 +5827,296 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); /*! btree: pages rewritten by compaction */ #define WT_STAT_DSRC_BTREE_COMPACT_REWRITE 2037 /*! + * btree: row-store empty values, only reported if tree_walk or all + * statistics are enabled + */ +#define WT_STAT_DSRC_BTREE_ROW_EMPTY_VALUES 2038 +/*! * btree: row-store internal pages, only reported if tree_walk or all * statistics are enabled */ -#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038 +#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2039 /*! * btree: row-store leaf pages, only reported if tree_walk or all * statistics are enabled */ -#define WT_STAT_DSRC_BTREE_ROW_LEAF 2039 +#define WT_STAT_DSRC_BTREE_ROW_LEAF 2040 /*! cache: bytes currently in the cache */ -#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2040 +#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2041 /*! cache: bytes dirty in the cache cumulative */ -#define WT_STAT_DSRC_CACHE_BYTES_DIRTY_TOTAL 2041 +#define WT_STAT_DSRC_CACHE_BYTES_DIRTY_TOTAL 2042 /*! cache: bytes read into cache */ -#define WT_STAT_DSRC_CACHE_BYTES_READ 2042 +#define WT_STAT_DSRC_CACHE_BYTES_READ 2043 /*! cache: bytes written from cache */ -#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2043 +#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2044 /*! cache: checkpoint blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2044 +#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2045 /*! 
cache: data source pages selected for eviction unable to be evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2045 +#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2046 /*! cache: eviction walk passes of a file */ -#define WT_STAT_DSRC_CACHE_EVICTION_WALK_PASSES 2046 +#define WT_STAT_DSRC_CACHE_EVICTION_WALK_PASSES 2047 /*! cache: eviction walk target pages histogram - 0-9 */ -#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT10 2047 +#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT10 2048 /*! cache: eviction walk target pages histogram - 10-31 */ -#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT32 2048 +#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT32 2049 /*! cache: eviction walk target pages histogram - 128 and higher */ -#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_GE128 2049 +#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_GE128 2050 /*! cache: eviction walk target pages histogram - 32-63 */ -#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT64 2050 +#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT64 2051 /*! cache: eviction walk target pages histogram - 64-128 */ -#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT128 2051 +#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT128 2052 /*! cache: eviction walks abandoned */ -#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ABANDONED 2052 +#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ABANDONED 2053 /*! cache: eviction walks gave up because they restarted their walk twice */ -#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_STOPPED 2053 +#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_STOPPED 2054 /*! * cache: eviction walks gave up because they saw too many pages and * found no candidates */ -#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 2054 +#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 2055 /*! 
* cache: eviction walks gave up because they saw too many pages and * found too few candidates */ -#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 2055 +#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 2056 /*! cache: eviction walks reached end of tree */ -#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ENDED 2056 +#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ENDED 2057 /*! cache: eviction walks started from root of tree */ -#define WT_STAT_DSRC_CACHE_EVICTION_WALK_FROM_ROOT 2057 +#define WT_STAT_DSRC_CACHE_EVICTION_WALK_FROM_ROOT 2058 /*! cache: eviction walks started from saved location in tree */ -#define WT_STAT_DSRC_CACHE_EVICTION_WALK_SAVED_POS 2058 +#define WT_STAT_DSRC_CACHE_EVICTION_WALK_SAVED_POS 2059 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2059 +#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2060 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2060 +#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2061 /*! cache: in-memory page splits */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2061 +#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2062 /*! cache: internal pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2062 +#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2063 /*! cache: internal pages split during eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2063 +#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2064 /*! cache: leaf pages split during eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2064 +#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2065 /*! cache: modified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2065 +#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2066 /*! cache: overflow pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2066 +#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2067 /*! 
cache: page split during eviction deepened the tree */ -#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2067 +#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2068 /*! cache: page written requiring cache overflow records */ -#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2068 +#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2069 /*! cache: pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ 2069 +#define WT_STAT_DSRC_CACHE_READ 2070 /*! cache: pages read into cache after truncate */ -#define WT_STAT_DSRC_CACHE_READ_DELETED 2070 +#define WT_STAT_DSRC_CACHE_READ_DELETED 2071 /*! cache: pages read into cache after truncate in prepare state */ -#define WT_STAT_DSRC_CACHE_READ_DELETED_PREPARED 2071 +#define WT_STAT_DSRC_CACHE_READ_DELETED_PREPARED 2072 /*! cache: pages read into cache requiring cache overflow entries */ -#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2072 +#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2073 /*! cache: pages requested from the cache */ -#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2073 +#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2074 /*! cache: pages seen by eviction walk */ -#define WT_STAT_DSRC_CACHE_EVICTION_PAGES_SEEN 2074 +#define WT_STAT_DSRC_CACHE_EVICTION_PAGES_SEEN 2075 /*! cache: pages written from cache */ -#define WT_STAT_DSRC_CACHE_WRITE 2075 +#define WT_STAT_DSRC_CACHE_WRITE 2076 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2076 +#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2077 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_DSRC_CACHE_BYTES_DIRTY 2077 +#define WT_STAT_DSRC_CACHE_BYTES_DIRTY 2078 /*! cache: unmodified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2078 +#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2079 /*! 
* cache_walk: Average difference between current eviction generation * when the page was last considered, only reported if cache_walk or all * statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_GEN_AVG_GAP 2079 +#define WT_STAT_DSRC_CACHE_STATE_GEN_AVG_GAP 2080 /*! * cache_walk: Average on-disk page image size seen, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_AVG_WRITTEN_SIZE 2080 +#define WT_STAT_DSRC_CACHE_STATE_AVG_WRITTEN_SIZE 2081 /*! * cache_walk: Average time in cache for pages that have been visited by * the eviction server, only reported if cache_walk or all statistics are * enabled */ -#define WT_STAT_DSRC_CACHE_STATE_AVG_VISITED_AGE 2081 +#define WT_STAT_DSRC_CACHE_STATE_AVG_VISITED_AGE 2082 /*! * cache_walk: Average time in cache for pages that have not been visited * by the eviction server, only reported if cache_walk or all statistics * are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_AVG_UNVISITED_AGE 2082 +#define WT_STAT_DSRC_CACHE_STATE_AVG_UNVISITED_AGE 2083 /*! * cache_walk: Clean pages currently in cache, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_PAGES_CLEAN 2083 +#define WT_STAT_DSRC_CACHE_STATE_PAGES_CLEAN 2084 /*! * cache_walk: Current eviction generation, only reported if cache_walk * or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_GEN_CURRENT 2084 +#define WT_STAT_DSRC_CACHE_STATE_GEN_CURRENT 2085 /*! * cache_walk: Dirty pages currently in cache, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_PAGES_DIRTY 2085 +#define WT_STAT_DSRC_CACHE_STATE_PAGES_DIRTY 2086 /*! * cache_walk: Entries in the root page, only reported if cache_walk or * all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_ROOT_ENTRIES 2086 +#define WT_STAT_DSRC_CACHE_STATE_ROOT_ENTRIES 2087 /*! 
* cache_walk: Internal pages currently in cache, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_PAGES_INTERNAL 2087 +#define WT_STAT_DSRC_CACHE_STATE_PAGES_INTERNAL 2088 /*! * cache_walk: Leaf pages currently in cache, only reported if cache_walk * or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_PAGES_LEAF 2088 +#define WT_STAT_DSRC_CACHE_STATE_PAGES_LEAF 2089 /*! * cache_walk: Maximum difference between current eviction generation * when the page was last considered, only reported if cache_walk or all * statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_GEN_MAX_GAP 2089 +#define WT_STAT_DSRC_CACHE_STATE_GEN_MAX_GAP 2090 /*! * cache_walk: Maximum page size seen, only reported if cache_walk or all * statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_MAX_PAGESIZE 2090 +#define WT_STAT_DSRC_CACHE_STATE_MAX_PAGESIZE 2091 /*! * cache_walk: Minimum on-disk page image size seen, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_MIN_WRITTEN_SIZE 2091 +#define WT_STAT_DSRC_CACHE_STATE_MIN_WRITTEN_SIZE 2092 /*! * cache_walk: Number of pages never visited by eviction server, only * reported if cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_UNVISITED_COUNT 2092 +#define WT_STAT_DSRC_CACHE_STATE_UNVISITED_COUNT 2093 /*! * cache_walk: On-disk page image sizes smaller than a single allocation * unit, only reported if cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_SMALLER_ALLOC_SIZE 2093 +#define WT_STAT_DSRC_CACHE_STATE_SMALLER_ALLOC_SIZE 2094 /*! * cache_walk: Pages created in memory and never written, only reported * if cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_MEMORY 2094 +#define WT_STAT_DSRC_CACHE_STATE_MEMORY 2095 /*! 
* cache_walk: Pages currently queued for eviction, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_QUEUED 2095 +#define WT_STAT_DSRC_CACHE_STATE_QUEUED 2096 /*! * cache_walk: Pages that could not be queued for eviction, only reported * if cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_NOT_QUEUEABLE 2096 +#define WT_STAT_DSRC_CACHE_STATE_NOT_QUEUEABLE 2097 /*! * cache_walk: Refs skipped during cache traversal, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_REFS_SKIPPED 2097 +#define WT_STAT_DSRC_CACHE_STATE_REFS_SKIPPED 2098 /*! * cache_walk: Size of the root page, only reported if cache_walk or all * statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_ROOT_SIZE 2098 +#define WT_STAT_DSRC_CACHE_STATE_ROOT_SIZE 2099 /*! * cache_walk: Total number of pages currently in cache, only reported if * cache_walk or all statistics are enabled */ -#define WT_STAT_DSRC_CACHE_STATE_PAGES 2099 +#define WT_STAT_DSRC_CACHE_STATE_PAGES 2100 /*! compression: compressed pages read */ -#define WT_STAT_DSRC_COMPRESS_READ 2100 +#define WT_STAT_DSRC_COMPRESS_READ 2101 /*! compression: compressed pages written */ -#define WT_STAT_DSRC_COMPRESS_WRITE 2101 +#define WT_STAT_DSRC_COMPRESS_WRITE 2102 /*! compression: page written failed to compress */ -#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2102 +#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2103 /*! compression: page written was too small to compress */ -#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2103 +#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2104 /*! cursor: bulk-loaded cursor-insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2104 +#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2105 /*! cursor: close calls that result in cache */ -#define WT_STAT_DSRC_CURSOR_CACHE 2105 +#define WT_STAT_DSRC_CURSOR_CACHE 2106 /*! 
cursor: create calls */ -#define WT_STAT_DSRC_CURSOR_CREATE 2106 +#define WT_STAT_DSRC_CURSOR_CREATE 2107 /*! cursor: cursor operation restarted */ -#define WT_STAT_DSRC_CURSOR_RESTART 2107 +#define WT_STAT_DSRC_CURSOR_RESTART 2108 /*! cursor: cursor-insert key and value bytes inserted */ -#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2108 +#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2109 /*! cursor: cursor-remove key bytes removed */ -#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2109 +#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2110 /*! cursor: cursor-update value bytes updated */ -#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2110 +#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2111 /*! cursor: cursors reused from cache */ -#define WT_STAT_DSRC_CURSOR_REOPEN 2111 +#define WT_STAT_DSRC_CURSOR_REOPEN 2112 /*! cursor: insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT 2112 +#define WT_STAT_DSRC_CURSOR_INSERT 2113 /*! cursor: modify calls */ -#define WT_STAT_DSRC_CURSOR_MODIFY 2113 +#define WT_STAT_DSRC_CURSOR_MODIFY 2114 /*! cursor: next calls */ -#define WT_STAT_DSRC_CURSOR_NEXT 2114 +#define WT_STAT_DSRC_CURSOR_NEXT 2115 /*! cursor: open cursor count */ -#define WT_STAT_DSRC_CURSOR_OPEN_COUNT 2115 +#define WT_STAT_DSRC_CURSOR_OPEN_COUNT 2116 /*! cursor: prev calls */ -#define WT_STAT_DSRC_CURSOR_PREV 2116 +#define WT_STAT_DSRC_CURSOR_PREV 2117 /*! cursor: remove calls */ -#define WT_STAT_DSRC_CURSOR_REMOVE 2117 +#define WT_STAT_DSRC_CURSOR_REMOVE 2118 /*! cursor: reserve calls */ -#define WT_STAT_DSRC_CURSOR_RESERVE 2118 +#define WT_STAT_DSRC_CURSOR_RESERVE 2119 /*! cursor: reset calls */ -#define WT_STAT_DSRC_CURSOR_RESET 2119 +#define WT_STAT_DSRC_CURSOR_RESET 2120 /*! cursor: search calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH 2120 +#define WT_STAT_DSRC_CURSOR_SEARCH 2121 /*! cursor: search near calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2121 +#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2122 /*! 
cursor: truncate calls */ -#define WT_STAT_DSRC_CURSOR_TRUNCATE 2122 +#define WT_STAT_DSRC_CURSOR_TRUNCATE 2123 /*! cursor: update calls */ -#define WT_STAT_DSRC_CURSOR_UPDATE 2123 +#define WT_STAT_DSRC_CURSOR_UPDATE 2124 /*! reconciliation: dictionary matches */ -#define WT_STAT_DSRC_REC_DICTIONARY 2124 +#define WT_STAT_DSRC_REC_DICTIONARY 2125 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2125 +#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2126 /*! * reconciliation: internal page key bytes discarded using suffix * compression */ -#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2126 +#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2127 /*! reconciliation: internal page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2127 +#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2128 /*! reconciliation: internal-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2128 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2129 /*! reconciliation: leaf page key bytes discarded using prefix compression */ -#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2129 +#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2130 /*! reconciliation: leaf page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2130 +#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2131 /*! reconciliation: leaf-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2131 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2132 /*! reconciliation: maximum blocks required for a page */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2132 +#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2133 /*! reconciliation: overflow values written */ -#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2133 +#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2134 /*! reconciliation: page checksum matches */ -#define WT_STAT_DSRC_REC_PAGE_MATCH 2134 +#define WT_STAT_DSRC_REC_PAGE_MATCH 2135 /*! 
reconciliation: page reconciliation calls */ -#define WT_STAT_DSRC_REC_PAGES 2135 +#define WT_STAT_DSRC_REC_PAGES 2136 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_DSRC_REC_PAGES_EVICTION 2136 +#define WT_STAT_DSRC_REC_PAGES_EVICTION 2137 /*! reconciliation: pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE 2137 +#define WT_STAT_DSRC_REC_PAGE_DELETE 2138 /*! session: object compaction */ -#define WT_STAT_DSRC_SESSION_COMPACT 2138 +#define WT_STAT_DSRC_SESSION_COMPACT 2139 /*! transaction: update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2139 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2140 /*! * @} diff --git a/src/third_party/wiredtiger/src/reconcile/rec_track.c b/src/third_party/wiredtiger/src/reconcile/rec_track.c index 0a5eb7eacc2..e33c5c29293 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_track.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_track.c @@ -39,7 +39,7 @@ __ovfl_discard_verbose( WT_RET(__wt_scr_alloc(session, 512, &tmp)); unpack = &_unpack; - __wt_cell_unpack(cell, unpack); + __wt_cell_unpack(page, cell, unpack); __wt_verbose(session, WT_VERB_OVERFLOW, "discard: %s%s%p %s", @@ -93,7 +93,7 @@ __ovfl_discard_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page) session, page, *cellp, "free")); /* Discard each cell's overflow item. 
*/ - WT_RET(__wt_ovfl_discard(session, *cellp)); + WT_RET(__wt_ovfl_discard(session, page, *cellp)); } __wt_free(session, track->discard); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 2b2026f87cc..c55b7970a68 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -278,10 +278,10 @@ static int __rec_cell_build_int_key(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, bool *); static int __rec_cell_build_leaf_key(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, bool *); -static int __rec_cell_build_ovfl(WT_SESSION_IMPL *, - WT_RECONCILE *, WT_KV *, uint8_t, uint64_t); -static int __rec_cell_build_val(WT_SESSION_IMPL *, - WT_RECONCILE *, const void *, size_t, uint64_t); +static int __rec_cell_build_ovfl(WT_SESSION_IMPL *, WT_RECONCILE *, + WT_KV *, uint8_t, wt_timestamp_t, wt_timestamp_t, uint64_t); +static int __rec_cell_build_val(WT_SESSION_IMPL *, WT_RECONCILE *, + const void *, size_t, wt_timestamp_t, wt_timestamp_t, uint64_t); static void __rec_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *); static int __rec_col_fix(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *); static int __rec_col_fix_slvg(WT_SESSION_IMPL *, @@ -290,8 +290,6 @@ static int __rec_col_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *); static int __rec_col_merge(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_col_var(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *, WT_SALVAGE_COOKIE *); -static int __rec_col_var_helper(WT_SESSION_IMPL *, WT_RECONCILE *, - WT_SALVAGE_COOKIE *, WT_ITEM *, bool, uint8_t, uint64_t); static int __rec_destroy_session(WT_SESSION_IMPL *); static int __rec_init(WT_SESSION_IMPL *, WT_REF *, uint32_t, WT_SALVAGE_COOKIE *, void *); @@ -1246,7 +1244,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, } /* Track the first update with non-zero timestamp. 
*/ - if (first_ts_upd == NULL && upd->timestamp != 0) + if (first_ts_upd == NULL && upd->timestamp != WT_TS_NONE) first_ts_upd = upd; /* @@ -1876,8 +1874,8 @@ __rec_copy_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_KV *kv) * Check for a dictionary match. */ static int -__rec_dict_replace( - WT_SESSION_IMPL *session, WT_RECONCILE *r, uint64_t rle, WT_KV *val) +__rec_dict_replace(WT_SESSION_IMPL *session, WT_RECONCILE *r, + wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle, WT_KV *val) { WT_DICTIONARY *dp; uint64_t offset; @@ -1919,8 +1917,8 @@ __rec_dict_replace( */ offset = (uint64_t)WT_PTRDIFF(r->first_free, (uint8_t *)r->cur_ptr->image.mem + dp->offset); - val->len = val->cell_len = - __wt_cell_pack_copy(&val->cell, rle, offset); + val->len = val->cell_len = __wt_cell_pack_copy( + &val->cell, start_ts, stop_ts, rle, offset); val->buf.data = NULL; val->buf.size = 0; } @@ -2881,6 +2879,8 @@ __rec_split_write_header(WT_SESSION_IMPL *session, dsk->u.entries = chunk->entries; dsk->type = page->type; + dsk->flags = 0; + /* Set the zero-length value flag in the page header. */ if (page->type == WT_PAGE_ROW_LEAF) { F_CLR(dsk, WT_PAGE_EMPTY_V_ALL | WT_PAGE_EMPTY_V_NONE); @@ -2899,13 +2899,12 @@ __rec_split_write_header(WT_SESSION_IMPL *session, if (F_ISSET(r, WT_REC_LOOKASIDE) && multi->supd != NULL) F_SET(dsk, WT_PAGE_LAS_UPDATE); - dsk->unused[0] = dsk->unused[1] = 0; + dsk->unused = 0; - /* - * There are page header fields which need to be cleared for consistent - * checksums: specifically, the write generation and the memory owned by - * the block manager. - */ + dsk->version = __wt_process.page_version_ts ? + WT_PAGE_VERSION_TS : WT_PAGE_VERSION_ORIG; + + /* Clear the memory owned by the block manager. 
*/ memset(WT_BLOCK_HEADER_REF(dsk), 0, btree->block_header); } @@ -3366,7 +3365,7 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_RET(__rec_cell_build_leaf_key(session, r, /* Build key cell */ cursor->key.data, cursor->key.size, &ovfl_key)); WT_RET(__rec_cell_build_val(session, r, /* Build value cell */ - cursor->value.data, cursor->value.size, (uint64_t)0)); + cursor->value.data, cursor->value.size, WT_TS_NONE, WT_TS_MAX, 0)); /* Boundary: split or write the page. */ if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) { @@ -3392,7 +3391,8 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) else { r->all_empty_value = false; if (btree->dictionary) - WT_RET(__rec_dict_replace(session, r, 0, val)); + WT_RET(__rec_dict_replace( + session, r, WT_TS_NONE, WT_TS_MAX, 0, val)); __rec_copy_incr(session, r, val); } @@ -3518,7 +3518,8 @@ __wt_bulk_insert_var( val = &r->v; if (deleted) { - val->cell_len = __wt_cell_pack_del(&val->cell, cbulk->rle); + val->cell_len = __wt_cell_pack_del( + &val->cell, WT_TS_NONE, WT_TS_MAX, cbulk->rle); val->buf.data = NULL; val->buf.size = 0; val->len = val->cell_len; @@ -3529,7 +3530,8 @@ __wt_bulk_insert_var( * value seen, not the current value. */ WT_RET(__rec_cell_build_val(session, - r, cbulk->last.data, cbulk->last.size, cbulk->rle)); + r, cbulk->last.data, cbulk->last.size, + WT_TS_NONE, WT_TS_MAX, cbulk->rle)); /* Boundary: split or write the page. */ if (WT_CROSSING_SPLIT_BND(r, val->len)) @@ -3537,7 +3539,8 @@ __wt_bulk_insert_var( /* Copy the value onto the page. */ if (btree->dictionary) - WT_RET(__rec_dict_replace(session, r, cbulk->rle, val)); + WT_RET(__rec_dict_replace( + session, r, WT_TS_NONE, WT_TS_MAX, cbulk->rle, val)); __rec_copy_incr(session, r, val); /* Update the starting record number in case we split. 
*/ @@ -3657,7 +3660,7 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) if (addr == NULL && __wt_off_page(page, ref->addr)) addr = ref->addr; if (addr == NULL) { - __wt_cell_unpack(ref->addr, vpack); + __wt_cell_unpack(page, ref->addr, vpack); val->buf.data = ref->addr; val->buf.size = __wt_cell_total_len(vpack); val->cell_len = 0; @@ -3924,7 +3927,8 @@ __rec_col_fix_slvg(WT_SESSION_IMPL *session, static int __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_SALVAGE_COOKIE *salvage, - WT_ITEM *value, bool deleted, uint8_t overflow_type, uint64_t rle) + WT_ITEM *value, bool deleted, uint8_t overflow_type, + wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle) { WT_BTREE *btree; WT_KV *val; @@ -3965,19 +3969,20 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, } if (deleted) { - val->cell_len = __wt_cell_pack_del(&val->cell, rle); + val->cell_len = + __wt_cell_pack_del(&val->cell, start_ts, stop_ts, rle); val->buf.data = NULL; val->buf.size = 0; val->len = val->cell_len; } else if (overflow_type) { - val->cell_len = __wt_cell_pack_ovfl( - &val->cell, overflow_type, rle, value->size); + val->cell_len = __wt_cell_pack_ovfl(&val->cell, + overflow_type, start_ts, stop_ts, rle, value->size); val->buf.data = value->data; val->buf.size = value->size; val->len = val->cell_len + value->size; } else - WT_RET(__rec_cell_build_val( - session, r, value->data, value->size, rle)); + WT_RET(__rec_cell_build_val(session, + r, value->data, value->size, start_ts, stop_ts, rle)); /* Boundary: split or write the page. */ if (__rec_need_split(r, val->len)) @@ -3985,7 +3990,8 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, /* Copy the value onto the page. 
*/ if (!deleted && !overflow_type && btree->dictionary) - WT_RET(__rec_dict_replace(session, r, rle, val)); + WT_RET(__rec_dict_replace( + session, r, start_ts, stop_ts, rle, val)); __rec_copy_incr(session, r, val); /* Update the starting record number in case we split. */ @@ -4058,7 +4064,8 @@ __rec_col_var(WT_SESSION_IMPL *session, salvage->take += salvage->missing; } else WT_ERR(__rec_col_var_helper(session, - r, NULL, NULL, true, false, salvage->missing)); + r, NULL, NULL, true, false, + WT_TS_NONE, WT_TS_MAX, salvage->missing)); } /* @@ -4082,7 +4089,7 @@ __rec_col_var(WT_SESSION_IMPL *session, ins = NULL; orig_deleted = true; } else { - __wt_cell_unpack(cell, vpack); + __wt_cell_unpack(page, cell, vpack); nrepeat = __wt_cell_rle(vpack); ins = WT_SKIP_FIRST(WT_COL_UPDATE(page, cip)); @@ -4228,7 +4235,9 @@ record_loop: /* if (rle != 0) { WT_ERR(__rec_col_var_helper( session, r, salvage, last, - last_deleted, 0, rle)); + last_deleted, 0, + WT_TS_FIXME, WT_TS_FIXME, + rle)); rle = 0; } @@ -4236,7 +4245,9 @@ record_loop: /* last->size = vpack->size; WT_ERR(__rec_col_var_helper( session, r, salvage, last, false, - WT_CELL_VALUE_OVFL, repeat_count)); + WT_CELL_VALUE_OVFL, + WT_TS_FIXME, WT_TS_FIXME, + repeat_count)); /* Track if page has overflow items. */ r->ovfl_items = true; @@ -4284,7 +4295,8 @@ compare: /* continue; } WT_ERR(__rec_col_var_helper(session, r, - salvage, last, last_deleted, 0, rle)); + salvage, last, last_deleted, 0, + WT_TS_FIXME, WT_TS_FIXME, rle)); } /* @@ -4426,7 +4438,8 @@ compare: /* goto next; } WT_ERR(__rec_col_var_helper(session, r, - salvage, last, last_deleted, 0, rle)); + salvage, last, last_deleted, 0, + WT_TS_FIXME, WT_TS_FIXME, rle)); } /* @@ -4471,8 +4484,8 @@ next: if (src_recno == UINT64_MAX) /* If we were tracking a record, write it. 
*/ if (rle != 0) - WT_ERR(__rec_col_var_helper( - session, r, salvage, last, last_deleted, 0, rle)); + WT_ERR(__rec_col_var_helper(session, r, salvage, + last, last_deleted, 0, WT_TS_FIXME, WT_TS_FIXME, rle)); /* Write the remnant page. */ ret = __rec_split_finish(session, r); @@ -4558,7 +4571,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) if (ikey != NULL && ikey->cell_offset != 0) { cell = WT_PAGE_REF_OFFSET(page, ikey->cell_offset); - __wt_cell_unpack(cell, kpack); + __wt_cell_unpack(page, cell, kpack); key_onpage_ovfl = kpack->ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM; } @@ -4654,7 +4667,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) vtype = state == WT_CHILD_PROXY ? WT_CELL_ADDR_DEL : __rec_vtype(addr); } else { - __wt_cell_unpack(ref->addr, vpack); + __wt_cell_unpack(page, ref->addr, vpack); p = vpack->data; size = vpack->size; vtype = state == WT_CHILD_PROXY ? @@ -4840,7 +4853,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, kpack = NULL; else { kpack = &_kpack; - __wt_cell_unpack(cell, kpack); + __wt_cell_unpack(page, cell, kpack); } /* Unpack the on-page value cell, and look for an update. 
*/ @@ -4873,8 +4886,8 @@ __rec_row_leaf(WT_SESSION_IMPL *session, p = tmpval->data; size = tmpval->size; } - WT_ERR(__rec_cell_build_val( - session, r, p, size, (uint64_t)0)); + WT_ERR(__rec_cell_build_val(session, r, + p, size, WT_TS_FIXME, WT_TS_FIXME, 0)); dictionary = true; } else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) { /* @@ -4920,7 +4933,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, */ WT_ERR(__rec_cell_build_val(session, r, "ovfl-unused", strlen("ovfl-unused"), - (uint64_t)0)); + WT_TS_FIXME, WT_TS_FIXME, 0)); } else { val->buf.data = vpack->cell; val->buf.size = __wt_cell_total_len(vpack); @@ -4947,7 +4960,8 @@ __rec_row_leaf(WT_SESSION_IMPL *session, F_ISSET(r, WT_REC_VISIBLE_ALL))); WT_ERR(__rec_cell_build_val(session, r, cbt->iface.value.data, - cbt->iface.value.size, (uint64_t)0)); + cbt->iface.value.size, + WT_TS_FIXME, WT_TS_FIXME, 0)); dictionary = true; break; case WT_UPDATE_STANDARD: @@ -4963,7 +4977,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, } else { WT_ERR(__rec_cell_build_val(session, r, upd->data, upd->size, - (uint64_t)0)); + WT_TS_FIXME, WT_TS_FIXME, 0)); dictionary = true; } break; @@ -5045,7 +5059,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, goto build; kpack = &_kpack; - __wt_cell_unpack(cell, kpack); + __wt_cell_unpack(page, cell, kpack); if (btree->huffman_key == NULL && kpack->type == WT_CELL_KEY && tmpkey->size >= kpack->prefix) { @@ -5117,7 +5131,8 @@ build: else { r->all_empty_value = false; if (dictionary && btree->dictionary) - WT_ERR(__rec_dict_replace(session, r, 0, val)); + WT_ERR(__rec_dict_replace(session, r, + WT_TS_FIXME, WT_TS_FIXME, 0, val)); __rec_copy_incr(session, r, val); } @@ -5194,16 +5209,16 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) WT_RET(__wt_value_return_upd( session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL))); WT_RET(__rec_cell_build_val(session, r, - cbt->iface.value.data, - cbt->iface.value.size, (uint64_t)0)); + cbt->iface.value.data, cbt->iface.value.size, + 
WT_TS_FIXME, WT_TS_FIXME, 0)); break; case WT_UPDATE_STANDARD: if (upd->size == 0) val->len = 0; else - WT_RET(__rec_cell_build_val(session, - r, upd->data, upd->size, - (uint64_t)0)); + WT_RET(__rec_cell_build_val(session, r, + upd->data, upd->size, + WT_TS_FIXME, WT_TS_FIXME, 0)); break; case WT_UPDATE_TOMBSTONE: continue; @@ -5239,7 +5254,8 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) else { r->all_empty_value = false; if (btree->dictionary) - WT_RET(__rec_dict_replace(session, r, 0, val)); + WT_RET(__rec_dict_replace(session, r, + WT_TS_FIXME, WT_TS_FIXME, 0, val)); __rec_copy_incr(session, r, val); } @@ -5666,8 +5682,8 @@ __rec_cell_build_int_key(WT_SESSION_IMPL *session, WT_STAT_DATA_INCR(session, rec_overflow_key_internal); *is_ovflp = true; - return (__rec_cell_build_ovfl( - session, r, key, WT_CELL_KEY_OVFL, (uint64_t)0)); + return (__rec_cell_build_ovfl(session, r, + key, WT_CELL_KEY_OVFL, WT_TS_NONE, WT_TS_NONE, 0)); } key->cell_len = __wt_cell_pack_int_key(&key->cell, key->buf.size); @@ -5766,8 +5782,8 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session, WT_STAT_DATA_INCR(session, rec_overflow_key_leaf); *is_ovflp = true; - return (__rec_cell_build_ovfl( - session, r, key, WT_CELL_KEY_OVFL, (uint64_t)0)); + return (__rec_cell_build_ovfl(session, r, key, + WT_CELL_KEY_OVFL, WT_TS_NONE, WT_TS_NONE, 0)); } return ( __rec_cell_build_leaf_key(session, r, NULL, 0, is_ovflp)); @@ -5820,8 +5836,9 @@ __rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r, * be stored on the page. 
*/ static int -__rec_cell_build_val(WT_SESSION_IMPL *session, - WT_RECONCILE *r, const void *data, size_t size, uint64_t rle) +__rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r, + const void *data, size_t size, + wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle) { WT_BTREE *btree; WT_KV *val; @@ -5849,11 +5866,12 @@ __rec_cell_build_val(WT_SESSION_IMPL *session, if (val->buf.size > btree->maxleafvalue) { WT_STAT_DATA_INCR(session, rec_overflow_value); - return (__rec_cell_build_ovfl( - session, r, val, WT_CELL_VALUE_OVFL, rle)); + return (__rec_cell_build_ovfl(session, r, + val, WT_CELL_VALUE_OVFL, start_ts, stop_ts, rle)); } } - val->cell_len = __wt_cell_pack_data(&val->cell, rle, val->buf.size); + val->cell_len = __wt_cell_pack_data( + &val->cell, start_ts, stop_ts, rle, val->buf.size); val->len = val->cell_len + val->buf.size; return (0); @@ -5865,7 +5883,8 @@ __rec_cell_build_val(WT_SESSION_IMPL *session, */ static int __rec_cell_build_ovfl(WT_SESSION_IMPL *session, - WT_RECONCILE *r, WT_KV *kv, uint8_t type, uint64_t rle) + WT_RECONCILE *r, WT_KV *kv, uint8_t type, + wt_timestamp_t start_ts, wt_timestamp_t stop_ts, uint64_t rle) { WT_BM *bm; WT_BTREE *btree; @@ -5924,7 +5943,8 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_ERR(__wt_buf_set(session, &kv->buf, addr, size)); /* Build the cell and return. 
*/ - kv->cell_len = __wt_cell_pack_ovfl(&kv->cell, type, rle, kv->buf.size); + kv->cell_len = __wt_cell_pack_ovfl( + &kv->cell, type, start_ts, stop_ts, rle, kv->buf.size); kv->len = kv->cell_len + kv->buf.size; err: __wt_scr_free(session, &tmp); diff --git a/src/third_party/wiredtiger/src/support/global.c b/src/third_party/wiredtiger/src/support/global.c index f71f91a4daa..3b1cfbbf936 100644 --- a/src/third_party/wiredtiger/src/support/global.c +++ b/src/third_party/wiredtiger/src/support/global.c @@ -12,11 +12,11 @@ WT_PROCESS __wt_process; /* Per-process structure */ static int __wt_pthread_once_failed; /* If initialization failed */ /* - * __wt_endian_check -- + * __endian_check -- * Check the build matches the machine. */ static int -__wt_endian_check(void) +__endian_check(void) { uint64_t v; const char *e; @@ -103,11 +103,11 @@ __global_calibrate_ticks(void) } /* - * __wt_global_once -- + * __global_once -- * Global initialization, run once. */ static void -__wt_global_once(void) +__global_once(void) { WT_DECL_RET; @@ -117,10 +117,17 @@ __wt_global_once(void) return; } + TAILQ_INIT(&__wt_process.connqh); + +#if defined(HAVE_PAGE_VERSION_TS) + __wt_process.page_version_ts = true; +#else + __wt_process.page_version_ts = false; +#endif + __wt_process.checksum = wiredtiger_crc32c_func(); - __global_calibrate_ticks(); - TAILQ_INIT(&__wt_process.connqh); + __global_calibrate_ticks(); } /* @@ -134,7 +141,7 @@ __wt_library_init(void) WT_DECL_RET; /* Check the build matches the machine. */ - WT_RET(__wt_endian_check()); + WT_RET(__endian_check()); /* * Do per-process initialization once, before anything else, but only @@ -143,7 +150,7 @@ __wt_library_init(void) * static and only using that function to avoid a race. 
*/ if (first) { - if ((ret = __wt_once(__wt_global_once)) != 0) + if ((ret = __wt_once(__global_once)) != 0) __wt_pthread_once_failed = ret; first = false; } diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index cee1f270d6d..562c00fe146 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -41,6 +41,7 @@ static const char * const __stats_dsrc_desc[] = { "btree: number of key/value pairs", "btree: overflow pages", "btree: pages rewritten by compaction", + "btree: row-store empty values", "btree: row-store internal pages", "btree: row-store leaf pages", "cache: bytes currently in the cache", @@ -223,6 +224,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->btree_entries = 0; stats->btree_overflow = 0; stats->btree_compact_rewrite = 0; + stats->btree_row_empty_values = 0; stats->btree_row_internal = 0; stats->btree_row_leaf = 0; /* not clearing cache_bytes_inuse */ @@ -390,6 +392,7 @@ __wt_stat_dsrc_aggregate_single( to->btree_entries += from->btree_entries; to->btree_overflow += from->btree_overflow; to->btree_compact_rewrite += from->btree_compact_rewrite; + to->btree_row_empty_values += from->btree_row_empty_values; to->btree_row_internal += from->btree_row_internal; to->btree_row_leaf += from->btree_row_leaf; to->cache_bytes_inuse += from->cache_bytes_inuse; @@ -578,6 +581,8 @@ __wt_stat_dsrc_aggregate( to->btree_overflow += WT_STAT_READ(from, btree_overflow); to->btree_compact_rewrite += WT_STAT_READ(from, btree_compact_rewrite); + to->btree_row_empty_values += + WT_STAT_READ(from, btree_row_empty_values); to->btree_row_internal += WT_STAT_READ(from, btree_row_internal); to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf); to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 4ddc75afe6c..17044be4c34 100644 --- 
a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -643,7 +643,7 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session) * Check timestamps are used in order. */ op_zero_ts = !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT); - upd_zero_ts = upd->timestamp == 0; + upd_zero_ts = upd->timestamp == WT_TS_NONE; if (op_zero_ts != upd_zero_ts) WT_RET_MSG(session, EINVAL, "per-key timestamps used inconsistently"); @@ -660,7 +660,7 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session) * Only if the update structure doesn't have a timestamp * then use the one in the transaction structure. */ - if (op_timestamp == 0) + if (op_timestamp == WT_TS_NONE) op_timestamp = txn->commit_timestamp; if (op_timestamp < upd->timestamp) WT_RET_MSG(session, EINVAL, @@ -1177,7 +1177,7 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session) checkpoint_timestamp = txn_global->checkpoint_timestamp; commit_timestamp = txn_global->commit_timestamp; pinned_timestamp = txn_global->pinned_timestamp; - if (checkpoint_timestamp != 0 && + if (checkpoint_timestamp != WT_TS_NONE && checkpoint_timestamp < pinned_timestamp) pinned_timestamp = checkpoint_timestamp; WT_STAT_SET(session, stats, txn_pinned_timestamp, diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 3e377043fa7..67a0ca91dae 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -765,7 +765,7 @@ __txn_checkpoint_can_skip(WT_SESSION_IMPL *session, * more that could be written. 
*/ if (use_timestamp && txn_global->has_stable_timestamp && - txn_global->last_ckpt_timestamp != 0 && + txn_global->last_ckpt_timestamp != WT_TS_NONE && txn_global->last_ckpt_timestamp == txn_global->stable_timestamp) { *can_skipp = true; return (0); diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index 7b8e2ca06ae..d8d08c73863 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -104,7 +104,8 @@ __txn_abort_newer_update(WT_SESSION_IMPL *session, * strict timestamp checking, assert that all more recent * updates were also rolled back. */ - if (upd->txnid == WT_TXN_ABORTED || upd->timestamp == 0) { + if (upd->txnid == WT_TXN_ABORTED || + upd->timestamp == WT_TS_NONE) { if (upd == first_upd) first_upd = upd->next; } else if (rollback_timestamp < upd->timestamp) { diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c index b58152f8599..adb52772eea 100644 --- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c +++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c @@ -123,7 +123,7 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) { WT_RET(__wt_txn_parse_timestamp_raw(session, name, timestamp, cval)); - if (cval->len != 0 && *timestamp == 0) + if (cval->len != 0 && *timestamp == WT_TS_NONE) WT_RET_MSG(session, EINVAL, "Failed to parse %s timestamp '%.*s': zero not permitted", name, (int)cval->len, cval->str); @@ -160,7 +160,7 @@ __txn_get_pinned_timestamp( /* Check for a running checkpoint */ if (LF_ISSET(WT_TXN_TS_INCLUDE_CKPT) && - txn_global->checkpoint_timestamp != 0 && + txn_global->checkpoint_timestamp != WT_TS_NONE && (tmp_ts == 0 || txn_global->checkpoint_timestamp < tmp_ts)) tmp_ts = txn_global->checkpoint_timestamp; if (!txn_has_write_lock) diff --git 
a/src/third_party/wiredtiger/test/checkpoint/smoke.sh b/src/third_party/wiredtiger/test/checkpoint/smoke.sh index 2f1d4345ad7..8db6fc1ebc4 100755 --- a/src/third_party/wiredtiger/test/checkpoint/smoke.sh +++ b/src/third_party/wiredtiger/test/checkpoint/smoke.sh @@ -2,6 +2,9 @@ set -e +# Bypass this test for valgrind +test "$TESTUTIL_BYPASS_VALGRIND" = "1" && exit 0 + # Smoke-test checkpoints as part of running "make check". echo "checkpoint: 3 mixed tables" $TEST_WRAPPER ./t -T 3 -t m diff --git a/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c b/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c index 2757f991c2a..6df68da932d 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2246_col_append/main.c @@ -98,6 +98,10 @@ main(int argc, char *argv[]) uint64_t i, id; char buf[100]; + /* Bypass this test for valgrind */ + if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND")) + return (EXIT_SUCCESS); + opts = &_opts; memset(opts, 0, sizeof(*opts)); opts->table_type = TABLE_ROW; diff --git a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c index 0b99df76cf3..6cd94ba7572 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2323_join_visibility/main.c @@ -92,6 +92,10 @@ main(int argc, char *argv[]) TEST_OPTS *opts, _opts; const char *tablename; + /* Bypass this test for valgrind */ + if (testutil_is_flag_set("TESTUTIL_BYPASS_VALGRIND")) + return (EXIT_SUCCESS); + opts = &_opts; sharedopts = &_sharedopts; memset(opts, 0, sizeof(*opts)); diff --git a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c index a8d44bf3dab..521e67b2439 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c +++ 
b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c @@ -163,9 +163,7 @@ main(int argc, char *argv[]) testutil_assert(count == 0); testutil_progress(opts, "cleanup starting"); -#if 0 testutil_cleanup(opts); -#endif return (EXIT_SUCCESS); } diff --git a/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c b/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c index 80911ddfd2d..2021ff1849e 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2853_perf/main.c @@ -186,10 +186,7 @@ main(int argc, char *argv[]) testutil_assert(nfail == 0); testutil_progress(opts, "cleanup starting"); -#if 0 testutil_cleanup(opts); -#endif - return (0); } diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h index e9063674476..4b8eadeea1d 100644 --- a/src/third_party/wiredtiger/test/format/format.h +++ b/src/third_party/wiredtiger/test/format/format.h @@ -377,6 +377,13 @@ mmrand(WT_RAND_STATE *rnd, u_int min, u_int max) uint32_t v; u_int range; + /* + * Test runs with small row counts can easily pass a max of 0 (for + * example, "g.rows / 20"). Avoid the problem. + */ + if (max <= min) + return (min); + v = rng(rnd); range = (max - min) + 1; v %= range; diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c index f92f438a4f1..d5ed0320761 100644 --- a/src/third_party/wiredtiger/test/format/ops.c +++ b/src/third_party/wiredtiger/test/format/ops.c @@ -276,6 +276,44 @@ wts_ops(int lastrun) free(tinfo_list); } +typedef enum { NEXT, PREV, SEARCH, SEARCH_NEAR } read_operation; + +/* + * read_op -- + * Perform a read operation, waiting out prepare conflicts. + */ +static inline int +read_op(WT_CURSOR *cursor, read_operation op, int *exactp) +{ + WT_DECL_RET; + + /* + * Read operations wait out prepare-conflicts. 
(As part of the snapshot + * isolation checks, we repeat reads that succeeded before, they should + * be repeatable.) + */ + switch (op) { + case NEXT: + while ((ret = cursor->next(cursor)) == WT_PREPARE_CONFLICT) + __wt_yield(); + break; + case PREV: + while ((ret = cursor->prev(cursor)) == WT_PREPARE_CONFLICT) + __wt_yield(); + break; + case SEARCH: + while ((ret = cursor->search(cursor)) == WT_PREPARE_CONFLICT) + __wt_yield(); + break; + case SEARCH_NEAR: + while ((ret = + cursor->search_near(cursor, exactp)) == WT_PREPARE_CONFLICT) + __wt_yield(); + break; + } + return (ret); +} + typedef enum { INSERT, MODIFY, READ, REMOVE, TRUNCATE, UPDATE } thread_op; typedef struct { thread_op op; /* Operation */ @@ -401,7 +439,7 @@ snap_check(WT_CURSOR *cursor, } } - switch (ret = cursor->search(cursor)) { + switch (ret = read_op(cursor, SEARCH, NULL)) { case 0: if (g.type == FIX) { testutil_check( @@ -634,12 +672,22 @@ prepare_transaction(TINFO *tinfo, WT_SESSION *session) */ #define OP_FAILED(notfound_ok) do { \ positioned = false; \ - if (intxn && (ret == WT_CACHE_FULL || \ - ret == WT_PREPARE_CONFLICT || ret == WT_ROLLBACK)) \ + if (intxn && (ret == WT_CACHE_FULL || ret == WT_ROLLBACK)) \ goto rollback; \ testutil_assert((notfound_ok && ret == WT_NOTFOUND) || \ - ret == WT_CACHE_FULL || \ - ret == WT_PREPARE_CONFLICT || ret == WT_ROLLBACK); \ + ret == WT_CACHE_FULL || ret == WT_ROLLBACK); \ +} while (0) + +/* + * Rollback updates returning prepare-conflict, they're unlikely to succeed + * unless the prepare aborts. Reads wait out the error, so it's unexpected. + */ +#define READ_OP_FAILED(notfound_ok) \ + OP_FAILED(notfound_ok) +#define WRITE_OP_FAILED(notfound_ok) do { \ + if (ret == WT_PREPARE_CONFLICT) \ + ret = WT_ROLLBACK; \ + OP_FAILED(notfound_ok); \ } while (0) /* @@ -826,7 +874,7 @@ ops(void *arg) positioned = true; SNAP_TRACK(READ, tinfo); } else - OP_FAILED(true); + READ_OP_FAILED(true); } /* Optionally reserve a row. 
*/ @@ -845,7 +893,7 @@ ops(void *arg) __wt_yield(); /* Let other threads proceed. */ } else - OP_FAILED(true); + WRITE_OP_FAILED(true); } /* Perform the operation. */ @@ -875,7 +923,7 @@ ops(void *arg) ++tinfo->insert; SNAP_TRACK(INSERT, tinfo); } else - OP_FAILED(false); + WRITE_OP_FAILED(false); break; case MODIFY: /* @@ -899,7 +947,7 @@ ops(void *arg) positioned = true; SNAP_TRACK(MODIFY, tinfo); } else - OP_FAILED(true); + WRITE_OP_FAILED(true); break; case READ: ++tinfo->search; @@ -908,7 +956,7 @@ ops(void *arg) positioned = true; SNAP_TRACK(READ, tinfo); } else - OP_FAILED(true); + READ_OP_FAILED(true); break; case REMOVE: remove_instead_of_truncate: @@ -929,7 +977,7 @@ remove_instead_of_truncate: */ SNAP_TRACK(REMOVE, tinfo); } else - OP_FAILED(true); + WRITE_OP_FAILED(true); break; case TRUNCATE: /* @@ -958,7 +1006,8 @@ remove_instead_of_truncate: * vice-versa). */ greater_than = mmrand(&tinfo->rnd, 0, 1) == 1; - range = mmrand(&tinfo->rnd, 1, (u_int)g.rows / 20); + range = g.rows < 20 ? + 1 : mmrand(&tinfo->rnd, 1, (u_int)g.rows / 20); tinfo->last = tinfo->keyno; if (greater_than) { if (g.c_reverse) { @@ -992,14 +1041,15 @@ remove_instead_of_truncate: ret = col_truncate(tinfo, cursor); break; } - positioned = false; (void)__wt_atomic_subv64(&g.truncate_cnt, 1); + /* Truncate never leaves the cursor positioned. 
*/ + positioned = false; if (ret == 0) { ++tinfo->truncate; SNAP_TRACK(TRUNCATE, tinfo); } else - OP_FAILED(false); + WRITE_OP_FAILED(false); break; case UPDATE: update_instead_of_chosen_op: @@ -1017,7 +1067,7 @@ update_instead_of_chosen_op: positioned = true; SNAP_TRACK(UPDATE, tinfo); } else - OP_FAILED(false); + WRITE_OP_FAILED(false); break; } @@ -1033,7 +1083,7 @@ update_instead_of_chosen_op: if ((ret = nextprev(tinfo, cursor, next)) == 0) continue; - OP_FAILED(true); + READ_OP_FAILED(true); break; } } @@ -1066,9 +1116,8 @@ update_instead_of_chosen_op: */ if (g.c_prepare && mmrand(&tinfo->rnd, 1, 10) == 1) { ret = prepare_transaction(tinfo, session); - testutil_assert(ret == 0 || ret == WT_PREPARE_CONFLICT); - if (ret == WT_PREPARE_CONFLICT) - goto rollback; + if (ret != 0) + WRITE_OP_FAILED(false); __wt_yield(); /* Let other threads proceed. */ } @@ -1193,11 +1242,11 @@ read_row_worker( } if (sn) { - ret = cursor->search_near(cursor, &exact); + ret = read_op(cursor, SEARCH_NEAR, &exact); if (ret == 0 && exact != 0) ret = WT_NOTFOUND; } else - ret = cursor->search(cursor); + ret = read_op(cursor, SEARCH, NULL); switch (ret) { case 0: if (g.type == FIX) { @@ -1288,7 +1337,7 @@ nextprev(TINFO *tinfo, WT_CURSOR *cursor, bool next) keyno = 0; which = next ? "WT_CURSOR.next" : "WT_CURSOR.prev"; - switch (ret = (next ? cursor->next(cursor) : cursor->prev(cursor))) { + switch (ret = read_op(cursor, next ? NEXT : PREV, NULL)) { case 0: switch (g.type) { case FIX: @@ -2019,7 +2068,7 @@ row_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) } /* We use the cursor in overwrite mode, check for existence. */ - if ((ret = cursor->search(cursor)) == 0) + if ((ret = read_op(cursor, SEARCH, NULL)) == 0) ret = cursor->remove(cursor); if (ret != 0 && ret != WT_NOTFOUND) @@ -2053,7 +2102,7 @@ col_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) cursor->set_key(cursor, tinfo->keyno); /* We use the cursor in overwrite mode, check for existence. 
*/ - if ((ret = cursor->search(cursor)) == 0) + if ((ret = read_op(cursor, SEARCH, NULL)) == 0) ret = cursor->remove(cursor); if (ret != 0 && ret != WT_NOTFOUND) diff --git a/src/third_party/wiredtiger/test/suite/test_dictionary.py b/src/third_party/wiredtiger/test/suite/test_dictionary.py new file mode 100644 index 00000000000..f624e1ade35 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_dictionary.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2018 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_dictionary.py +# Smoke test dictionary compression. + +from wtscenario import make_scenarios +from wtdataset import simple_key +from wiredtiger import stat +import wiredtiger, wttest + +# Smoke test dictionary compression. 
+class test_dictionary(wttest.WiredTigerTestCase): + conn_config = 'statistics=(all)' + scenarios = make_scenarios([ + ('row', dict(key_format='S', value_format='S')), + ('var', dict(key_format='r', value_format='S')), + ]) + + # Smoke test dictionary compression. + def test_dictionary(self): + nentries = 25000 + uri = 'file:test_dictionary' # This is a btree layer test. + + # Create the object, open the cursor, insert some records with identical values. Use + # alternating values, otherwise column-store will RLE compress them into a single item. + self.session.create(uri, 'dictionary=100,value_format=S,key_format=' + self.key_format) + cursor = self.session.open_cursor(uri, None) + i = 0 + while i < nentries: + i = i + 1 + cursor[simple_key(cursor, i)] = "the same value as the odd items" + i = i + 1 + cursor[simple_key(cursor, i)] = "the same value as the even items" + cursor.close() + + # Checkpoint to force the pages through reconciliation. + self.session.checkpoint() + + # Confirm the dictionary was effective. + cursor = self.session.open_cursor('statistics:' + uri, None, None) + self.assertGreater(cursor[stat.dsrc.rec_dictionary][2], nentries - 100) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_empty_value.py b/src/third_party/wiredtiger/test/suite/test_empty_value.py new file mode 100644 index 00000000000..b40eaaef3d1 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_empty_value.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2018 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. 
+# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_empty_value.py +# Smoke test empty row-store values. + +from wtdataset import simple_key +from wiredtiger import stat +import wiredtiger, wttest + +# Smoke test empty row-store values. +class test_row_store_empty_values(wttest.WiredTigerTestCase): + conn_config = 'statistics=(all)' + + # Smoke test empty row-store values. + def test_row_store_empty_values(self): + nentries = 25000 + uri = 'file:test_empty_values' # This is a btree layer test. + + # Create the object, open the cursor, insert some records with zero-length values. + self.session.create(uri, 'value_format=u,key_format=S') + cursor = self.session.open_cursor(uri, None) + for i in xrange(1, nentries + 1): + cursor[simple_key(cursor, i)] = "" + cursor.close() + + # Reopen to force the object to disk. + self.reopen_conn() + + # Confirm the values weren't stored. 
+ cursor = self.session.open_cursor('statistics:' + uri, None, 'statistics=(tree_walk)') + self.assertEqual(cursor[stat.dsrc.btree_row_empty_values][2], nentries) + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c index 8d5605208cf..b66ae497707 100644 --- a/src/third_party/wiredtiger/test/utility/misc.c +++ b/src/third_party/wiredtiger/test/utility/misc.c @@ -199,7 +199,7 @@ bool testutil_is_flag_set(const char *flag) { const char *res; - bool enable_long_tests; + bool flag_being_set; if (__wt_getenv(NULL, flag, &res) != 0 || res == NULL) return (false); @@ -208,11 +208,11 @@ testutil_is_flag_set(const char *flag) * This is a boolean test. So if the environment variable is set to any * value other than 0, we return success. */ - enable_long_tests = res[0] != '0'; + flag_being_set = res[0] != '0'; free((void *)res); - return (enable_long_tests); + return (flag_being_set); } /* |