diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-05-14 16:44:40 +1000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-14 07:02:37 +0000 |
commit | 328c35e4b883540675fb4b626c53a08f74e43cf0 (patch) | |
tree | b2a7100993281a81bb4d76ad040c9164bf617453 | |
parent | cb6fe33a7616148bc4590ccdb0fcbc65b5c7035e (diff) | |
download | mongo-328c35e4b883540675fb4b626c53a08f74e43cf0.tar.gz |
Import wiredtiger: 4cf221a61f19db1ed706f923d18d3d0b507101c9 from branch mongodb-4.4 r4.4.0-rc6
ref: 63b37d1861..4cf221a61f
for: 4.4.0-rc6
WT-5725 Remove the WT_CURSOR_BTREE.btree field
WT-6082 Skip writing txn-id to data store when not needed
WT-6088 Re-enable format-test
WT-6151 Perform an empty check on time window before performing visibility check
WT-6164 Reduce the number of page flags according to the flag variable size
WT-6169 Use helper functions to check time window's visibility
WT-6172 Fix silent failures of Evergreen test/format tasks
WT-6186 Only delete timestamped history store entries when seeing a non-timestamped tombstone
WT-6201 Release WT_CURSOR_BTREE buffers when caching a WT_CURSOR
WT-6205 Coverity analysis defect 114159: Redundant test
WT-6206 Coverity analysis defect 114158: Uninitialized pointer read
WT-6207 Coverity analysis defect 114156: PW.SET_BUT_NOT_USED
WT-6209 Coverity analysis defect 114135: Unintentional integer overflow
WT-6210 Coverity analysis defect 114134: Explicit null dereferenced
47 files changed, 420 insertions, 250 deletions
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index d8fc8e08f4a..843175370cc 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -1321,6 +1321,7 @@ trylock tsc tupdate tvalue +tw txn txnc txnid diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 68ce2c05a34..b9da2f39f20 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.4", - "commit": "63b37d1861f396e09a76b69f9b786740709a6f8c" + "commit": "4cf221a61f19db1ed706f923d18d3d0b507101c9" } diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c index 4d6f62a10b7..3b13e4b7b9c 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curprev.c +++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c @@ -48,7 +48,7 @@ restart: */ recno = WT_INSERT_RECNO(cbt->ins); while ((current = cbt->ins) != PREV_INS(cbt, 0)) { - if (cbt->btree->type == BTREE_ROW) { + if (CUR2BT(cbt)->type == BTREE_ROW) { key.data = WT_INSERT_KEY(current); key.size = WT_INSERT_KEY_SIZE(current); WT_RET(__wt_search_insert(session, cbt, cbt->ins_head, &key)); diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index ccec03700d0..29768838120 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -180,7 +180,8 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint64_t recno, bool *vali WT_SESSION_IMPL *session; *valid = false; - btree = cbt->btree; + + btree = CUR2BT(cbt); page = cbt->ref->page; session = CUR2S(cbt); @@ -436,7 +437,7 @@ __wt_btcur_search_prepared(WT_CURSOR *cursor, WT_UPDATE **updp) *updp = NULL; cbt = (WT_CURSOR_BTREE *)cursor; - btree = cbt->btree; + 
btree = CUR2BT(cbt); upd = NULL; /* -Wuninitialized */ /* @@ -459,7 +460,7 @@ __wt_btcur_search_prepared(WT_CURSOR *cursor, WT_UPDATE **updp) return (0); /* Get any uncommitted update from the in-memory page. */ - switch (cbt->btree->type) { + switch (btree->type) { case BTREE_ROW: /* * Any update must be either in the insert list, in which case search will have returned a @@ -501,7 +502,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_SESSION_IMPL *session; bool leaf_found, valid; - btree = cbt->btree; + btree = CUR2BT(cbt); cursor = &cbt->iface; session = CUR2S(cbt); @@ -597,7 +598,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) int exact; bool leaf_found, valid; - btree = cbt->btree; + btree = CUR2BT(cbt); cursor = &cbt->iface; session = CUR2S(cbt); exact = 0; @@ -751,15 +752,12 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) uint64_t yield_count, sleep_usecs; bool append_key, valid; - btree = cbt->btree; + btree = CUR2BT(cbt); cursor = &cbt->iface; insert_bytes = cursor->key.size + cursor->value.size; session = CUR2S(cbt); yield_count = sleep_usecs = 0; - WT_RET_PANIC_ASSERT( - session, S2BT(session) == btree, WT_PANIC, "btree differs unexpectedly from session's btree"); - WT_STAT_CONN_INCR(session, cursor_insert); WT_STAT_DATA_INCR(session, cursor_insert); WT_STAT_CONN_INCRV(session, cursor_insert_bytes, insert_bytes); @@ -907,7 +905,7 @@ __curfile_update_check(WT_CURSOR_BTREE *cbt) WT_SESSION_IMPL *session; WT_UPDATE *upd; - btree = cbt->btree; + btree = CUR2BT(cbt); page = cbt->ref->page; session = CUR2S(cbt); upd = NULL; @@ -944,7 +942,7 @@ __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) session = CUR2S(cbt); yield_count = sleep_usecs = 0; - WT_ASSERT(session, cbt->btree->type == BTREE_ROW); + WT_ASSERT(session, CUR2BT(cbt)->type == BTREE_ROW); /* * The pinned page goes away if we do a search, get a local copy of any pinned key and discard @@ -990,7 +988,7 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned) uint64_t yield_count, sleep_usecs; 
bool iterating, searched, valid; - btree = cbt->btree; + btree = CUR2BT(cbt); cursor = &cbt->iface; session = CUR2S(cbt); yield_count = sleep_usecs = 0; @@ -1181,14 +1179,11 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) uint64_t yield_count, sleep_usecs; bool leaf_found, valid; - btree = cbt->btree; + btree = CUR2BT(cbt); cursor = &cbt->iface; session = CUR2S(cbt); yield_count = sleep_usecs = 0; - WT_RET_PANIC_ASSERT( - session, S2BT(session) == btree, WT_PANIC, "btree differs unexpectedly from session's btree"); - /* It's no longer possible to bulk-load into the tree. */ __wt_cursor_disable_bulk(session); @@ -1365,7 +1360,7 @@ __cursor_chain_exceeded(WT_CURSOR_BTREE *cbt) upd = NULL; if (cbt->ins != NULL) upd = cbt->ins->upd; - else if (cbt->btree->type == BTREE_ROW && page->modify != NULL && + else if (CUR2BT(cbt)->type == BTREE_ROW && page->modify != NULL && page->modify->mod_row_update != NULL) upd = page->modify->mod_row_update[cbt->slot]; @@ -1525,7 +1520,7 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_CURSOR *cursor; WT_SESSION_IMPL *session; - btree = cbt->btree; + btree = CUR2BT(cbt); cursor = &cbt->iface; session = CUR2S(cbt); @@ -1548,18 +1543,20 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) { + WT_BTREE *btree; WT_CURSOR *a, *b; WT_SESSION_IMPL *session; + btree = CUR2BT(a_arg); a = (WT_CURSOR *)a_arg; b = (WT_CURSOR *)b_arg; session = CUR2S(a_arg); /* Confirm both cursors reference the same object. 
*/ - if (a_arg->btree != b_arg->btree) - WT_RET_MSG(session, EINVAL, "Cursors must reference the same object"); + if (CUR2BT(a_arg) != CUR2BT(b_arg)) + WT_RET_MSG(session, EINVAL, "cursors must reference the same object"); - switch (a_arg->btree->type) { + switch (btree->type) { case BTREE_COL_FIX: case BTREE_COL_VAR: /* @@ -1574,7 +1571,7 @@ __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) *cmpp = 1; break; case BTREE_ROW: - WT_RET(__wt_compare(session, a_arg->btree->collator, &a->key, &b->key, cmpp)); + WT_RET(__wt_compare(session, btree->collator, &a->key, &b->key, cmpp)); break; } return (0); @@ -1587,7 +1584,7 @@ __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) static inline bool __cursor_equals(WT_CURSOR_BTREE *a, WT_CURSOR_BTREE *b) { - switch (a->btree->type) { + switch (CUR2BT(a)->type) { case BTREE_COL_FIX: case BTREE_COL_VAR: /* @@ -1629,8 +1626,8 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) cmp = 0; /* Confirm both cursors reference the same object. */ - if (a_arg->btree != b_arg->btree) - WT_RET_MSG(session, EINVAL, "Cursors must reference the same object"); + if (CUR2BT(a_arg) != CUR2BT(b_arg)) + WT_RET_MSG(session, EINVAL, "cursors must reference the same object"); /* * The reason for an equals method is because we can avoid doing a full key comparison in some @@ -1771,7 +1768,7 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) WT_DECL_RET; WT_SESSION_IMPL *session; - btree = start->btree; + btree = CUR2BT(start); session = CUR2S(start); WT_STAT_DATA_INCR(session, cursor_truncate); @@ -1825,7 +1822,6 @@ __wt_btcur_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) memset(cbt, 0, sizeof(WT_CURSOR_BTREE)); cbt->iface.session = &session->iface; - cbt->btree = S2BT(session); } /* @@ -1847,6 +1843,23 @@ __wt_btcur_open(WT_CURSOR_BTREE *cbt) } /* + * __wt_btcur_cache -- + * Discard buffers when caching a cursor. 
+ */ +void +__wt_btcur_cache(WT_CURSOR_BTREE *cbt) +{ + WT_SESSION_IMPL *session; + + session = CUR2S(cbt); + + __wt_buf_free(session, &cbt->_row_key); + __wt_buf_free(session, &cbt->_tmp); + __wt_buf_free(session, &cbt->_modify_update.buf); + __wt_buf_free(session, &cbt->_upd_value.buf); +} + +/* * __wt_btcur_close -- * Close a btree cursor. */ @@ -1866,10 +1879,10 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel) if (!lowlevel) ret = __cursor_reset(cbt); - __wt_buf_free(session, &cbt->_modify_update.buf); - __wt_buf_free(session, &cbt->_upd_value.buf); __wt_buf_free(session, &cbt->_row_key); __wt_buf_free(session, &cbt->_tmp); + __wt_buf_free(session, &cbt->_modify_update.buf); + __wt_buf_free(session, &cbt->_upd_value.buf); #ifdef HAVE_DIAGNOSTIC __wt_buf_free(session, &cbt->_lastkey); #endif diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index d0fb68ecb03..c7fc242c215 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -878,12 +878,15 @@ int __wt_debug_cursor_page(void *cursor_arg, const char *ofile) WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) { - WT_CURSOR *cursor; WT_CURSOR_BTREE *cbt; + WT_DECL_RET; + WT_SESSION_IMPL *session; - cursor = cursor_arg; cbt = cursor_arg; - return (__wt_debug_page(cursor->session, cbt->btree, cbt->ref, ofile)); + session = CUR2S(cursor_arg); + + WT_WITH_BTREE(session, CUR2BT(cbt), ret = __wt_debug_page(session, NULL, cbt->ref, ofile)); + return (ret); } /* @@ -905,7 +908,7 @@ __wt_debug_cursor_tree_hs(void *cursor_arg, const char *ofile) WT_RET(__wt_hs_cursor(session, &session_flags, &is_owner)); cbt = (WT_CURSOR_BTREE *)session->hs_cursor; - ret = __wt_debug_tree_all(session, cbt->btree, NULL, ofile); + WT_WITH_BTREE(session, CUR2BT(cbt), ret = __wt_debug_tree_all(session, NULL, NULL, ofile)); WT_TRET(__wt_hs_cursor_close(session, session_flags, is_owner)); return (ret); diff --git 
a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c index b9a3eed1c93..cd02a3de482 100644 --- a/src/third_party/wiredtiger/src/btree/bt_delete.c +++ b/src/third_party/wiredtiger/src/btree/bt_delete.c @@ -383,7 +383,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) * don't need to append a tombstone. */ __wt_read_row_time_window(session, page, rip, &tw); - if (tw.stop_ts == WT_TS_MAX && tw.stop_txn == WT_TXN_MAX) { + if (!__wt_time_window_has_stop(&tw)) { WT_ERR(__tombstone_update_alloc(session, page_del, &upd, &size)); upd->next = upd_array[WT_ROW_SLOT(page, rip)]; upd_array[WT_ROW_SLOT(page, rip)] = upd; diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c index 6d1c377a3f0..aba3c54fb74 100644 --- a/src/third_party/wiredtiger/src/btree/bt_page.c +++ b/src/third_party/wiredtiger/src/btree/bt_page.c @@ -541,13 +541,14 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) btree = S2BT(session); prepare = false; + WT_CLEAR(buf); - instantiate_prepared = F_ISSET_ATOMIC(page, WT_PAGE_INSTANTIATE_PREPARE_UPDATE); + instantiate_prepared = F_ISSET(session, WT_SESSION_INSTANTIATE_PREPARE); /* Walk the page, building indices. */ rip = page->pg_row; WT_CELL_FOREACH_KV (session, page->dsk, unpack) { - if (instantiate_prepared && !prepare && F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE)) + if (instantiate_prepared && !prepare && unpack.tw.prepare) prepare = true; switch (unpack.type) { case WT_CELL_KEY_OVFL: @@ -573,9 +574,9 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) * The visibility information is not referenced on the page so we need to ensure that * the value is globally visible at the point in time where we read the page into cache. 
*/ - if (!btree->huffman_value && unpack.tw.stop_txn == WT_TXN_MAX && - unpack.tw.stop_ts == WT_TS_MAX && !F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE) && - __wt_txn_visible_all(session, unpack.tw.start_txn, unpack.tw.durable_start_ts)) + if (!btree->huffman_value && (__wt_time_window_is_empty(&unpack.tw) || + (!__wt_time_window_has_stop(&unpack.tw) && + __wt_txn_tw_start_visible_all(session, &unpack.tw)))) __wt_row_leaf_value_set(page, rip - 1, &unpack); break; case WT_CELL_VALUE_OVFL: @@ -605,8 +606,8 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) WT_ROW_FOREACH (page, rip, i) { /* Unpack the on-page value cell. */ __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack); - if (F_ISSET(&unpack, WT_CELL_UNPACK_PREPARE)) { - if (unpack.tw.stop_ts == WT_TS_MAX && unpack.tw.stop_txn == WT_TXN_MAX) { + if (unpack.tw.prepare) { + if (!__wt_time_window_has_stop(&unpack.tw)) { /* Take the value from the original page cell. */ WT_RET(__wt_page_cell_data_ref(session, page, &unpack, &buf)); diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c index 3f113e4b2dc..cb79127188d 100644 --- a/src/third_party/wiredtiger/src/btree/bt_random.c +++ b/src/third_party/wiredtiger/src/btree/bt_random.c @@ -474,7 +474,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) uint64_t n, skip; uint32_t read_flags; - btree = cbt->btree; + btree = CUR2BT(cbt); cursor = &cbt->iface; session = CUR2S(cbt); diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index 4d83914e1a3..7f733691119 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -157,10 +157,12 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) * evicting that page and deciding that is a sign that eviction is unstuck. */ page_flags = WT_DATA_IN_ITEM(&tmp) ? 
WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED; - FLD_SET(page_flags, WT_PAGE_INSTANTIATE_PREPARE_UPDATE); if (LF_ISSET(WT_READ_IGNORE_CACHE_SIZE)) FLD_SET(page_flags, WT_PAGE_EVICT_NO_PROGRESS); - WT_ERR(__wt_page_inmem(session, ref, tmp.data, page_flags, &notused)); + F_SET(session, WT_SESSION_INSTANTIATE_PREPARE); + ret = __wt_page_inmem(session, ref, tmp.data, page_flags, &notused); + F_CLR(session, WT_SESSION_INSTANTIATE_PREPARE); + WT_ERR(ret); tmp.mem = NULL; skip_read: diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c index abffa19cf56..100bee74999 100644 --- a/src/third_party/wiredtiger/src/btree/bt_ret.c +++ b/src/third_party/wiredtiger/src/btree/bt_ret.c @@ -249,7 +249,7 @@ __wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE_VALUE *upd_value) * still required for fixed length column store as we have issues with this table type in * durable history which we're planning to address in PM-1814. */ - WT_ASSERT(session, cbt->btree->type == BTREE_COL_FIX); + WT_ASSERT(session, CUR2BT(cbt)->type == BTREE_COL_FIX); WT_RET(__value_return(cbt)); } else { /* diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index b4083c16e12..b1d5b102dcf 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -1383,7 +1383,7 @@ __split_multi_inmem(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT WT_SAVE_UPD *supd; WT_UPDATE *prev_onpage, *upd; uint64_t recno; - uint32_t i, page_flags, slot; + uint32_t i, slot; /* * In 04/2016, we removed column-store record numbers from the WT_PAGE structure, leading to @@ -1405,8 +1405,10 @@ __split_multi_inmem(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT * our caller will not discard the disk image when discarding the original page, and our caller * will discard the allocated page on error, when discarding the allocated WT_REF. 
*/ - page_flags = WT_PAGE_DISK_ALLOC | WT_PAGE_INSTANTIATE_PREPARE_UPDATE; - WT_RET(__wt_page_inmem(session, ref, multi->disk_image, page_flags, &page)); + F_SET(session, WT_SESSION_INSTANTIATE_PREPARE); + ret = __wt_page_inmem(session, ref, multi->disk_image, WT_PAGE_DISK_ALLOC, &page); + F_CLR(session, WT_SESSION_INSTANTIATE_PREPARE); + WT_RET(ret); multi->disk_image = NULL; /* diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c index c2f0ec0c3ce..feb5af65bed 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c @@ -264,7 +264,7 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) addr_unpack.ta.newest_stop_txn = WT_TXN_MAX; } if (ckpt->ta.prepare) - F_SET(&addr_unpack, WT_CELL_UNPACK_PREPARE); + addr_unpack.ta.prepare = 1; addr_unpack.raw = WT_CELL_ADDR_INT; /* Verify the tree. */ diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c index a4a4f8b662d..4d40d9d2ce6 100644 --- a/src/third_party/wiredtiger/src/btree/col_modify.c +++ b/src/third_party/wiredtiger/src/btree/col_modify.c @@ -31,7 +31,7 @@ __wt_col_modify(WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_U u_int i, skipdepth; bool append, logged; - btree = cbt->btree; + btree = CUR2BT(cbt); ins = NULL; page = cbt->ref->page; session = CUR2S(cbt); diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c b/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c index 301bdf13d1b..90a797ba56e 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c @@ -96,7 +96,7 @@ __curbackup_incr_next(WT_CURSOR *cursor) const char *file; cb = (WT_CURSOR_BACKUP *)cursor; - btree = cb->incr_cursor == NULL ? NULL : ((WT_CURSOR_BTREE *)cb->incr_cursor)->btree; + btree = cb->incr_cursor == NULL ? 
NULL : CUR2BT(cb->incr_cursor); raw = F_MASK(cursor, WT_CURSTD_RAW); CURSOR_API_CALL(cursor, session, get_value, btree); F_CLR(cursor, WT_CURSTD_RAW); diff --git a/src/third_party/wiredtiger/src/cursor/cur_bulk.c b/src/third_party/wiredtiger/src/cursor/cur_bulk.c index 6eb4351276b..8302330edd1 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_bulk.c +++ b/src/third_party/wiredtiger/src/cursor/cur_bulk.c @@ -41,7 +41,7 @@ __curbulk_insert_fix(WT_CURSOR *cursor) uint64_t recno; cbulk = (WT_CURSOR_BULK *)cursor; - btree = cbulk->cbt.btree; + btree = CUR2BT(&cbulk->cbt); /* * Bulk cursor inserts are updates, but don't need auto-commit transactions because they are @@ -91,7 +91,7 @@ __curbulk_insert_fix_bitmap(WT_CURSOR *cursor) WT_SESSION_IMPL *session; cbulk = (WT_CURSOR_BULK *)cursor; - btree = cbulk->cbt.btree; + btree = CUR2BT(&cbulk->cbt); /* * Bulk cursor inserts are updates, but don't need auto-commit transactions because they are @@ -124,7 +124,7 @@ __curbulk_insert_var(WT_CURSOR *cursor) uint64_t recno; cbulk = (WT_CURSOR_BULK *)cursor; - btree = cbulk->cbt.btree; + btree = CUR2BT(&cbulk->cbt); /* * Bulk cursor inserts are updates, but don't need auto-commit transactions because they are @@ -228,7 +228,7 @@ __curbulk_insert_row(WT_CURSOR *cursor) int cmp; cbulk = (WT_CURSOR_BULK *)cursor; - btree = cbulk->cbt.btree; + btree = CUR2BT(&cbulk->cbt); /* * Bulk cursor inserts are updates, but don't need auto-commit transactions because they are @@ -274,7 +274,7 @@ __curbulk_insert_row_skip_check(WT_CURSOR *cursor) WT_SESSION_IMPL *session; cbulk = (WT_CURSOR_BULK *)cursor; - btree = cbulk->cbt.btree; + btree = CUR2BT(&cbulk->cbt); /* * Bulk cursor inserts are updates, but don't need auto-commit transactions because they are @@ -301,27 +301,27 @@ int __wt_curbulk_init( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool bitmap, bool skip_sort_check) { - WT_CURSOR *c; + WT_CURSOR *cursor; WT_CURSOR_BTREE *cbt; - c = &cbulk->cbt.iface; + cursor = 
&cbulk->cbt.iface; cbt = &cbulk->cbt; /* Bulk cursors only support insert and close (reset is a no-op). */ - __wt_cursor_set_notsup(c); - switch (cbt->btree->type) { + __wt_cursor_set_notsup(cursor); + switch (CUR2BT(cbt)->type) { case BTREE_COL_FIX: - c->insert = bitmap ? __curbulk_insert_fix_bitmap : __curbulk_insert_fix; + cursor->insert = bitmap ? __curbulk_insert_fix_bitmap : __curbulk_insert_fix; break; case BTREE_COL_VAR: - c->insert = __curbulk_insert_var; + cursor->insert = __curbulk_insert_var; break; case BTREE_ROW: /* * Row-store order comparisons are expensive, so we optionally skip them when we know the * input is correct. */ - c->insert = skip_sort_check ? __curbulk_insert_row_skip_check : __curbulk_insert_row; + cursor->insert = skip_sort_check ? __curbulk_insert_row_skip_check : __curbulk_insert_row; break; } @@ -329,7 +329,22 @@ __wt_curbulk_init( cbulk->recno = 0; cbulk->bitmap = bitmap; if (bitmap) - F_SET(c, WT_CURSTD_RAW); + F_SET(cursor, WT_CURSTD_RAW); return (__wt_bulk_init(session, cbulk)); } + +/* + * __wt_curbulk_close -- + * Close a bulk cursor. 
+ */ +int +__wt_curbulk_close(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) +{ + WT_DECL_RET; + + ret = __wt_bulk_wrapup(session, cbulk); + + __wt_buf_free(session, &cbulk->last); + return (ret); +} diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c index b2d75494110..e19ee8b32b0 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_file.c +++ b/src/third_party/wiredtiger/src/cursor/cur_file.c @@ -27,7 +27,7 @@ __curfile_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_SESSION_IMPL *session; cbt = (WT_CURSOR_BTREE *)a; - CURSOR_API_CALL(a, session, compare, cbt->btree); + CURSOR_API_CALL(a, session, compare, CUR2BT(cbt)); /* * Check both cursors are a "file:" type then call the underlying function, it can handle @@ -57,7 +57,7 @@ __curfile_equals(WT_CURSOR *a, WT_CURSOR *b, int *equalp) WT_SESSION_IMPL *session; cbt = (WT_CURSOR_BTREE *)a; - CURSOR_API_CALL(a, session, equals, cbt->btree); + CURSOR_API_CALL(a, session, equals, CUR2BT(cbt)); /* * Check both cursors are a "file:" type then call the underlying function, it can handle @@ -87,7 +87,7 @@ __curfile_next(WT_CURSOR *cursor) WT_SESSION_IMPL *session; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_API_CALL(cursor, session, next, cbt->btree); + CURSOR_API_CALL(cursor, session, next, CUR2BT(cbt)); WT_ERR(__cursor_copy_release(cursor)); WT_ERR(__wt_btcur_next(cbt, false)); @@ -114,7 +114,7 @@ __wt_curfile_next_random(WT_CURSOR *cursor) WT_SESSION_IMPL *session; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_API_CALL(cursor, session, next, cbt->btree); + CURSOR_API_CALL(cursor, session, next, CUR2BT(cbt)); WT_ERR(__cursor_copy_release(cursor)); WT_ERR(__wt_btcur_next_random(cbt)); @@ -140,7 +140,7 @@ __curfile_prev(WT_CURSOR *cursor) WT_SESSION_IMPL *session; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_API_CALL(cursor, session, prev, cbt->btree); + CURSOR_API_CALL(cursor, session, prev, CUR2BT(cbt)); WT_ERR(__cursor_copy_release(cursor)); 
WT_ERR(__wt_btcur_prev(cbt, false)); @@ -166,7 +166,7 @@ __curfile_reset(WT_CURSOR *cursor) WT_SESSION_IMPL *session; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, cbt->btree); + CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, CUR2BT(cbt)); WT_ERR(__cursor_copy_release(cursor)); ret = __wt_btcur_reset(cbt); @@ -192,7 +192,7 @@ __curfile_search(WT_CURSOR *cursor) uint64_t time_start, time_stop; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_API_CALL(cursor, session, search, cbt->btree); + CURSOR_API_CALL(cursor, session, search, CUR2BT(cbt)); WT_ERR(__cursor_copy_release(cursor)); WT_ERR(__cursor_checkkey(cursor)); @@ -223,7 +223,7 @@ __curfile_search_near(WT_CURSOR *cursor, int *exact) uint64_t time_start, time_stop; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_API_CALL(cursor, session, search_near, cbt->btree); + CURSOR_API_CALL(cursor, session, search_near, CUR2BT(cbt)); WT_ERR(__cursor_copy_release(cursor)); WT_ERR(__cursor_checkkey(cursor)); @@ -254,7 +254,7 @@ __curfile_insert(WT_CURSOR *cursor) uint64_t time_start, time_stop; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_UPDATE_API_CALL_BTREE(cursor, session, insert, cbt->btree); + CURSOR_UPDATE_API_CALL_BTREE(cursor, session, insert); WT_ERR(__cursor_copy_release(cursor)); if (!F_ISSET(cursor, WT_CURSTD_APPEND)) @@ -295,7 +295,7 @@ __wt_curfile_insert_check(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; tret = 0; - CURSOR_UPDATE_API_CALL_BTREE(cursor, session, update, cbt->btree); + CURSOR_UPDATE_API_CALL_BTREE(cursor, session, update); WT_ERR(__cursor_copy_release(cursor)); WT_ERR(__cursor_checkkey(cursor)); @@ -322,7 +322,7 @@ __curfile_modify(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) WT_SESSION_IMPL *session; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_UPDATE_API_CALL_BTREE(cursor, session, modify, cbt->btree); + CURSOR_UPDATE_API_CALL_BTREE(cursor, session, modify); WT_ERR(__cursor_copy_release(cursor)); WT_ERR(__cursor_checkkey(cursor)); @@ -357,7 
+357,7 @@ __curfile_update(WT_CURSOR *cursor) uint64_t time_start, time_stop; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_UPDATE_API_CALL_BTREE(cursor, session, update, cbt->btree); + CURSOR_UPDATE_API_CALL_BTREE(cursor, session, update); WT_ERR(__cursor_copy_release(cursor)); WT_ERR(__cursor_checkkey(cursor)); WT_ERR(__cursor_checkvalue(cursor)); @@ -400,7 +400,7 @@ __curfile_remove(WT_CURSOR *cursor) positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_REMOVE_API_CALL(cursor, session, cbt->btree); + CURSOR_REMOVE_API_CALL(cursor, session, CUR2BT(cbt)); WT_ERR(__cursor_copy_release(cursor)); WT_ERR(__cursor_checkkey(cursor)); @@ -437,7 +437,7 @@ __curfile_reserve(WT_CURSOR *cursor) WT_SESSION_IMPL *session; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_UPDATE_API_CALL_BTREE(cursor, session, reserve, cbt->btree); + CURSOR_UPDATE_API_CALL_BTREE(cursor, session, reserve); WT_ERR(__cursor_copy_release(cursor)); WT_ERR(__cursor_checkkey(cursor)); @@ -474,13 +474,12 @@ static int __curfile_close(WT_CURSOR *cursor) { WT_CURSOR_BTREE *cbt; - WT_CURSOR_BULK *cbulk; WT_DECL_RET; WT_SESSION_IMPL *session; bool dead, released; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, cbt->btree); + CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, CUR2BT(cbt)); WT_ERR(__cursor_copy_release(cursor)); err: @@ -496,12 +495,10 @@ err: } dead = F_ISSET(cursor, WT_CURSTD_DEAD); - if (F_ISSET(cursor, WT_CURSTD_BULK)) { - /* Free the bulk-specific resources. */ - cbulk = (WT_CURSOR_BULK *)cbt; - WT_TRET(__wt_bulk_wrapup(session, cbulk)); - __wt_buf_free(session, &cbulk->last); - } + + /* Free the bulk-specific resources. */ + if (F_ISSET(cursor, WT_CURSTD_BULK)) + WT_TRET(__wt_curbulk_close(session, (WT_CURSOR_BULK *)cursor)); WT_TRET(__wt_btcur_close(cbt, false)); /* The URI is owned by the btree handle. 
*/ @@ -543,7 +540,6 @@ __curfile_cache(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; session = CUR2S(cursor); - cbt->dhandle = cbt->btree->dhandle; WT_TRET(__wt_cursor_cache(cursor, cbt->dhandle)); WT_TRET(__wt_session_release_dhandle(session)); @@ -557,6 +553,7 @@ __curfile_cache(WT_CURSOR *cursor) static int __curfile_reopen(WT_CURSOR *cursor, bool check_only) { + WT_BTREE *btree; WT_CURSOR_BTREE *cbt; WT_DATA_HANDLE *dhandle; WT_DECL_RET; @@ -601,10 +598,10 @@ __curfile_reopen(WT_CURSOR *cursor, bool check_only) WT_ASSERT(session, dhandle->type == WT_DHANDLE_TYPE_BTREE); WT_ASSERT(session, ((WT_BTREE *)dhandle->handle)->root.page != NULL); - cbt->btree = dhandle->handle; - cursor->internal_uri = cbt->btree->dhandle->name; - cursor->key_format = cbt->btree->key_format; - cursor->value_format = cbt->btree->value_format; + btree = CUR2BT(cbt); + cursor->internal_uri = btree->dhandle->name; + cursor->key_format = btree->key_format; + cursor->value_format = btree->value_format; } return (ret); } @@ -661,7 +658,7 @@ __curfile_create(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[], cursor->internal_uri = btree->dhandle->name; cursor->key_format = btree->key_format; cursor->value_format = btree->value_format; - cbt->btree = btree; + cbt->dhandle = session->dhandle; /* * Increment the data-source's in-use counter; done now because closing the cursor will @@ -728,12 +725,14 @@ __curfile_create(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[], if (0) { err: + __wt_cursor_dhandle_decr_use(session); + /* * Our caller expects to release the data handle if we fail. Disconnect it from the cursor * before closing. 
*/ - __wt_cursor_dhandle_decr_use(session); - cbt->btree = NULL; + cbt->dhandle = NULL; + WT_TRET(__curfile_close(cursor)); *cursorp = NULL; } diff --git a/src/third_party/wiredtiger/src/cursor/cur_metadata.c b/src/third_party/wiredtiger/src/cursor/cur_metadata.c index 60a2d241e07..5967bd2bf74 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_metadata.c +++ b/src/third_party/wiredtiger/src/cursor/cur_metadata.c @@ -222,7 +222,7 @@ __curmetadata_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) a_file_cursor = a_mdc->file_cursor; b_file_cursor = b_mdc->file_cursor; - CURSOR_API_CALL(a, session, compare, ((WT_CURSOR_BTREE *)a_file_cursor)->btree); + CURSOR_API_CALL(a, session, compare, CUR2BT(a_file_cursor)); if (b->compare != __curmetadata_compare) WT_ERR_MSG(session, EINVAL, "Can only compare cursors of the same type"); @@ -258,7 +258,7 @@ __curmetadata_next(WT_CURSOR *cursor) mdc = (WT_CURSOR_METADATA *)cursor; file_cursor = mdc->file_cursor; - CURSOR_API_CALL(cursor, session, next, ((WT_CURSOR_BTREE *)file_cursor)->btree); + CURSOR_API_CALL(cursor, session, next, CUR2BT(file_cursor)); if (!F_ISSET(mdc, WT_MDC_POSITIONED)) WT_ERR(__curmetadata_metadata_search(session, cursor)); @@ -304,7 +304,7 @@ __curmetadata_prev(WT_CURSOR *cursor) mdc = (WT_CURSOR_METADATA *)cursor; file_cursor = mdc->file_cursor; - CURSOR_API_CALL(cursor, session, prev, ((WT_CURSOR_BTREE *)file_cursor)->btree); + CURSOR_API_CALL(cursor, session, prev, CUR2BT(file_cursor)); if (F_ISSET(mdc, WT_MDC_ONMETADATA)) { ret = WT_NOTFOUND; @@ -351,8 +351,7 @@ __curmetadata_reset(WT_CURSOR *cursor) mdc = (WT_CURSOR_METADATA *)cursor; file_cursor = mdc->file_cursor; - CURSOR_API_CALL_PREPARE_ALLOWED( - cursor, session, reset, ((WT_CURSOR_BTREE *)file_cursor)->btree); + CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, CUR2BT(file_cursor)); if (F_ISSET(mdc, WT_MDC_POSITIONED) && !F_ISSET(mdc, WT_MDC_ONMETADATA)) ret = file_cursor->reset(file_cursor); @@ -377,7 +376,7 @@ 
__curmetadata_search(WT_CURSOR *cursor) mdc = (WT_CURSOR_METADATA *)cursor; file_cursor = mdc->file_cursor; - CURSOR_API_CALL(cursor, session, search, ((WT_CURSOR_BTREE *)file_cursor)->btree); + CURSOR_API_CALL(cursor, session, search, CUR2BT(file_cursor)); WT_MD_CURSOR_NEEDKEY(cursor); @@ -414,7 +413,7 @@ __curmetadata_search_near(WT_CURSOR *cursor, int *exact) mdc = (WT_CURSOR_METADATA *)cursor; file_cursor = mdc->file_cursor; - CURSOR_API_CALL(cursor, session, search_near, ((WT_CURSOR_BTREE *)file_cursor)->btree); + CURSOR_API_CALL(cursor, session, search_near, CUR2BT(file_cursor)); WT_MD_CURSOR_NEEDKEY(cursor); @@ -452,7 +451,7 @@ __curmetadata_insert(WT_CURSOR *cursor) mdc = (WT_CURSOR_METADATA *)cursor; file_cursor = mdc->file_cursor; - CURSOR_API_CALL(cursor, session, insert, ((WT_CURSOR_BTREE *)file_cursor)->btree); + CURSOR_API_CALL(cursor, session, insert, CUR2BT(file_cursor)); WT_MD_CURSOR_NEEDKEY(cursor); WT_MD_CURSOR_NEEDVALUE(cursor); @@ -480,7 +479,7 @@ __curmetadata_update(WT_CURSOR *cursor) mdc = (WT_CURSOR_METADATA *)cursor; file_cursor = mdc->file_cursor; - CURSOR_API_CALL(cursor, session, update, ((WT_CURSOR_BTREE *)file_cursor)->btree); + CURSOR_API_CALL(cursor, session, update, CUR2BT(file_cursor)); WT_MD_CURSOR_NEEDKEY(cursor); WT_MD_CURSOR_NEEDVALUE(cursor); @@ -508,7 +507,7 @@ __curmetadata_remove(WT_CURSOR *cursor) mdc = (WT_CURSOR_METADATA *)cursor; file_cursor = mdc->file_cursor; - CURSOR_API_CALL(cursor, session, remove, ((WT_CURSOR_BTREE *)file_cursor)->btree); + CURSOR_API_CALL(cursor, session, remove, CUR2BT(file_cursor)); WT_MD_CURSOR_NEEDKEY(cursor); @@ -535,8 +534,7 @@ __curmetadata_close(WT_CURSOR *cursor) mdc = (WT_CURSOR_METADATA *)cursor; c = mdc->file_cursor; - CURSOR_API_CALL_PREPARE_ALLOWED( - cursor, session, close, c == NULL ? NULL : ((WT_CURSOR_BTREE *)c)->btree); + CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, c == NULL ? 
NULL : CUR2BT(c)); err: if (c != NULL) diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c index fd81465eb76..57a22a04aac 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_std.c +++ b/src/third_party/wiredtiger/src/cursor/cur_std.c @@ -655,6 +655,9 @@ __wt_cursor_cache(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle) __wt_buf_free(session, &cursor->key); __wt_buf_free(session, &cursor->value); + /* Discard the underlying WT_CURSOR_BTREE buffers. */ + __wt_btcur_cache((WT_CURSOR_BTREE *)cursor); + /* * Acquire a reference while decrementing the in-use counter. After this point, the dhandle may * be marked dead, but the actual handle won't be removed. diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c index ea4858b0c30..02e1316fde0 100644 --- a/src/third_party/wiredtiger/src/history/hs.c +++ b/src/third_party/wiredtiger/src/history/hs.c @@ -66,7 +66,7 @@ __wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep) WT_RET(__wt_hs_cursor(session, &session_flags, &is_owner)); - *hs_btreep = ((WT_CURSOR_BTREE *)session->hs_cursor)->btree; + *hs_btreep = CUR2BT(session->hs_cursor); WT_ASSERT(session, *hs_btreep != NULL); WT_TRET(__wt_hs_cursor_close(session, session_flags, is_owner)); @@ -303,7 +303,7 @@ __wt_hs_modify(WT_CURSOR_BTREE *hs_cbt, WT_UPDATE *hs_upd) last_upd->next = mod->mod_row_update[hs_cbt->slot]; } - WT_WITH_BTREE(session, hs_cbt->btree, + WT_WITH_BTREE(session, CUR2BT(hs_cbt), ret = __wt_row_modify(hs_cbt, &hs_cbt->iface.key, NULL, hs_upd, WT_UPDATE_INVALID, true)); return (ret); } @@ -526,8 +526,8 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, WT_DECL_RET; cbt = (WT_CURSOR_BTREE *)cursor; - WT_WITH_BTREE(session, cbt->btree, ret = __hs_insert_record_with_btree(session, cursor, btree, - key, upd, type, hs_value, stop_ts_pair)); + WT_WITH_BTREE(session, CUR2BT(cbt), ret = 
__hs_insert_record_with_btree(session, cursor, btree, + key, upd, type, hs_value, stop_ts_pair)); return (ret); } @@ -682,7 +682,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) if (upd->type == WT_UPDATE_TOMBSTONE) { if (modifies.size > 0) { if (upd->start_ts == WT_TS_NONE) { - WT_ERR(__wt_hs_delete_key(session, btree->id, key)); + /* We can only delete history store entries that have timestamps. */ + WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1)); WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts); } __wt_modify_vector_pop(&modifies, &upd); @@ -733,7 +734,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) if (prev_upd->type == WT_UPDATE_TOMBSTONE) { WT_ASSERT(session, modifies.size > 0); if (prev_upd->start_ts == WT_TS_NONE) { - WT_ERR(__wt_hs_delete_key(session, btree->id, key)); + /* We can only delete history store entries that have timestamps. */ + WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1)); WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts); } __wt_modify_vector_pop(&modifies, &prev_upd); @@ -788,7 +790,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) WT_ERR(__wt_block_manager_named_size(session, WT_HS_FILE, &hs_size)); WT_STAT_CONN_SET(session, cache_hs_ondisk, hs_size); - max_hs_size = ((WT_CURSOR_BTREE *)cursor)->btree->file_max; + max_hs_size = CUR2BT(cursor)->file_max; if (max_hs_size != 0 && (uint64_t)hs_size > max_hs_size) WT_ERR_PANIC(session, WT_PANIC, "WiredTigerHS: file size of %" PRIu64 " exceeds maximum size %" PRIu64, (uint64_t)hs_size, @@ -1117,11 +1119,12 @@ err: } /* - * __hs_delete_key_int -- - * Internal helper for deleting history store content for a given key. + * __hs_delete_key_from_ts_int -- + * Internal helper for deleting history store content of a given key from a timestamp. 
*/ static int -__hs_delete_key_int(WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *key) +__hs_delete_key_from_ts_int( + WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts) { WT_CURSOR *hs_cursor; WT_DECL_ITEM(srch_key); @@ -1135,7 +1138,7 @@ __hs_delete_key_int(WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM * hs_cursor = session->hs_cursor; WT_RET(__wt_scr_alloc(session, 0, &srch_key)); - hs_cursor->set_key(hs_cursor, btree_id, key, WT_TS_NONE, (uint64_t)0); + hs_cursor->set_key(hs_cursor, btree_id, key, ts, (uint64_t)0); WT_ERR(__wt_buf_set(session, srch_key, hs_cursor->key.data, hs_cursor->key.size)); WT_ERR_NOTFOUND_OK(hs_cursor->search_near(hs_cursor, &exact), true); /* Empty history store is fine. */ @@ -1176,11 +1179,12 @@ err: } /* - * __wt_hs_delete_key -- - * Delete an entire key's worth of data in the history store. + * __wt_hs_delete_key_from_ts -- + * Delete history store content of a given key from a timestamp. */ int -__wt_hs_delete_key(WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *key) +__wt_hs_delete_key_from_ts( + WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts) { WT_DECL_RET; uint32_t session_flags; @@ -1205,7 +1209,7 @@ __wt_hs_delete_key(WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *k F_SET(session->hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE); /* The tree structure can change while we try to insert the mod list, retry if that happens. 
*/ - while ((ret = __hs_delete_key_int(session, btree_id, key)) == WT_RESTART) + while ((ret = __hs_delete_key_from_ts_int(session, btree_id, key, ts)) == WT_RESTART) ; F_CLR(session->hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE); diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h index e4455d62b03..facf3ef247d 100644 --- a/src/third_party/wiredtiger/src/include/api.h +++ b/src/third_party/wiredtiger/src/include/api.h @@ -86,10 +86,10 @@ while (0) /* An API call wrapped in a transaction if necessary. */ -#define TXN_API_CALL(s, h, n, bt, config, cfg) \ +#define TXN_API_CALL(s, h, n, dh, config, cfg) \ do { \ bool __autotxn = false, __update = false; \ - API_CALL(s, h, n, bt, config, cfg); \ + API_CALL(s, h, n, dh, config, cfg); \ __wt_txn_timestamp_flags(s); \ __autotxn = !F_ISSET((s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING); \ if (__autotxn) \ @@ -233,12 +233,12 @@ CURSOR_REMOVE_API_CALL(cur, s, bt); \ JOINABLE_CURSOR_CALL_CHECK(cur) -#define CURSOR_UPDATE_API_CALL_BTREE(cur, s, n, bt) \ - (s) = (WT_SESSION_IMPL *)(cur)->session; \ - SESSION_API_PREPARE_CHECK(s, WT_CURSOR, n); \ - TXN_API_CALL_NOCONF(s, WT_CURSOR, n, ((WT_BTREE *)(bt))->dhandle); \ - if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && !F_ISSET((WT_BTREE *)(bt), WT_BTREE_IGNORE_CACHE) && \ - __wt_cache_full(s)) \ +#define CURSOR_UPDATE_API_CALL_BTREE(cur, s, n) \ + (s) = (WT_SESSION_IMPL *)(cur)->session; \ + SESSION_API_PREPARE_CHECK(s, WT_CURSOR, n); \ + TXN_API_CALL_NOCONF(s, WT_CURSOR, n, ((WT_CURSOR_BTREE *)(cur))->dhandle); \ + if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && !F_ISSET(CUR2BT(cur), WT_BTREE_IGNORE_CACHE) && \ + __wt_cache_full(s)) \ WT_ERR(WT_CACHE_FULL); #define CURSOR_UPDATE_API_CALL(cur, s, n) \ diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index e466f5716f8..f6e1533dd23 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h 
@@ -626,17 +626,16 @@ struct __wt_page { uint8_t type; /* Page type */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_PAGE_BUILD_KEYS 0x001u /* Keys have been built in memory */ -#define WT_PAGE_DISK_ALLOC 0x002u /* Disk image in allocated memory */ -#define WT_PAGE_DISK_MAPPED 0x004u /* Disk image in mapped memory */ -#define WT_PAGE_EVICT_LRU 0x008u /* Page is on the LRU queue */ -#define WT_PAGE_EVICT_NO_PROGRESS 0x010u /* Eviction doesn't count as progress */ -#define WT_PAGE_INSTANTIATE_PREPARE_UPDATE 0x020u /* Instantiate prepared updates */ -#define WT_PAGE_OVERFLOW_KEYS 0x040u /* Page has overflow keys */ -#define WT_PAGE_SPLIT_INSERT 0x080u /* A leaf page was split for append */ -#define WT_PAGE_UPDATE_IGNORE 0x100u /* Ignore updates on page discard */ - /* AUTOMATIC FLAG VALUE GENERATION STOP */ - uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */ +#define WT_PAGE_BUILD_KEYS 0x01u /* Keys have been built in memory */ +#define WT_PAGE_DISK_ALLOC 0x02u /* Disk image in allocated memory */ +#define WT_PAGE_DISK_MAPPED 0x04u /* Disk image in mapped memory */ +#define WT_PAGE_EVICT_LRU 0x08u /* Page is on the LRU queue */ +#define WT_PAGE_EVICT_NO_PROGRESS 0x10u /* Eviction doesn't count as progress */ +#define WT_PAGE_OVERFLOW_KEYS 0x20u /* Page has overflow keys */ +#define WT_PAGE_SPLIT_INSERT 0x40u /* A leaf page was split for append */ +#define WT_PAGE_UPDATE_IGNORE 0x80u /* Ignore updates on page discard */ + /* AUTOMATIC FLAG VALUE GENERATION STOP */ + uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */ uint8_t unused[2]; /* Unused padding */ diff --git a/src/third_party/wiredtiger/src/include/cell.h b/src/third_party/wiredtiger/src/include/cell.h index d7a5452dbe1..3c63fe4f49d 100644 --- a/src/third_party/wiredtiger/src/include/cell.h +++ b/src/third_party/wiredtiger/src/include/cell.h @@ -146,8 +146,7 @@ struct __wt_cell { /* AUTOMATIC FLAG VALUE GENERATION START */ #define WT_CELL_UNPACK_OVERFLOW 0x1u /* cell is an overflow */ 
-#define WT_CELL_UNPACK_PREPARE 0x2u /* cell is part of a prepared transaction */ -#define WT_CELL_UNPACK_TIME_WINDOW_CLEARED 0x4u /* time window cleared because of restart */ +#define WT_CELL_UNPACK_TIME_WINDOW_CLEARED 0x2u /* time window cleared because of restart */ /* AUTOMATIC FLAG VALUE GENERATION STOP */ /* diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i index fad797a842e..f9aa0928e26 100644 --- a/src/third_party/wiredtiger/src/include/cell.i +++ b/src/third_party/wiredtiger/src/include/cell.i @@ -836,10 +836,8 @@ copy_cell_restart: break; flags = *p++; /* skip second descriptor byte */ - if (LF_ISSET(WT_CELL_PREPARE)) { - F_SET(unpack, WT_CELL_UNPACK_PREPARE); + if (LF_ISSET(WT_CELL_PREPARE)) ta->prepare = 1; - } if (LF_ISSET(WT_CELL_TS_START)) WT_RET( __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->oldest_start_ts)); @@ -886,10 +884,8 @@ copy_cell_restart: break; flags = *p++; /* skip second descriptor byte */ - if (LF_ISSET(WT_CELL_PREPARE)) { - F_SET(unpack, WT_CELL_UNPACK_PREPARE); + if (LF_ISSET(WT_CELL_PREPARE)) tw->prepare = 1; - } if (LF_ISSET(WT_CELL_TS_START)) WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->start_ts)); if (LF_ISSET(WT_CELL_TXN_START)) diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 522a031630e..6f48337e9d8 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -71,14 +71,15 @@ struct __wt_cursor_backup { uint8_t flags; }; +/* Get the WT_BTREE from any WT_CURSOR/WT_CURSOR_BTREE. */ +#define CUR2BT(c) \ + (((WT_CURSOR_BTREE *)(c))->dhandle == NULL ? \ + NULL : \ + (WT_BTREE *)((WT_CURSOR_BTREE *)(c))->dhandle->handle) + struct __wt_cursor_btree { WT_CURSOR iface; - /* - * The btree field is safe to use when the cursor is open. 
When the cursor is cached, the btree - * may be closed, so it is only safe initially to look at the underlying data handle. - */ - WT_BTREE *btree; /* Enclosing btree */ WT_DATA_HANDLE *dhandle; /* Data handle for the btree */ /* diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h index 3f863e46fea..49e9e756fe6 100644 --- a/src/third_party/wiredtiger/src/include/dhandle.h +++ b/src/third_party/wiredtiger/src/include/dhandle.h @@ -37,7 +37,7 @@ !F_ISSET(dhandle, WT_DHANDLE_DROPPED)) /* The metadata cursor's data handle. */ -#define WT_SESSION_META_DHANDLE(s) (((WT_CURSOR_BTREE *)((s)->meta_cursor))->btree->dhandle) +#define WT_SESSION_META_DHANDLE(s) (((WT_CURSOR_BTREE *)((s)->meta_cursor))->dhandle) #define WT_DHANDLE_ACQUIRE(dhandle) (void)__wt_atomic_add32(&(dhandle)->session_ref, 1) diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index abaf0575fee..0f6e711354f 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -478,6 +478,8 @@ extern int __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, WT_CUR extern int __wt_curbackup_open_incr(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, WT_CURSOR *cursor, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curbulk_close(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_curbulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool bitmap, bool skip_sort_check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], @@ -762,8 +764,8 @@ extern int __wt_hs_cursor_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_cursor_position(WT_SESSION_IMPL *session, 
WT_CURSOR *cursor, uint32_t btree_id, WT_ITEM *key, wt_timestamp_t timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_hs_delete_key(WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *key) - WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, uint32_t btree_id, + const WT_ITEM *key, wt_timestamp_t ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) @@ -1596,6 +1598,7 @@ extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_S extern void __wt_bloom_hash(WT_BLOOM *bloom, WT_ITEM *key, WT_BLOOM_HASH *bhash); extern void __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_btcur_cache(WT_CURSOR_BTREE *cbt); extern void __wt_btcur_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt); extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt); extern void __wt_btcur_open(WT_CURSOR_BTREE *cbt); @@ -1830,10 +1833,20 @@ static inline bool __wt_split_descent_race(WT_SESSION_IMPL *session, WT_REF *ref WT_PAGE_INDEX *saved_pindex) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_time_aggregate_is_empty(WT_TIME_AGGREGATE *ta) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_time_window_has_stop(WT_TIME_WINDOW *tw) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_time_window_is_empty(WT_TIME_WINDOW *tw) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_time_windows_equal(WT_TIME_WINDOW *tw1, WT_TIME_WINDOW *tw2) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) + 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_txn_tw_start_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_txn_tw_stop_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +static inline bool __wt_txn_tw_stop_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_upd_value_visible_all(WT_SESSION_IMPL *session, WT_UPDATE_VALUE *upd_value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); static inline bool __wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd) @@ -2184,6 +2197,8 @@ static inline void __wt_time_aggregate_init(WT_TIME_AGGREGATE *ta); static inline void __wt_time_aggregate_init_max(WT_TIME_AGGREGATE *ta); static inline void __wt_time_aggregate_merge(WT_TIME_AGGREGATE *dest, WT_TIME_AGGREGATE *source); static inline void __wt_time_aggregate_update(WT_TIME_AGGREGATE *ta, WT_TIME_WINDOW *tw); +static inline void __wt_time_window_clear_obsolete( + WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw, uint64_t oldest_id, wt_timestamp_t oldest_ts); static inline void __wt_time_window_copy(WT_TIME_WINDOW *dest, WT_TIME_WINDOW *source); static inline void __wt_time_window_init(WT_TIME_WINDOW *tw); static inline void __wt_time_window_init_max(WT_TIME_WINDOW *tw); diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h index c31a92d0edd..3b75373ae56 100644 --- a/src/third_party/wiredtiger/src/include/reconcile.h +++ b/src/third_party/wiredtiger/src/include/reconcile.h @@ -28,6 +28,12 @@ struct __wt_reconcile { /* Track the oldest running transaction. */ uint64_t last_running; + /* Track the oldest running id. This one doesn't consider checkpoint. */ + uint64_t rec_start_oldest_id; + + /* Track the pinned timestamp at the time reconciliation started. 
*/ + wt_timestamp_t rec_start_pinned_ts; + /* Track the page's min/maximum transactions. */ uint64_t max_txn; wt_timestamp_t max_ts; diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index e2cd24d0e95..c46236b4d1d 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -167,31 +167,32 @@ struct __wt_session_impl { #define WT_SESSION_CAN_WAIT 0x00000008u #define WT_SESSION_HS_CURSOR 0x00000010u #define WT_SESSION_IGNORE_CACHE_SIZE 0x00000020u -#define WT_SESSION_INTERNAL 0x00000040u -#define WT_SESSION_LOCKED_CHECKPOINT 0x00000080u -#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000100u -#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000200u -#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00000400u -#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00000800u -#define WT_SESSION_LOCKED_METADATA 0x00001000u -#define WT_SESSION_LOCKED_PASS 0x00002000u -#define WT_SESSION_LOCKED_SCHEMA 0x00004000u -#define WT_SESSION_LOCKED_SLOT 0x00008000u -#define WT_SESSION_LOCKED_TABLE_READ 0x00010000u -#define WT_SESSION_LOCKED_TABLE_WRITE 0x00020000u -#define WT_SESSION_LOCKED_TURTLE 0x00040000u -#define WT_SESSION_LOGGING_INMEM 0x00080000u -#define WT_SESSION_NO_DATA_HANDLES 0x00100000u -#define WT_SESSION_NO_LOGGING 0x00200000u -#define WT_SESSION_NO_RECONCILE 0x00400000u -#define WT_SESSION_NO_SCHEMA_LOCK 0x00800000u -#define WT_SESSION_QUIET_CORRUPT_FILE 0x01000000u -#define WT_SESSION_READ_WONT_NEED 0x02000000u -#define WT_SESSION_RESOLVING_MODIFY 0x04000000u -#define WT_SESSION_RESOLVING_TXN 0x08000000u -#define WT_SESSION_ROLLBACK_TO_STABLE 0x10000000u -#define WT_SESSION_SCHEMA_TXN 0x20000000u -#define WT_SESSION_SERVER_ASYNC 0x40000000u +#define WT_SESSION_INSTANTIATE_PREPARE 0x00000040u +#define WT_SESSION_INTERNAL 0x00000080u +#define WT_SESSION_LOCKED_CHECKPOINT 0x00000100u +#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000200u +#define 
WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000400u +#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00000800u +#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00001000u +#define WT_SESSION_LOCKED_METADATA 0x00002000u +#define WT_SESSION_LOCKED_PASS 0x00004000u +#define WT_SESSION_LOCKED_SCHEMA 0x00008000u +#define WT_SESSION_LOCKED_SLOT 0x00010000u +#define WT_SESSION_LOCKED_TABLE_READ 0x00020000u +#define WT_SESSION_LOCKED_TABLE_WRITE 0x00040000u +#define WT_SESSION_LOCKED_TURTLE 0x00080000u +#define WT_SESSION_LOGGING_INMEM 0x00100000u +#define WT_SESSION_NO_DATA_HANDLES 0x00200000u +#define WT_SESSION_NO_LOGGING 0x00400000u +#define WT_SESSION_NO_RECONCILE 0x00800000u +#define WT_SESSION_NO_SCHEMA_LOCK 0x01000000u +#define WT_SESSION_QUIET_CORRUPT_FILE 0x02000000u +#define WT_SESSION_READ_WONT_NEED 0x04000000u +#define WT_SESSION_RESOLVING_MODIFY 0x08000000u +#define WT_SESSION_RESOLVING_TXN 0x10000000u +#define WT_SESSION_ROLLBACK_TO_STABLE 0x20000000u +#define WT_SESSION_SCHEMA_TXN 0x40000000u +#define WT_SESSION_SERVER_ASYNC 0x80000000u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint32_t flags; diff --git a/src/third_party/wiredtiger/src/include/timestamp.i b/src/third_party/wiredtiger/src/include/timestamp.i index c6e7d66ffef..32cd51774c4 100644 --- a/src/third_party/wiredtiger/src/include/timestamp.i +++ b/src/third_party/wiredtiger/src/include/timestamp.i @@ -53,6 +53,40 @@ __wt_time_window_copy(WT_TIME_WINDOW *dest, WT_TIME_WINDOW *source) } /* + * __wt_time_window_clear_obsolete -- + * Where possible modify time window values to avoid writing obsolete values to the cell later. + */ +static inline void +__wt_time_window_clear_obsolete( + WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw, uint64_t oldest_id, wt_timestamp_t oldest_ts) +{ + /* + * In memory database don't need to avoid writing values to the cell. 
If we remove this check we + * create an extra update on the end of the chain later in reconciliation as we'll re-append the + * disk image value to the update chain. + */ + if (!tw->prepare && !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) { + if (tw->stop_txn == WT_TXN_MAX && tw->start_txn < oldest_id) + tw->start_txn = WT_TXN_NONE; + /* Avoid retrieving the pinned timestamp unless we need it. */ + if (tw->stop_ts == WT_TS_MAX) { + /* + * The durable stop timestamp should be its default value whenever the stop timestamp + * is. + */ + WT_ASSERT(session, tw->durable_stop_ts == WT_TS_NONE); + /* + * The durable start timestamp is always greater than or equal to the start timestamp, + * as such we must check it against the pinned timestamp and not the start timestamp. + */ + WT_ASSERT(session, tw->start_ts <= tw->durable_start_ts); + if (tw->durable_start_ts < oldest_ts) + tw->start_ts = tw->durable_start_ts = WT_TS_NONE; + } + } +} + +/* * __wt_time_window_is_empty -- * Return true if the time window is equivalent to the default time window. */ @@ -65,6 +99,16 @@ __wt_time_window_is_empty(WT_TIME_WINDOW *tw) } /* + * __wt_time_window_has_stop -- + * Check if the stop time window is set. + */ +static inline bool +__wt_time_window_has_stop(WT_TIME_WINDOW *tw) +{ + return (tw->stop_txn != WT_TXN_MAX || tw->stop_ts != WT_TS_MAX); +} + +/* * __wt_time_windows_equal -- * Return true if the time windows are the same. */ diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 90858eb6950..ff307d84226 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -602,6 +602,50 @@ __wt_txn_upd_value_visible_all(WT_SESSION_IMPL *session, WT_UPDATE_VALUE *upd_va } /* + * __wt_txn_tw_stop_visible -- + * Is the given stop time window visible?
+ */ +static inline bool +__wt_txn_tw_stop_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) +{ + return (__wt_time_window_has_stop(tw) && !tw->prepare && + __wt_txn_visible(session, tw->stop_txn, tw->stop_ts)); +} + +/* + * __wt_txn_tw_start_visible -- + * Is the given start time window visible? + */ +static inline bool +__wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) +{ + return ((__wt_time_window_has_stop(tw) || !tw->prepare) && + __wt_txn_visible(session, tw->start_txn, tw->start_ts)); +} + +/* + * __wt_txn_tw_start_visible_all -- + * Is the given start time window visible to all (possible) readers? + */ +static inline bool +__wt_txn_tw_start_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) +{ + return ((__wt_time_window_has_stop(tw) || !tw->prepare) && + __wt_txn_visible_all(session, tw->start_txn, tw->durable_start_ts)); +} + +/* + * __wt_txn_tw_stop_visible_all -- + * Is the given stop time window visible to all (possible) readers? + */ +static inline bool +__wt_txn_tw_stop_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) +{ + return (__wt_time_window_has_stop(tw) && !tw->prepare && + __wt_txn_visible_all(session, tw->stop_txn, tw->durable_stop_ts)); +} + +/* * __txn_visible_id -- * Can the current transaction see the given ID? */ @@ -873,11 +917,10 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint * tombstone and should return "not found", except for history store scan during rollback to * stable and when we are told to ignore non-globally visible tombstones. 
*/ - if (tw.stop_txn != WT_TXN_MAX && tw.stop_ts != WT_TS_MAX && !tw.prepare && - __wt_txn_visible(session, tw.stop_txn, tw.stop_ts) && + if (__wt_txn_tw_stop_visible(session, &tw) && ((!F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE) && (!WT_IS_HS(S2BT(session)) || !F_ISSET(session, WT_SESSION_ROLLBACK_TO_STABLE))) || - __wt_txn_visible_all(session, tw.stop_txn, tw.durable_stop_ts))) { + __wt_txn_tw_stop_visible_all(session, &tw))) { cbt->upd_value->buf.data = NULL; cbt->upd_value->buf.size = 0; cbt->upd_value->durable_ts = tw.durable_stop_ts; @@ -887,14 +930,8 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint return (0); } - /* - * If the start time pair is visible and it is not a prepared value then we need to return the - * ondisk value. - */ - if ((!tw.prepare || (tw.stop_txn != WT_TXN_MAX && tw.stop_ts != WT_TS_MAX)) && - (__wt_txn_visible(session, tw.start_txn, tw.start_ts) || - F_ISSET(session, WT_SESSION_RESOLVING_MODIFY))) { - + /* If the start time pair is visible then we need to return the ondisk value. */ + if (__wt_txn_tw_start_visible(session, &tw) || F_ISSET(session, WT_SESSION_RESOLVING_MODIFY)) { /* If we are resolving a modify then the btree must be the history store. */ WT_ASSERT( session, (F_ISSET(session, WT_SESSION_RESOLVING_MODIFY) && WT_IS_HS(S2BT(session))) || @@ -1156,7 +1193,7 @@ __wt_txn_update_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE * aborted updates. Otherwise, we would have either already detected a conflict if we saw an * uncommitted update or determined that it would be safe to write if we saw a committed update. 
*/ - if (!rollback && upd == NULL && cbt != NULL && cbt->btree->type != BTREE_COL_FIX && + if (!rollback && upd == NULL && cbt != NULL && CUR2BT(cbt)->type != BTREE_COL_FIX && cbt->ins == NULL) { __wt_read_cell_time_window(cbt, cbt->ref, &tw); if (tw.stop_txn != WT_TXN_MAX && tw.stop_ts != WT_TS_MAX) diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c index 7017cf74fd5..0f048ba2e1b 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c @@ -123,7 +123,7 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm) if (have_primary) { WT_ENTER_PAGE_INDEX(session); - WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)primary)->btree, + WT_WITH_BTREE(session, CUR2BT(primary), ovfl = __wt_btree_lsm_over_size( session, hard_limit ? 2 * lsm_tree->chunk_size : lsm_tree->chunk_size)); WT_LEAVE_PAGE_INDEX(session); @@ -530,7 +530,7 @@ retry: */ if (lsm_tree->custom_generation == 0 || chunk->generation < lsm_tree->custom_generation) { - checkpoint = ((WT_CURSOR_BTREE *)cursor)->btree->dhandle->checkpoint; + checkpoint = ((WT_CURSOR_BTREE *)cursor)->dhandle->checkpoint; if (checkpoint == NULL && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !chunk->empty) break; } @@ -639,7 +639,7 @@ retry: /* The last chunk is our new primary. */ if (chunk != NULL && !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && chunk->switch_txn == WT_TXN_NONE) { primary = clsm->chunks[clsm->nchunks - 1]->cursor; - btree = ((WT_CURSOR_BTREE *)primary)->btree; + btree = CUR2BT(primary); /* * If the primary is not yet set as the primary, do that now. Note that eviction was @@ -681,7 +681,7 @@ err: */ if (lsm_tree->custom_generation == 0 || chunk->generation < lsm_tree->custom_generation) { - checkpoint = ((WT_CURSOR_BTREE *)cursor)->btree->dhandle->checkpoint; + checkpoint = ((WT_CURSOR_BTREE *)cursor)->dhandle->checkpoint; WT_ASSERT(session, (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !chunk->empty) ? 
checkpoint != NULL : checkpoint == NULL); diff --git a/src/third_party/wiredtiger/src/meta/meta_table.c b/src/third_party/wiredtiger/src/meta/meta_table.c index 9a2bd723099..b6130e9cc41 100644 --- a/src/third_party/wiredtiger/src/meta/meta_table.c +++ b/src/third_party/wiredtiger/src/meta/meta_table.c @@ -71,7 +71,7 @@ __wt_metadata_cursor_open(WT_SESSION_IMPL *session, const char *config, WT_CURSO * Retrieve the btree from the cursor, rather than the session because we don't always switch * the metadata handle in to the session before entering this function. */ - btree = ((WT_CURSOR_BTREE *)(*cursorp))->btree; + btree = CUR2BT(*cursorp); /* * Special settings for metadata: skew eviction so metadata almost always stays in cache and make diff --git a/src/third_party/wiredtiger/src/reconcile/rec_col.c b/src/third_party/wiredtiger/src/reconcile/rec_col.c index 613f83f3f55..846311f6a1d 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_col.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_col.c @@ -982,7 +982,7 @@ compare: (last.tw.durable_start_ts == tw.durable_start_ts && last.tw.start_ts == WT_TS_NONE && last.tw.start_txn == WT_TXN_NONE && last.tw.durable_stop_ts == tw.durable_stop_ts && - last.tw.stop_ts == WT_TS_MAX && last.tw.stop_txn == WT_TXN_MAX)); + !__wt_time_window_has_stop(&last.tw))); ++rle; goto next; } diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c index ccde8b402d7..787a5b882b8 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_row.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c @@ -533,9 +533,9 @@ __rec_row_zero_len(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) * tempted to check the time window against the default here - the check is subtly different due * to the grouping. 
*/ - return ((tw->stop_ts == WT_TS_MAX && tw->stop_txn == WT_TXN_MAX) && + return (!__wt_time_window_has_stop(tw) && ((tw->start_ts == WT_TS_NONE && tw->start_txn == WT_TXN_NONE) || - __wt_txn_visible_all(session, tw->start_txn, tw->durable_start_ts))); + __wt_txn_tw_start_visible_all(session, tw))); } /* @@ -766,8 +766,7 @@ __wt_rec_row_leaf( * If we reconcile an on disk key with a globally visible stop time pair and there are no * new updates for that key, skip writing that key. */ - if (upd == NULL && (tw.stop_txn != WT_TXN_MAX || tw.stop_ts != WT_TS_MAX) && - __wt_txn_visible_all(session, tw.stop_txn, tw.durable_stop_ts)) + if (upd == NULL && __wt_txn_tw_stop_visible_all(session, &tw)) upd = &upd_tombstone; /* Build value cell. */ @@ -862,7 +861,8 @@ __wt_rec_row_leaf( */ if (F_ISSET(S2C(session), WT_CONN_HS_OPEN) && !WT_IS_HS(btree)) { WT_ERR(__wt_row_leaf_key(session, page, rip, tmpkey, true)); - WT_ERR(__wt_hs_delete_key(session, btree->id, tmpkey)); + /* Start from WT_TS_NONE to delete all the history store content of the key. */ + WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, tmpkey, WT_TS_NONE)); WT_STAT_CONN_INCR(session, cache_hs_key_truncate_onpage_removal); } diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c index b428ab8a3e9..4fbe119c3f5 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c @@ -81,7 +81,7 @@ __rec_append_orig_value( * Prepared updates should already be in the update list, add the original update to the * list only when the prepared update is a tombstone. */ - if (F_ISSET(unpack, WT_CELL_UNPACK_PREPARE) && upd->type != WT_UPDATE_TOMBSTONE) + if (unpack->tw.prepare && upd->type != WT_UPDATE_TOMBSTONE) return (0); /* @@ -113,14 +113,23 @@ __rec_append_orig_value( } /* + * We end up in this function because we have selected a newer value to write to disk. 
If we + * select the newest committed update, we should see a valid update here. We can only write + * uncommitted prepared updates in eviction and if the update chain only has uncommitted + * prepared updates, we cannot abort them concurrently when we are still evicting the page + * because we have to do a search for the prepared updates, which can not proceed until eviction + * finishes. + */ + WT_ASSERT(session, oldest_upd != NULL); + + /* * Additionally, we need to append a tombstone before the onpage value we're about to append to * the list, if the onpage value has a valid stop pair. Imagine a case where we insert and * delete a value respectively at timestamp 0 and 10, and later insert it again at 20. We need * the tombstone to tell us there is no value between 10 and 20. */ - if (unpack->tw.stop_ts != WT_TS_MAX || unpack->tw.stop_txn != WT_TXN_MAX) { - tombstone_globally_visible = - __wt_txn_visible_all(session, unpack->tw.stop_txn, unpack->tw.durable_stop_ts); + if (__wt_time_window_has_stop(&unpack->tw)) { + tombstone_globally_visible = __wt_txn_tw_stop_visible_all(session, &unpack->tw); /* No need to append the tombstone if it is already in the update chain. */ if (oldest_upd->type != WT_UPDATE_TOMBSTONE) { @@ -148,9 +157,8 @@ __rec_append_orig_value( * doesn't have same timestamp due to replacing of prepare timestamp with commit and * durable timestamps. Don't compare them when the on-disk version is a prepare. 
*/ - WT_ASSERT(session, F_ISSET(unpack, WT_CELL_UNPACK_PREPARE) || - (unpack->tw.stop_ts == oldest_upd->start_ts && - unpack->tw.stop_txn == oldest_upd->txnid)); + WT_ASSERT(session, unpack->tw.prepare || (unpack->tw.stop_ts == oldest_upd->start_ts && + unpack->tw.stop_txn == oldest_upd->txnid)); if (tombstone_globally_visible) return (0); } @@ -212,9 +220,8 @@ __rec_need_save_upd( if (F_ISSET(r, WT_REC_CHECKPOINT) && upd_select->upd == NULL) return (false); - return ( - !__wt_txn_visible_all(session, upd_select->tw.stop_txn, upd_select->tw.durable_stop_ts) && - !__wt_txn_visible_all(session, upd_select->tw.start_txn, upd_select->tw.durable_start_ts)); + return (!__wt_txn_tw_stop_visible_all(session, &upd_select->tw) && + !__wt_txn_tw_start_visible_all(session, &upd_select->tw)); } /* @@ -524,6 +531,8 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v (upd_saved || F_ISSET(vpack, WT_CELL_UNPACK_OVERFLOW))) WT_ERR(__rec_append_orig_value(session, page, upd_select->upd, vpack)); + __wt_time_window_clear_obsolete( + session, &upd_select->tw, r->rec_start_oldest_id, r->rec_start_pinned_ts); err: __wt_scr_free(session, &tmp); return (ret); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index bdecb82b226..4784910c238 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -495,6 +495,13 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO WT_ORDERED_READ(r->last_running, txn_global->last_running); /* + * Cache the pinned timestamp and oldest id, these are used to when we clear obsolete timestamps + * and ids from time windows later in reconciliation. 
+ */ + __wt_txn_pinned_timestamp(session, &r->rec_start_pinned_ts); + r->rec_start_oldest_id = __wt_txn_oldest_id(session); + + /* * The checkpoint transaction doesn't pin the oldest txn id, therefore the global last_running * can move beyond the checkpoint transaction id. When reconciling the metadata, we have to take * checkpoints into account. @@ -504,7 +511,6 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO if (ckpt_txn != WT_TXN_NONE && WT_TXNID_LT(ckpt_txn, r->last_running)) r->last_running = ckpt_txn; } - /* When operating on the history store table, we should never try history store eviction. */ WT_ASSERT(session, !F_ISSET(btree, WT_BTREE_HS) || !LF_ISSET(WT_REC_HS)); diff --git a/src/third_party/wiredtiger/src/schema/schema_truncate.c b/src/third_party/wiredtiger/src/schema/schema_truncate.c index 5e498d60fb9..390ecb0b5a8 100644 --- a/src/third_party/wiredtiger/src/schema/schema_truncate.c +++ b/src/third_party/wiredtiger/src/schema/schema_truncate.c @@ -138,7 +138,7 @@ __wt_schema_range_truncate(WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR WT_ERR(__cursor_needkey(start)); if (stop != NULL) WT_ERR(__cursor_needkey(stop)); - WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)start)->btree, + WT_WITH_BTREE(session, CUR2BT(start), ret = __wt_btcur_range_truncate((WT_CURSOR_BTREE *)start, (WT_CURSOR_BTREE *)stop)); } else if (WT_PREFIX_MATCH(uri, "table:")) ret = __wt_table_range_truncate((WT_CURSOR_TABLE *)start, (WT_CURSOR_TABLE *)stop); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index bcde2932ab2..bd9692478b7 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -730,7 +730,7 @@ __txn_fixup_prepared_update(WT_SESSION_IMPL *session, WT_TXN_OP *op, WT_CURSOR * } else tombstone = upd; - WT_WITH_BTREE(session, cbt->btree, + WT_WITH_BTREE(session, CUR2BT(cbt), ret = __wt_row_modify(cbt, &cbt->iface.key, NULL, tombstone, 
WT_UPDATE_INVALID, true)); WT_ERR(ret); tombstone = NULL; @@ -774,7 +774,7 @@ __txn_search_prepared_op( txn = session->txn; cursor = *cursorp; - if (cursor == NULL || ((WT_CURSOR_BTREE *)cursor)->btree->id != op->btree->id) { + if (cursor == NULL || CUR2BT(cursor)->id != op->btree->id) { *cursorp = NULL; if (cursor != NULL) WT_RET(cursor->close(cursor)); diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 1dd95932105..f7b0a46ccb0 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -1029,8 +1029,7 @@ err: * database was idle. */ if (full && logging) { - if (ret == 0 && - F_ISSET(((WT_CURSOR_BTREE *)session->meta_cursor)->btree, WT_BTREE_SKIP_CKPT)) + if (ret == 0 && F_ISSET(CUR2BT(session->meta_cursor), WT_BTREE_SKIP_CKPT)) idle = true; WT_TRET(__wt_txn_checkpoint_log(session, full, (ret == 0 && !idle) ? WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_CLEANUP, NULL)); @@ -1829,7 +1828,7 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) */ if (btree->modified && !bulk && !__wt_btree_immediately_durable(session) && (S2C(session)->txn_global.has_stable_timestamp || - (!F_ISSET(S2C(session), WT_CONN_FILE_CLOSE_SYNC) && !metadata && !final))) + (!F_ISSET(S2C(session), WT_CONN_FILE_CLOSE_SYNC) && !metadata))) return (__wt_set_return(session, EBUSY)); /* diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c index 90aa7ccae0a..7e1654a0250 100644 --- a/src/third_party/wiredtiger/src/txn/txn_log.c +++ b/src/third_party/wiredtiger/src/txn/txn_log.c @@ -45,7 +45,7 @@ __txn_op_log_row_key_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) key.size = WT_INSERT_KEY_SIZE(cbt->ins); } - WT_ASSERT(session, __wt_compare(session, cbt->btree->collator, &key, &cursor->key, &cmp) == 0); + WT_ASSERT(session, __wt_compare(session, CUR2BT(cbt)->collator, &key, &cursor->key, &cmp) == 0); WT_ASSERT(session, cmp == 
0); __wt_buf_free(session, &key); @@ -74,7 +74,7 @@ __txn_op_log( * Log the row- or column-store insert, modify, remove or update. Our caller doesn't log reserve * operations, we shouldn't see them here. */ - if (cbt->btree->type == BTREE_ROW) { + if (CUR2BT(cbt)->type == BTREE_ROW) { #ifdef HAVE_DIAGNOSTIC __txn_op_log_row_key_check(session, cbt); #endif diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index 00cd443398b..e51e428ed7f 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -361,7 +361,7 @@ __rollback_abort_row_ondisk_kv( vpack = &_vpack; upd = NULL; __wt_row_leaf_value_cell(session, page, rip, NULL, vpack); - prepared = F_ISSET(vpack, WT_CELL_UNPACK_PREPARE); + prepared = vpack->tw.prepare; if (vpack->tw.durable_start_ts > rollback_timestamp || (vpack->tw.durable_stop_ts == WT_TS_NONE && prepared)) { __wt_verbose(session, WT_VERB_RTS, @@ -679,7 +679,7 @@ __rollback_page_needs_abort( /* Check if the page is obsolete using the page disk address. 
*/ __wt_cell_unpack_addr(session, ref->home->dsk, (WT_CELL *)addr, &vpack); durable_ts = WT_MAX(vpack.ta.newest_start_durable_ts, vpack.ta.newest_stop_durable_ts); - prepared = F_ISSET(&vpack, WT_CELL_UNPACK_PREPARE); + prepared = vpack.ta.prepare; result = (durable_ts > rollback_timestamp) || prepared; } else if (addr != NULL) { tag = "address"; diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml index 55ef711399a..d9dc1a054cb 100755 --- a/src/third_party/wiredtiger/test/evergreen.yml +++ b/src/third_party/wiredtiger/test/evergreen.yml @@ -587,17 +587,16 @@ tasks: vars: directory: test/fops - # Temporarily disabled - # - name: format-test - # tags: ["pull_request"] - # depends_on: - # - name: compile - # commands: - # - func: "fetch artifacts" - # - func: "compile wiredtiger" - # - func: "make check directory" - # vars: - # directory: test/format + - name: format-test + tags: ["pull_request"] + depends_on: + - name: compile + commands: + - func: "fetch artifacts" + - func: "compile wiredtiger" + - func: "make check directory" + vars: + directory: test/format - name: huge-test tags: ["pull_request"] @@ -2069,7 +2068,7 @@ tasks: - func: "format test script" vars: #run for 24 hours ( 24 * 60 = 1440 minutes), use default config - format_test_script_args: -b "SEGFAULT_SIGNALS=all catchsegv ./t" -t 1440 + format_test_script_args: -e "SEGFAULT_SIGNALS=all" -b "catchsegv ./t" -t 1440 - name: format-stress-smoke-test # Set 7 hours timeout @@ -2081,7 +2080,7 @@ tasks: vars: # to emulate the original Jenkins job's test coverage, we are running the smoke test 16 times # run smoke tests, use default config (-S) - format_test_script_args: -b "SEGFAULT_SIGNALS=all catchsegv ./t" -S + format_test_script_args: -e "SEGFAULT_SIGNALS=all" -b "catchsegv ./t" -S times: 16 - name: checkpoint-stress-test @@ -2112,7 +2111,7 @@ tasks: # Make sure we dump core on failure format_test_setting: ulimit -c unlimited #run for 2 hours ( 2 * 60 
= 120 minutes), use default config - format_test_script_args: -b "SEGFAULT_SIGNALS=all catchsegv ./t" -t 120 + format_test_script_args: -e "SEGFAULT_SIGNALS=all" -b "catchsegv ./t" -t 120 - name: format-wtperf-test commands: diff --git a/src/third_party/wiredtiger/test/format/Makefile.am b/src/third_party/wiredtiger/test/format/Makefile.am index 0f265fa5a56..034876933f7 100644 --- a/src/third_party/wiredtiger/test/format/Makefile.am +++ b/src/third_party/wiredtiger/test/format/Makefile.am @@ -25,8 +25,7 @@ backup: refresh: rm -rf RUNDIR && cp -p -r BACKUP RUNDIR -# Temporarily disabled -# TESTS = smoke.sh +TESTS = smoke.sh clean-local: rm -rf RUNDIR s_dumpcmp core.* *.core diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c index 9e959dcd823..1f48061d2e2 100644 --- a/src/third_party/wiredtiger/test/format/backup.c +++ b/src/third_party/wiredtiger/test/format/backup.c @@ -299,6 +299,7 @@ copy_blocks(WT_SESSION *session, WT_CURSOR *bkup_c, const char *name) tmp = NULL; } } + testutil_check(ret == WT_NOTFOUND); testutil_check(incr_cur->close(incr_cur)); if (rfd != -1) { error_sys_check(close(rfd)); diff --git a/src/third_party/wiredtiger/test/format/format.sh b/src/third_party/wiredtiger/test/format/format.sh index b02a58abfb0..0919eab9b03 100755 --- a/src/third_party/wiredtiger/test/format/format.sh +++ b/src/third_party/wiredtiger/test/format/format.sh @@ -17,12 +17,13 @@ onintr() trap 'onintr' 2 usage() { - echo "usage: $0 [-aEFSv] [-b format-binary] [-c config] " + echo "usage: $0 [-aEFSv] [-b format-binary] [-c config] [-e env-var]" echo " [-h home] [-j parallel-jobs] [-n total-jobs] [-t minutes] [format-configuration]" echo echo " -a abort/recovery testing (defaults to off)" echo " -b binary format binary (defaults to "./t")" echo " -c config format configuration file (defaults to CONFIG.stress)" + echo " -e envvar Environment variable setting (default to none)" echo " -E skip known errors (defaults to 
off)" echo " -F quit on first failure (defaults to off)" echo " -h home run directory (defaults to .)" @@ -77,6 +78,7 @@ timing_stress_split_test=0 total_jobs=0 verbose=0 format_binary="./t" +env_var="" while :; do case "$1" in @@ -89,6 +91,9 @@ while :; do -c) config="$2" shift ; shift ;; + -e) + env_var="$2" + shift ; shift ;; -E) skip_errors=1 shift ;; @@ -437,13 +442,22 @@ format() fi cmd="$format_binary -c "$config" -h "$dir" -1 $args quiet=1" - verbose "$name: $cmd" + echo "$name: $cmd" # Disassociate the command from the shell script so we can exit and let the command # continue to run. # Run format in its own session so child processes are in their own process gorups # and we can individually terminate (and clean up) running jobs and their children. - nohup setsid $cmd > $log 2>&1 & + eval $env_var setsid $cmd > $log 2>&1 & + + # Check for setsid command failed execution, and forcibly quit. + # The RUNDIR is not successfully created in this failure type. + sleep 1 + grep -E -i 'setsid: failed to execute' $log > /dev/null && { + failure=$(($failure + 1)) + force_quit=1 + echo "$name: job in $dir failed to execute" + } } seconds=$((minutes * 60)) @@ -504,4 +518,5 @@ echo "$name: $success successful jobs, $failure failed jobs" verbose "$name: run ending at $(date)" [[ $failure -ne 0 ]] && exit 1 +[[ $success -eq 0 ]] && exit 1 exit 0 diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c index b38c7b721bc..e7faddcbd1e 100644 --- a/src/third_party/wiredtiger/test/format/ops.c +++ b/src/third_party/wiredtiger/test/format/ops.c @@ -486,7 +486,8 @@ prepare_transaction(TINFO *tinfo) { WT_DECL_RET; WT_SESSION *session; - uint64_t longwait, pause_ms, ts; + uint64_t ts; + uint32_t longwait, pause_ms; char buf[64]; session = tinfo->session; diff --git a/src/third_party/wiredtiger/test/format/smoke.sh b/src/third_party/wiredtiger/test/format/smoke.sh index 06bf108dbd2..067ef8e2589 100755 --- 
a/src/third_party/wiredtiger/test/format/smoke.sh +++ b/src/third_party/wiredtiger/test/format/smoke.sh @@ -11,9 +11,11 @@ args="$args runs.rows=10000 " args="$args runs.source=table " args="$args runs.threads=4 " -$TEST_WRAPPER ./t $args runs.type=fix +# Temporarily disabled +# $TEST_WRAPPER ./t $args runs.type=fix +# $TEST_WRAPPER ./t $args runs.type=row runs.source=lsm +# $TEST_WRAPPER ./t $args runs.type=var + $TEST_WRAPPER ./t $args runs.type=row -$TEST_WRAPPER ./t $args runs.type=row runs.source=lsm -$TEST_WRAPPER ./t $args runs.type=var # Force a rebalance to occur with statistics logging to test the utility $TEST_WRAPPER ./t $args runs.type=row statistics.server=1 ops.rebalance=1 |