summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Etienne Petrel <etienne.petrel@mongodb.com> 2022-04-18 03:34:41 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-04-18 06:08:31 +0000
commit 456505ebd2581ea8c0bdf1451cc0260d685dab2d (patch)
tree 857140df431e0b2809e1bde84c67a9ff6de6a1c4
parent e7b7713e9757cf07c20125c78d948663c10c6985 (diff)
download mongo-456505ebd2581ea8c0bdf1451cc0260d685dab2d.tar.gz
Import wiredtiger: a0eaa7957bb8ac7eca814b89fa406c17f857095b from branch mongodb-4.4
ref: 638aa27c3f..a0eaa7957b
for: 4.4.14
WT-8362 Remove or rewrite HS entries of a key when OOO tombstone is written to datastore
WT-8450 Report stats in hs_cleanup_stress, don't validate them
WT-8708 Fix timestamp usage error in test/checkpoint
WT-8824 Disable code coverage measurement on mongodb-4.4
WT-8879 Set the OOO flag when the selected tombstone is globally visible
WT-8909 Disable cpp test search_near_01 on 4.4
WT-8924 Don't check against on disk time window if there is an insert list when checking for conflicts in row-store
-rw-r--r-- src/third_party/wiredtiger/dist/test_data.py | 1
-rw-r--r-- src/third_party/wiredtiger/import.data | 2
-rw-r--r-- src/third_party/wiredtiger/src/config/test_config.c | 31
-rw-r--r-- src/third_party/wiredtiger/src/history/hs_rec.c | 50
-rw-r--r-- src/third_party/wiredtiger/src/include/api.h | 2
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h | 7
-rw-r--r-- src/third_party/wiredtiger/src/include/reconcile.h | 11
-rw-r--r-- src/third_party/wiredtiger/src/include/session.h | 2
-rw-r--r-- src/third_party/wiredtiger/src/include/txn_inline.h | 7
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_col.c | 58
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_row.c | 48
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_visibility.c | 27
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c | 105
-rw-r--r-- src/third_party/wiredtiger/src/session/session_api.c | 2
-rw-r--r-- src/third_party/wiredtiger/test/checkpoint/checkpointer.c | 12
-rw-r--r-- src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h | 4
-rw-r--r-- src/third_party/wiredtiger/test/checkpoint/workers.c | 8
-rw-r--r-- src/third_party/wiredtiger/test/cppsuite/configs/hs_cleanup_stress.txt | 6
-rwxr-xr-x src/third_party/wiredtiger/test/cppsuite/tests/run.cxx | 7
-rw-r--r-- src/third_party/wiredtiger/test/cppsuite/tests/search_near_01.cxx | 2
-rwxr-xr-x src/third_party/wiredtiger/test/evergreen.yml | 4
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_hs29.py | 94
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_hs31.py | 182
23 files changed, 507 insertions, 165 deletions
diff --git a/src/third_party/wiredtiger/dist/test_data.py b/src/third_party/wiredtiger/dist/test_data.py
index f35d400be4e..5ce241fb6e4 100644
--- a/src/third_party/wiredtiger/dist/test_data.py
+++ b/src/third_party/wiredtiger/dist/test_data.py
@@ -220,6 +220,5 @@ methods = {
How long the insertions will occur for.''')]),
'example_test' : Method(test_config),
'hs_cleanup' : Method(test_config),
- 'search_near_01' : Method(test_config),
'search_near_02' : Method(test_config),
}
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 4f3cb88e859..a653e281545 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "638aa27c3f2893cfb422791b264a71459539e036"
+ "commit": "a0eaa7957bb8ac7eca814b89fa406c17f857095b"
}
diff --git a/src/third_party/wiredtiger/src/config/test_config.c b/src/third_party/wiredtiger/src/config/test_config.c
index a9f954ace43..b0c6245bb10 100644
--- a/src/third_party/wiredtiger/src/config/test_config.c
+++ b/src/third_party/wiredtiger/src/config/test_config.c
@@ -126,19 +126,6 @@ static const WT_CONFIG_CHECK confchk_hs_cleanup[] = {
{"workload_tracking", "category", NULL, NULL, confchk_workload_tracking_subconfigs, 2},
{NULL, NULL, NULL, NULL, NULL, 0}};
-static const WT_CONFIG_CHECK confchk_search_near_01[] = {
- {"cache_size_mb", "int", NULL, "min=0,max=100000000000", NULL, 0},
- {"checkpoint_manager", "category", NULL, NULL, confchk_checkpoint_manager_subconfigs, 2},
- {"compression_enabled", "boolean", NULL, NULL, NULL, 0},
- {"duration_seconds", "int", NULL, "min=0,max=1000000", NULL, 0},
- {"enable_logging", "boolean", NULL, NULL, NULL, 0},
- {"runtime_monitor", "category", NULL, NULL, confchk_runtime_monitor_subconfigs, 5},
- {"statistics_config", "category", NULL, NULL, confchk_statistics_config_subconfigs, 2},
- {"timestamp_manager", "category", NULL, NULL, confchk_timestamp_manager_subconfigs, 4},
- {"workload_generator", "category", NULL, NULL, confchk_workload_generator_subconfigs, 6},
- {"workload_tracking", "category", NULL, NULL, confchk_workload_tracking_subconfigs, 2},
- {NULL, NULL, NULL, NULL, NULL, 0}};
-
static const WT_CONFIG_CHECK confchk_search_near_02[] = {
{"cache_size_mb", "int", NULL, "min=0,max=100000000000", NULL, 0},
{"checkpoint_manager", "category", NULL, NULL, confchk_checkpoint_manager_subconfigs, 2},
@@ -226,24 +213,6 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"ops_per_transaction=(max=1,min=0),thread_count=0,value_size=5)),"
"workload_tracking=(enabled=true,op_rate=1s)",
confchk_hs_cleanup, 10},
- {"search_near_01",
- "cache_size_mb=0,checkpoint_manager=(enabled=false,op_rate=1s),"
- "compression_enabled=false,duration_seconds=0,"
- "enable_logging=false,runtime_monitor=(enabled=true,op_rate=1s,"
- "postrun_statistics=[],stat_cache_size=(enabled=false,limit=0),"
- "stat_db_size=(enabled=false,limit=0)),"
- "statistics_config=(enable_logging=true,type=all),"
- "timestamp_manager=(enabled=true,oldest_lag=1,op_rate=1s,"
- "stable_lag=1),workload_generator=(enabled=true,"
- "insert_config=(key_size=5,op_rate=1s,ops_per_transaction=(max=1,"
- "min=0),thread_count=0,value_size=5),op_rate=1s,"
- "populate_config=(collection_count=1,key_count_per_collection=0,"
- "key_size=5,thread_count=1,value_size=5),read_config=(key_size=5,"
- "op_rate=1s,ops_per_transaction=(max=1,min=0),thread_count=0,"
- "value_size=5),update_config=(key_size=5,op_rate=1s,"
- "ops_per_transaction=(max=1,min=0),thread_count=0,value_size=5)),"
- "workload_tracking=(enabled=true,op_rate=1s)",
- confchk_search_near_01, 10},
{"search_near_02",
"cache_size_mb=0,checkpoint_manager=(enabled=false,op_rate=1s),"
"compression_enabled=false,duration_seconds=0,"
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c
index 9ac013ef617..a98bbc7c7a2 100644
--- a/src/third_party/wiredtiger/src/history/hs_rec.c
+++ b/src/third_party/wiredtiger/src/history/hs_rec.c
@@ -9,8 +9,8 @@
#include "wt_internal.h"
static int __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
- uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts,
- uint64_t *hs_counter);
+ uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool ooo_tombstone,
+ bool error_on_ooo_ts, uint64_t *hs_counter);
/*
* __hs_verbose_cache_stats --
@@ -208,8 +208,8 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
}
if (ret == 0)
- WT_ERR(__hs_delete_reinsert_from_pos(
- session, cursor, btree->id, key, tw->start_ts + 1, true, error_on_ooo_ts, &counter));
+ WT_ERR(__hs_delete_reinsert_from_pos(session, cursor, btree->id, key, tw->start_ts + 1,
+ true, false, error_on_ooo_ts, &counter));
#ifdef HAVE_DIAGNOSTIC
/*
@@ -533,7 +533,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult
if (!F_ISSET(fix_ts_upd, WT_UPDATE_FIXED_HS)) {
/* Delete and reinsert any update of the key with a higher timestamp. */
WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, key,
- fix_ts_upd->start_ts + 1, true, error_on_ooo_ts));
+ fix_ts_upd->start_ts + 1, true, false, error_on_ooo_ts));
F_SET(fix_ts_upd, WT_UPDATE_FIXED_HS);
}
}
@@ -782,7 +782,7 @@ err:
*/
int
__wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
- const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts)
+ const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool ooo_tombstone, bool error_on_ooo_ts)
{
WT_DECL_RET;
WT_ITEM hs_key;
@@ -792,10 +792,10 @@ __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint3
bool hs_read_all_flag;
/*
- * If we will delete all the updates of the key from the history store, we should not reinsert
- * any update.
+ * If we delete all the updates of the key from the history store, we should not reinsert any
+ * update except when an out-of-order tombstone is not globally visible yet.
*/
- WT_ASSERT(session, ts > WT_TS_NONE || !reinsert);
+ WT_ASSERT(session, ooo_tombstone || ts > WT_TS_NONE || !reinsert);
hs_read_all_flag = F_ISSET(hs_cursor, WT_CURSTD_HS_READ_ALL);
@@ -815,8 +815,8 @@ __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint3
++hs_counter;
}
- WT_ERR(__hs_delete_reinsert_from_pos(
- session, hs_cursor, btree_id, key, ts, reinsert, error_on_ooo_ts, &hs_counter));
+ WT_ERR(__hs_delete_reinsert_from_pos(session, hs_cursor, btree_id, key, ts, reinsert,
+ ooo_tombstone, error_on_ooo_ts, &hs_counter));
done:
err:
@@ -834,7 +834,8 @@ err:
*/
static int
__hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
- const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts, uint64_t *counter)
+ const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool ooo_tombstone, bool error_on_ooo_ts,
+ uint64_t *counter)
{
WT_CURSOR *hs_insert_cursor;
WT_CURSOR_BTREE *hs_cbt;
@@ -858,9 +859,11 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
WT_UNUSED(key);
#endif
- /* If we will delete all the updates of the key from the history store, we should not reinsert
- * any update. */
- WT_ASSERT(session, ts > WT_TS_NONE || !reinsert);
+ /*
+ * If we delete all the updates of the key from the history store, we should not reinsert any
+ * update except when an out-of-order tombstone is not globally visible yet.
+ */
+ WT_ASSERT(session, ooo_tombstone || ts > WT_TS_NONE || !reinsert);
for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
/* Ignore records that are obsolete. */
@@ -971,7 +974,16 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
__wt_timestamp_to_string(hs_cbt->upd_value->tw.durable_stop_ts, ts_string[3]),
__wt_timestamp_to_string(ts, ts_string[4]));
- hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = ts - 1;
+ /*
+ * Use the original start time window's timestamps if it isn't out of order with respect
+ * to the new update.
+ */
+ if (hs_cbt->upd_value->tw.start_ts >= ts)
+ hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = ooo_tombstone ? ts : ts - 1;
+ else {
+ hs_insert_tw.start_ts = hs_cbt->upd_value->tw.start_ts;
+ hs_insert_tw.durable_start_ts = hs_cbt->upd_value->tw.durable_start_ts;
+ }
hs_insert_tw.start_txn = hs_cbt->upd_value->tw.start_txn;
/*
@@ -979,13 +991,17 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
* another moved update OR the update itself triggered the correction. In either case,
* we should preserve the stop transaction id.
*/
- hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = ts - 1;
+ hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = ooo_tombstone ? ts : ts - 1;
hs_insert_tw.stop_txn = hs_cbt->upd_value->tw.stop_txn;
/* Extract the underlying value for reinsertion. */
WT_ERR(hs_cursor->get_value(
hs_cursor, &tw.durable_stop_ts, &tw.durable_start_ts, &hs_upd_type, &hs_value));
+ /* Reinsert the update with corrected timestamps. */
+ if (ooo_tombstone && hs_ts == ts)
+ *counter = hs_counter;
+
/* Insert the value back with different timestamps. */
hs_insert_cursor->set_key(
hs_insert_cursor, 4, btree_id, &hs_key, hs_insert_tw.start_ts, *counter);
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index 6f9c350afd6..7f18ccabcb1 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -89,7 +89,7 @@
* We should not leave any history store cursor open when return from an api call. \
* However, we cannot do a stricter check before WT-7247 is resolved. \
*/ \
- WT_ASSERT(s, (s)->api_call_counter > 1 || (s)->hs_cursor_counter <= 2); \
+ WT_ASSERT(s, (s)->api_call_counter > 1 || (s)->hs_cursor_counter <= 3); \
/* \
* No code after this line, otherwise error handling \
* won't be correct. \
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index c3cd6385b1a..ffd9a61a4e1 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -780,8 +780,8 @@ extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *
extern int __wt_hs_config(WT_SESSION_IMPL *session, const char **cfg)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
- uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool ooo_tombstone,
+ bool error_on_ooo_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
const char *value_format, uint64_t recno, WT_UPDATE_VALUE *upd_value, WT_ITEM *base_value_buf)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1232,6 +1232,9 @@ extern int __wt_rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_rec_dictionary_lookup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *val,
WT_REC_DICTIONARY **dpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_hs_clear_on_tombstone(WT_SESSION_IMPL *session, WT_RECONCILE *r,
+ wt_timestamp_t ts, uint64_t recno, WT_ITEM *rowkey, bool reinsert)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_rec_row_leaf(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref,
diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h
index ca94b8a1478..70664653e5d 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.h
+++ b/src/third_party/wiredtiger/src/include/reconcile.h
@@ -284,6 +284,14 @@ struct __wt_reconcile {
bool rec_page_cell_with_ts;
bool rec_page_cell_with_txn_id;
bool rec_page_cell_with_prepared_txn;
+
+ /*
+ * When removing a key due to a tombstone with a durable timestamp of "none", we also remove the
+ * history store contents associated with that key. Keep the pertinent state here: a flag to say
+ * whether this is appropriate, and a cached history store cursor for doing it.
+ */
+ bool hs_clear_on_tombstone;
+ WT_CURSOR *hs_cursor;
};
typedef struct {
@@ -291,7 +299,8 @@ typedef struct {
WT_TIME_WINDOW tw;
- bool upd_saved; /* An element on the row's update chain was saved */
+ bool upd_saved; /* An element on the row's update chain was saved */
+ bool ooo_tombstone; /* Out-of-order tombstone */
} WT_UPDATE_SELECT;
/*
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index 9868964597e..5206910f139 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -159,7 +159,7 @@ struct __wt_session_impl {
size_t op_handle_allocated; /* Bytes allocated */
void *reconcile; /* Reconciliation support */
- void (*reconcile_cleanup)(WT_SESSION_IMPL *);
+ int (*reconcile_cleanup)(WT_SESSION_IMPL *);
/* Salvage support. */
void *salvage_track;
diff --git a/src/third_party/wiredtiger/src/include/txn_inline.h b/src/third_party/wiredtiger/src/include/txn_inline.h
index a6b13708057..c9247515bb8 100644
--- a/src/third_party/wiredtiger/src/include/txn_inline.h
+++ b/src/third_party/wiredtiger/src/include/txn_inline.h
@@ -1353,8 +1353,13 @@ __wt_txn_modify_check(
* Check conflict against any on-page value if there is no update on the update chain except
* aborted updates. Otherwise, we would have either already detected a conflict if we saw an
* uncommitted update or determined that it would be safe to write if we saw a committed update.
+ *
+ * In the case of row-store we also need to check that the insert list is empty as the existence
+ * of it implies there is no on disk value for the given key. However we can still get a
+ * time-window from an unrelated on-disk value if we are not careful as the slot can still be
+ * set on the cursor b-tree.
*/
- if (!rollback && upd == NULL) {
+ if (!rollback && upd == NULL && (CUR2BT(cbt)->type != BTREE_ROW || cbt->ins == NULL)) {
__wt_read_cell_time_window(cbt, &tw, &tw_found);
if (tw_found) {
if (WT_TIME_WINDOW_HAS_STOP(&tw))
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_col.c b/src/third_party/wiredtiger/src/reconcile/rec_col.c
index 7b716c4808a..dfca67b3ea6 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_col.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_col.c
@@ -580,20 +580,17 @@ __wt_rec_col_var(
WT_CELL *cell;
WT_CELL_UNPACK_KV *vpack, _vpack;
WT_COL *cip;
- WT_CURSOR *hs_cursor;
WT_CURSOR_BTREE *cbt;
WT_DECL_ITEM(orig);
WT_DECL_RET;
WT_INSERT *ins;
- WT_ITEM hs_recno_key;
WT_PAGE *page;
WT_TIME_WINDOW clear_tw, *twp;
WT_UPDATE *upd;
WT_UPDATE_SELECT upd_select;
uint64_t n, nrepeat, repeat_count, rle, skip, src_recno;
uint32_t i, size;
- uint8_t *p, hs_recno_key_buf[WT_INTPACK64_MAXSIZE];
- bool deleted, hs_clear, orig_deleted, update_no_copy;
+ bool deleted, orig_deleted, update_no_copy;
const void *data;
btree = S2BT(session);
@@ -625,11 +622,6 @@ __wt_rec_col_var(
* they shouldn't open new dhandles. In those cases we won't ever need to blow away history
* store content, so we can skip this.
*/
- hs_cursor = NULL;
- hs_clear = F_ISSET(S2C(session), WT_CONN_HS_OPEN) &&
- !F_ISSET(session, WT_SESSION_NO_DATA_HANDLES) && !WT_IS_HS(btree->dhandle) &&
- !WT_IS_METADATA(btree->dhandle);
-
WT_RET(__wt_rec_split_init(session, r, page, pageref->ref_recno, btree->maxleafpage_precomp));
WT_RET(__wt_scr_alloc(session, 0, &orig));
@@ -816,41 +808,25 @@ record_loop:
case WT_UPDATE_STANDARD:
data = upd->data;
size = upd->size;
+ /*
+ * When an out-of-order or mixed-mode tombstone is getting written to disk,
+ * remove any historical versions that are greater in the history store for this
+ * key.
+ */
+ if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone)
+ WT_ERR(__wt_rec_hs_clear_on_tombstone(
+ session, r, twp->durable_stop_ts, src_recno, NULL, true));
+
break;
case WT_UPDATE_TOMBSTONE:
/*
- * When removing a key due to a tombstone with a durable timestamp of "none",
- * also remove the history store contents associated with that key.
+ * When an out-of-order or mixed-mode tombstone is getting written to disk,
+ * remove any historical versions that are greater in the history store for this
+ * key.
*/
- if (twp->durable_stop_ts == WT_TS_NONE && hs_clear) {
- p = hs_recno_key_buf;
- WT_ERR(__wt_vpack_uint(&p, 0, src_recno));
- hs_recno_key.data = hs_recno_key_buf;
- hs_recno_key.size = WT_PTRDIFF(p, hs_recno_key_buf);
-
- /* Open a history store cursor if we don't yet have one. */
- if (hs_cursor == NULL)
- WT_ERR(__wt_curhs_open(session, NULL, &hs_cursor));
-
- /*
- * From WT_TS_NONE delete all the history store content of the key. This
- * path will never be taken for a mixed-mode deletion being evicted and with
- * a checkpoint that started prior to the eviction starting its
- * reconciliation as previous checks done while selecting an update will
- * detect that.
- */
- WT_ERR(__wt_hs_delete_key_from_ts(
- session, hs_cursor, btree->id, &hs_recno_key, WT_TS_NONE, false, false));
-
- /* Fail 0.01% of the time. */
- if (F_ISSET(r, WT_REC_EVICT) &&
- __wt_failpoint(session,
- WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS, 0.01))
- WT_ERR(EBUSY);
-
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_onpage_removal);
- WT_STAT_DATA_INCR(session, cache_hs_key_truncate_onpage_removal);
- }
+ if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone)
+ WT_ERR(__wt_rec_hs_clear_on_tombstone(
+ session, r, twp->durable_stop_ts, src_recno, NULL, false));
deleted = true;
twp = &clear_tw;
@@ -1077,8 +1053,6 @@ next:
ret = __wt_rec_split_finish(session, r);
err:
- if (hs_cursor != NULL)
- WT_TRET(hs_cursor->close(hs_cursor));
__wt_scr_free(session, &orig);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
index a977b7d088c..60c5722398e 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_row.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -710,7 +710,6 @@ __wt_rec_row_leaf(
WT_BTREE *btree;
WT_CELL *cell;
WT_CELL_UNPACK_KV *kpack, _kpack, *vpack, _vpack;
- WT_CURSOR *hs_cursor;
WT_CURSOR_BTREE *cbt;
WT_DECL_ITEM(tmpkey);
WT_DECL_RET;
@@ -726,7 +725,7 @@ __wt_rec_row_leaf(
uint64_t slvg_skip;
uint32_t i;
uint8_t key_prefix;
- bool dictionary, hs_clear, key_onpage_ovfl, ovfl_key;
+ bool dictionary, key_onpage_ovfl, ovfl_key;
void *copy;
const void *key_data;
@@ -755,11 +754,6 @@ __wt_rec_row_leaf(
* they shouldn't open new dhandles. In those cases we won't ever need to blow away history
* store content, so we can skip this.
*/
- hs_cursor = NULL;
- hs_clear = F_ISSET(S2C(session), WT_CONN_HS_OPEN) &&
- !F_ISSET(session, WT_SESSION_NO_DATA_HANDLES) && !WT_IS_HS(btree->dhandle) &&
- !WT_IS_METADATA(btree->dhandle);
-
WT_RET(__wt_rec_split_init(session, r, page, 0, btree->maxleafpage_precomp));
/*
@@ -896,6 +890,15 @@ __wt_rec_row_leaf(
case WT_UPDATE_STANDARD:
/* Take the value from the update. */
WT_ERR(__wt_rec_cell_build_val(session, r, upd->data, upd->size, twp, 0));
+ /*
+ * When an out-of-order or mixed-mode tombstone is getting written to disk, remove
+ * any historical versions that are greater in the history store for that key.
+ */
+ if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone) {
+ WT_ERR(__wt_row_leaf_key(session, page, rip, tmpkey, true));
+ WT_ERR(__wt_rec_hs_clear_on_tombstone(
+ session, r, twp->durable_stop_ts, WT_RECNO_OOB, tmpkey, true));
+ }
dictionary = true;
break;
case WT_UPDATE_TOMBSTONE:
@@ -920,32 +923,13 @@ __wt_rec_row_leaf(
}
/*
- * When removing a key due to a tombstone with a durable timestamp of "none", also
- * remove the history store contents associated with that key.
+ * When an out-of-order or mixed-mode tombstone is getting written to disk, remove
+ * any historical versions that are greater in the history store for this key.
*/
- if (twp->durable_stop_ts == WT_TS_NONE && hs_clear) {
+ if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone) {
WT_ERR(__wt_row_leaf_key(session, page, rip, tmpkey, true));
-
- /* Open a history store cursor if we don't yet have one. */
- if (hs_cursor == NULL)
- WT_ERR(__wt_curhs_open(session, NULL, &hs_cursor));
-
- /*
- * From WT_TS_NONE delete all the history store content of the key. This path
- * will never be taken for a mixed-mode deletion being evicted and with a
- * checkpoint that started prior to the eviction starting its reconciliation as
- * previous checks done while selecting an update will detect that.
- */
- WT_ERR(__wt_hs_delete_key_from_ts(
- session, hs_cursor, btree->id, tmpkey, WT_TS_NONE, false, false));
-
- /* Fail 0.01% of the time. */
- if (F_ISSET(r, WT_REC_EVICT) &&
- __wt_failpoint(
- session, WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS, 0.01))
- WT_ERR(EBUSY);
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_onpage_removal);
- WT_STAT_DATA_INCR(session, cache_hs_key_truncate_onpage_removal);
+ WT_ERR(__wt_rec_hs_clear_on_tombstone(
+ session, r, twp->durable_stop_ts, WT_RECNO_OOB, tmpkey, false));
}
/*
@@ -1077,8 +1061,6 @@ leaf_insert:
ret = __wt_rec_split_finish(session, r);
err:
- if (hs_cursor != NULL)
- WT_TRET(hs_cursor->close(hs_cursor));
__wt_scr_free(session, &tmpkey);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index aed99b9f906..ba947c13ac4 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -402,6 +402,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
*/
upd_select->upd = NULL;
upd_select->upd_saved = false;
+ upd_select->ooo_tombstone = false;
select_tw = &upd_select->tw;
WT_TIME_WINDOW_INIT(select_tw);
@@ -699,6 +700,32 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
WT_ERR(__rec_validate_upd_chain(session, r, onpage_upd, select_tw, vpack));
/*
+ * Set the flag if the selected tombstone is an out-of-order or mixed mode to an update. Based
+ * on this flag, the caller functions perform the history store truncation for this key.
+ */
+ if (!is_hs_page && tombstone != NULL &&
+ !F_ISSET(tombstone, WT_UPDATE_RESTORED_FROM_DS | WT_UPDATE_RESTORED_FROM_HS)) {
+ upd = upd_select->upd;
+
+ /*
+ * The selected update can be the tombstone itself when the tombstone is globally visible.
+ * Compare the tombstone's timestamp with either the next update in the update list or the
+ * on-disk cell timestamp to determine if the tombstone is an out-of-order or mixed mode.
+ */
+ if (tombstone == upd) {
+ upd = upd->next;
+
+ /* Loop until a valid update is found. */
+ while (upd != NULL && upd->txnid == WT_TXN_ABORTED)
+ upd = upd->next;
+ }
+
+ if ((upd != NULL && upd->start_ts > tombstone->start_ts) ||
+ (vpack != NULL && vpack->tw.start_ts > tombstone->start_ts))
+ upd_select->ooo_tombstone = true;
+ }
+
+ /*
* Fixup any out of order timestamps, assert that checkpoint wasn't running when this round of
* reconciliation started.
*
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 1cd1a3be5d0..60d2e51da83 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -8,9 +8,9 @@
#include "wt_internal.h"
-static void __rec_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *);
-static void __rec_destroy(WT_SESSION_IMPL *, void *);
-static void __rec_destroy_session(WT_SESSION_IMPL *);
+static int __rec_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *);
+static int __rec_destroy(WT_SESSION_IMPL *, void *);
+static int __rec_destroy_session(WT_SESSION_IMPL *);
static int __rec_init(WT_SESSION_IMPL *, WT_REF *, uint32_t, WT_SALVAGE_COOKIE *, void *);
static int __rec_hs_wrapup(WT_SESSION_IMPL *, WT_RECONCILE *);
static int __rec_root_write(WT_SESSION_IMPL *, WT_PAGE *, uint32_t);
@@ -265,7 +265,7 @@ __reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, u
btree->rec_multiblock_max = r->multi_next;
/* Clean up the reconciliation structure. */
- __rec_cleanup(session, r);
+ WT_RET(__rec_cleanup(session, r));
/*
* When threads perform eviction, don't cache block manager structures (even across calls), we
@@ -675,6 +675,22 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO
r->rec_page_cell_with_txn_id = false;
r->rec_page_cell_with_prepared_txn = false;
+ /*
+ * When removing a key due to a tombstone with a durable timestamp of "none", also remove the
+ * history store contents associated with that key. It's safe to do even if we fail
+ * reconciliation after the removal, the history store content must be obsolete in order for us
+ * to consider removing the key.
+ *
+ * Ignore if this is metadata, as metadata doesn't have any history.
+ *
+ * Some code paths, such as schema removal, involve deleting keys in metadata and assert that
+ * they shouldn't open new dhandles. In those cases we won't ever need to blow away history
+ * store content, so we can skip this.
+ */
+ r->hs_clear_on_tombstone = F_ISSET(S2C(session), WT_CONN_HS_OPEN) &&
+ !F_ISSET(session, WT_SESSION_NO_DATA_HANDLES) && !WT_IS_HS(btree->dhandle) &&
+ !WT_IS_METADATA(btree->dhandle);
+
/*
* If we allocated the reconciliation structure and there was an error, clean up. If our caller
* passed in a structure, they own it.
@@ -684,8 +700,8 @@ err:
if (ret == 0)
*(WT_RECONCILE **)reconcilep = r;
else {
- __rec_cleanup(session, r);
- __rec_destroy(session, &r);
+ WT_TRET(__rec_cleanup(session, r));
+ WT_TRET(__rec_destroy(session, &r));
}
}
@@ -696,7 +712,7 @@ err:
* __rec_cleanup --
* Clean up after a reconciliation run, except for structures cached across runs.
*/
-static void
+static int
__rec_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
WT_BTREE *btree;
@@ -705,6 +721,9 @@ __rec_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
btree = S2BT(session);
+ if (r->hs_cursor != NULL)
+ WT_RET(r->hs_cursor->reset(r->hs_cursor));
+
if (btree->type == BTREE_ROW)
for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
__wt_free(session, multi->key.ikey);
@@ -717,19 +736,25 @@ __rec_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
/* Reconciliation is not re-entrant, make sure that doesn't happen. */
r->ref = NULL;
+
+ return (0);
}
/*
* __rec_destroy --
* Clean up the reconciliation structure.
*/
-static void
+static int
__rec_destroy(WT_SESSION_IMPL *session, void *reconcilep)
{
WT_RECONCILE *r;
if ((r = *(WT_RECONCILE **)reconcilep) == NULL)
- return;
+ return (0);
+
+ if (r->hs_cursor != NULL)
+ WT_RET(r->hs_cursor->close(r->hs_cursor));
+
*(WT_RECONCILE **)reconcilep = NULL;
__wt_buf_free(session, &r->chunk_A.key);
@@ -752,16 +777,18 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep)
__wt_buf_free(session, &r->update_modify_cbt._upd_value.buf);
__wt_free(session, r);
+
+ return (0);
}
/*
* __rec_destroy_session --
* Clean up the reconciliation structure, session version.
*/
-static void
+static int
__rec_destroy_session(WT_SESSION_IMPL *session)
{
- __rec_destroy(session, &session->reconcile);
+ return (__rec_destroy(session, &session->reconcile));
}
/*
@@ -2005,8 +2032,8 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
__wt_page_modify_set(session, parent);
err:
- __rec_cleanup(session, r);
- __rec_destroy(session, &cbulk->reconcile);
+ WT_TRET(__rec_cleanup(session, r));
+ WT_TRET(__rec_destroy(session, &cbulk->reconcile));
return (ret);
}
@@ -2428,3 +2455,55 @@ err:
__wt_scr_free(session, &tmp);
return (ret);
}
+
+/*
+ * __wt_rec_hs_clear_on_tombstone --
+ * When an out-of-order or mixed-mode tombstone is written to disk, delete the history store
+ * contents of that key starting from the given timestamp, reinserting them if requested.
+ */
+int
+__wt_rec_hs_clear_on_tombstone(WT_SESSION_IMPL *session, WT_RECONCILE *r, wt_timestamp_t ts,
+ uint64_t recno, WT_ITEM *rowkey, bool reinsert)
+{
+ WT_BTREE *btree;
+ WT_ITEM hs_recno_key, *key;
+ uint8_t hs_recno_key_buf[WT_INTPACK64_MAXSIZE], *p;
+
+ btree = S2BT(session);
+
+ /* We should be passed a recno or a row-store key, but not both. */
+ WT_ASSERT(session, (recno == WT_RECNO_OOB) != (rowkey == NULL));
+
+ if (rowkey != NULL)
+ key = rowkey;
+ else {
+ p = hs_recno_key_buf;
+ WT_RET(__wt_vpack_uint(&p, 0, recno));
+ hs_recno_key.data = hs_recno_key_buf;
+ hs_recno_key.size = WT_PTRDIFF(p, hs_recno_key_buf);
+ key = &hs_recno_key;
+ }
+
+ /* Open a history store cursor if we don't yet have one. */
+ if (r->hs_cursor == NULL)
+ WT_RET(__wt_curhs_open(session, NULL, &r->hs_cursor));
+
+ /*
+ * From WT_TS_NONE delete all the history store content of the key. This path will never be
+ * taken for a mixed-mode deletion being evicted and with a checkpoint that started prior to the
+ * eviction starting its reconciliation as previous checks done while selecting an update will
+ * detect that.
+ */
+ WT_RET(__wt_hs_delete_key_from_ts(session, r->hs_cursor, btree->id, key, ts, reinsert, true,
+ F_ISSET(r, WT_REC_CHECKPOINT_RUNNING)));
+
+ /* Fail 0.01% of the time. */
+ if (F_ISSET(r, WT_REC_EVICT) &&
+ __wt_failpoint(session, WT_TIMING_STRESS_FAILPOINT_HISTORY_STORE_DELETE_KEY_FROM_TS, 1))
+ return (EBUSY);
+
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate_onpage_removal);
+ WT_STAT_DATA_INCR(session, cache_hs_key_truncate_onpage_removal);
+
+ return (0);
+}
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index b6058b868c9..03827e3e76d 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -178,7 +178,7 @@ __wt_session_release_resources(WT_SESSION_IMPL *session)
/* Reconciliation cleanup */
if (session->reconcile_cleanup != NULL)
- session->reconcile_cleanup(session);
+ WT_TRET(session->reconcile_cleanup(session));
/* Stashed memory. */
__wt_stash_discard(session);
diff --git a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
index 1c8b5135d49..a07604a6520 100644
--- a/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
+++ b/src/third_party/wiredtiger/test/checkpoint/checkpointer.c
@@ -44,10 +44,10 @@ set_stable(void)
char buf[128];
if (g.race_timetamps)
- testutil_check(__wt_snprintf(
- buf, sizeof(buf), "stable_timestamp=%x,oldest_timestamp=%x", g.ts_stable, g.ts_stable));
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "stable_timestamp=%" PRIx64 ",oldest_timestamp=%" PRIx64, g.ts_stable, g.ts_stable));
else
- testutil_check(__wt_snprintf(buf, sizeof(buf), "stable_timestamp=%x", g.ts_stable));
+ testutil_check(__wt_snprintf(buf, sizeof(buf), "stable_timestamp=%" PRIx64, g.ts_stable));
testutil_check(g.conn->set_timestamp(g.conn, buf));
}
@@ -202,7 +202,9 @@ real_checkpointer(void)
verify_ts = stable_ts;
else
verify_ts = __wt_random(&rnd) % (stable_ts - oldest_ts + 1) + oldest_ts;
- WT_ORDERED_READ(g.ts_oldest, g.ts_stable);
+ __wt_writelock((WT_SESSION_IMPL *)session, &g.clock_lock);
+ g.ts_oldest = g.ts_stable;
+ __wt_writeunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
}
/* Execute a checkpoint */
@@ -225,7 +227,7 @@ real_checkpointer(void)
/* Advance the oldest timestamp to the most recently set stable timestamp. */
if (g.use_timestamps && g.ts_oldest != 0) {
testutil_check(__wt_snprintf(
- timestamp_buf, sizeof(timestamp_buf), "oldest_timestamp=%x", g.ts_oldest));
+ timestamp_buf, sizeof(timestamp_buf), "oldest_timestamp=%" PRIx64, g.ts_oldest));
testutil_check(g.conn->set_timestamp(g.conn, timestamp_buf));
}
/* Random value between 4 and 8 seconds. */
diff --git a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
index b3b65c5d828..93f76ec5e1c 100644
--- a/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
+++ b/src/third_party/wiredtiger/test/checkpoint/test_checkpoint.h
@@ -71,8 +71,8 @@ typedef struct {
bool hs_checkpoint_timing_stress; /* History store checkpoint timing stress */
bool reserved_txnid_timing_stress; /* Reserved transaction id timing stress */
bool checkpoint_slow_timing_stress; /* Checkpoint slow timing stress */
- u_int ts_oldest; /* Current oldest timestamp */
- u_int ts_stable; /* Current stable timestamp */
+ uint64_t ts_oldest; /* Current oldest timestamp */
+ uint64_t ts_stable; /* Current stable timestamp */
bool mixed_mode_deletes; /* Run with mixed mode deletes */
bool use_timestamps; /* Use txn timestamps */
bool race_timetamps; /* Async update to oldest timestamp */
diff --git a/src/third_party/wiredtiger/test/checkpoint/workers.c b/src/third_party/wiredtiger/test/checkpoint/workers.c
index 3c9313c4c99..5ed22005fe5 100644
--- a/src/third_party/wiredtiger/test/checkpoint/workers.c
+++ b/src/third_party/wiredtiger/test/checkpoint/workers.c
@@ -369,18 +369,18 @@ real_worker(void)
next_rnd = __wt_random(&rnd);
if (g.prepare && next_rnd % 2 == 0) {
testutil_check(__wt_snprintf(
- buf, sizeof(buf), "prepare_timestamp=%x", g.ts_stable + 1));
+ buf, sizeof(buf), "prepare_timestamp=%" PRIx64, g.ts_stable + 1));
if ((ret = session->prepare_transaction(session, buf)) != 0) {
__wt_readunlock((WT_SESSION_IMPL *)session, &g.clock_lock);
(void)log_print_err("real_worker:prepare_transaction", ret, 1);
goto err;
}
testutil_check(__wt_snprintf(buf, sizeof(buf),
- "durable_timestamp=%x,commit_timestamp=%x", g.ts_stable + 3,
- g.ts_stable + 1));
+ "durable_timestamp=%" PRIx64 ",commit_timestamp=%" PRIx64,
+ g.ts_stable + 3, g.ts_stable + 1));
} else
testutil_check(__wt_snprintf(
- buf, sizeof(buf), "commit_timestamp=%x", g.ts_stable + 1));
+ buf, sizeof(buf), "commit_timestamp=%" PRIx64, g.ts_stable + 1));
// Commit majority of times
if (next_rnd % 49 != 0) {
diff --git a/src/third_party/wiredtiger/test/cppsuite/configs/hs_cleanup_stress.txt b/src/third_party/wiredtiger/test/cppsuite/configs/hs_cleanup_stress.txt
index 97de15635c5..ee2e6795df7 100644
--- a/src/third_party/wiredtiger/test/cppsuite/configs/hs_cleanup_stress.txt
+++ b/src/third_party/wiredtiger/test/cppsuite/configs/hs_cleanup_stress.txt
@@ -18,19 +18,19 @@ runtime_monitor=
(
stat_cache_size=
(
- enabled=true,
+ enabled=false,
limit=110
),
# The data files compress to around 25MB per table at the end of a run so 250MB total.
# +1.4GB for the history store. With an additional 150MB margin.
stat_db_size=
(
- enabled=true,
+ enabled=false,
limit=1900000000,
),
# Seems to insert around 477K records. Give it +-20K margin.
# Seems to remove 180K records. Give it a similar margin.
- postrun_statistics=[cache_hs_insert:457000:497000, cc_pages_removed:170000:200000]
+ #postrun_statistics=[cache_hs_insert:457000:497000, cc_pages_removed:170000:200000]
),
timestamp_manager=
(
diff --git a/src/third_party/wiredtiger/test/cppsuite/tests/run.cxx b/src/third_party/wiredtiger/test/cppsuite/tests/run.cxx
index 627921a9aa4..caa0153333c 100755
--- a/src/third_party/wiredtiger/test/cppsuite/tests/run.cxx
+++ b/src/third_party/wiredtiger/test/cppsuite/tests/run.cxx
@@ -37,7 +37,6 @@
#include "burst_inserts.cxx"
#include "example_test.cxx"
#include "hs_cleanup.cxx"
-#include "search_near_01.cxx"
#include "search_near_02.cxx"
std::string
@@ -121,8 +120,6 @@ run_test(const std::string &test_name, const std::string &config, const std::str
hs_cleanup(test_harness::test_args{config, test_name, wt_open_config}).run();
else if (test_name == "burst_inserts")
burst_inserts(test_harness::test_args{config, test_name, wt_open_config}).run();
- else if (test_name == "search_near_01")
- search_near_01(test_harness::test_args{config, test_name, wt_open_config}).run();
else {
test_harness::logger::log_msg(LOG_ERROR, "Test not found: " + test_name);
error_code = -1;
@@ -145,8 +142,8 @@ main(int argc, char *argv[])
{
std::string cfg, config_filename, current_cfg, current_test_name, test_name, wt_open_config;
int64_t error_code = 0;
- const std::vector<std::string> all_tests = {"base_test", "burst_inserts", "example_test",
- "hs_cleanup", "search_near_01", "search_near_02"};
+ const std::vector<std::string> all_tests = {
+ "base_test", "burst_inserts", "example_test", "hs_cleanup", "search_near_02"};
/* Set the program name for error messages. */
(void)testutil_set_progname(argv);
diff --git a/src/third_party/wiredtiger/test/cppsuite/tests/search_near_01.cxx b/src/third_party/wiredtiger/test/cppsuite/tests/search_near_01.cxx
index 20bcebff4a0..8ddc57a57d0 100644
--- a/src/third_party/wiredtiger/test/cppsuite/tests/search_near_01.cxx
+++ b/src/third_party/wiredtiger/test/cppsuite/tests/search_near_01.cxx
@@ -34,6 +34,8 @@
using namespace test_harness;
/*
+ * Disabled as part of WT-8909.
+ *
* In this test, we want to verify that search_near with prefix enabled only traverses the portion
* of the tree that follows the prefix portion of the search key. The test is composed of a populate
* phase followed by a read phase. The populate phase will insert a set of random generated keys
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 208b1a2d3f7..f870a1f96e7 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -1050,7 +1050,6 @@ tasks:
set -o errexit
set -o verbose
- ${test_env_vars|} $(pwd)/test/cppsuite/run -t search_near_01 -f test/cppsuite/configs/search_near_01_default.txt -l 2
${test_env_vars|} $(pwd)/test/cppsuite/run -t search_near_02 -f test/cppsuite/configs/search_near_02_default.txt -l 2
- name: cppsuite-base-test-stress
@@ -3456,6 +3455,9 @@ buildvariants:
- name: code-statistics
display_name: "Code statistics"
+ # Code coverage and complexity metrics are not required on the mongodb-4.4 branch as
+ # they are only required on the 'develop' branch.
+ activate: false
batchtime: 10080 # 7 days
run_on:
- ubuntu2004-test
diff --git a/src/third_party/wiredtiger/test/suite/test_hs29.py b/src/third_party/wiredtiger/test/suite/test_hs29.py
new file mode 100644
index 00000000000..da81c6a94d3
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_hs29.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wttest
+
+# test_hs29.py
+# It is possible to end up with 3 history store cursors open at the same time when the following
+# occurs:
+# - The reconciliation process opens one history store cursor.
+# - The function hs_delete_reinsert_from_pos creates a history store cursor too. This means we need
+# an update with an OOO timestamp to trigger that function.
+# - The function wt_rec_hs_clear_on_tombstone creates a history store cursor as well. This means we
+# need a tombstone to trigger the function, i.e a deleted key.
+class test_hs29(wttest.WiredTigerTestCase):
+
+ def test_3_hs_cursors(self):
+
+ # Create a table.
+ uri = "table:test_hs_cursor"
+ self.session.create(uri, 'key_format=S,value_format=S')
+
+ # Open one cursor to operate on the table and another one to perform eviction.
+ cursor = self.session.open_cursor(uri)
+ cursor2 = self.session.open_cursor(uri, None, "debug=(release_evict=true)")
+
+ # Create two keys and update each once: key '1' at timestamps 2 and 3, key '2' at 10 and 20.
+ self.session.begin_transaction()
+ cursor['1'] = '1'
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(2))
+
+ self.session.begin_transaction()
+ cursor['1'] = '11'
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(3))
+
+ self.session.begin_transaction()
+ cursor['2'] = '2'
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(10))
+
+ self.session.begin_transaction()
+ cursor['2'] = '22'
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(20))
+
+ # Perform eviction: search for each key with the release_evict cursor, then reset it.
+ cursor2.set_key('1')
+ self.assertEqual(cursor2.search(), 0)
+ self.assertEqual(cursor2.get_value(), '11')
+ self.assertEqual(cursor2.reset(), 0)
+
+ cursor2.set_key('2')
+ self.assertEqual(cursor2.search(), 0)
+ self.assertEqual(cursor2.get_value(), '22')
+ self.assertEqual(cursor2.reset(), 0)
+
+ # Remove the first key without giving a ts.
+ self.session.begin_transaction()
+ cursor.set_key('1')
+ cursor.remove()
+ self.session.commit_transaction()
+
+ # Update the second key with an out of order timestamp (5, older than its updates at 10 and 20).
+ self.session.begin_transaction()
+ cursor['2'] = '222'
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(5))
+
+ # Close the connection to trigger a final checkpoint and reconciliation.
+ self.conn.close()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_hs31.py b/src/third_party/wiredtiger/test/suite/test_hs31.py
new file mode 100644
index 00000000000..7780a187dc9
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_hs31.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+from wtscenario import make_scenarios
+from wiredtiger import stat
+
+# test_hs31.py
+# Ensure that a tombstone with an out of order timestamp clears the history store records.
+class test_hs31(wttest.WiredTigerTestCase):
+ conn_config = 'cache_size=5MB,statistics=(all)'
+ format_values = [
+ ('column', dict(key_format='r', value_format='S')),
+ # Disabled: ('column-fix', dict(key_format='r', value_format='8t')),
+ ('integer-row', dict(key_format='i', value_format='S')),
+ ('string-row', dict(key_format='S', value_format='S')),
+ ]
+
+ ooo_values = [
+ ('out-of-order', dict(ooo_value=True)),
+ ('mixed-mode', dict(ooo_value=False)),
+ ]
+
+ globally_visible_before_ckpt_values = [
+ ('globally_visible_before_ckpt', dict(globally_visible_before_ckpt=True)),
+ ('no_globally_visible_before_ckpt', dict(globally_visible_before_ckpt=False)),
+ ]
+
+ scenarios = make_scenarios(format_values, ooo_values, globally_visible_before_ckpt_values)
+ nrows = 1000  # Loops below use range(1, nrows), i.e. keys 1..999.
+
+ def create_key(self, i):  # String key format gets str(i); other formats use i unchanged.
+ if self.key_format == 'S':
+ return str(i)
+ return i
+
+ def get_stat(self, stat):  # Note: the parameter shadows the imported 'stat' module in here.
+ stat_cursor = self.session.open_cursor('statistics:')
+ val = stat_cursor[stat][2]  # Presumably index 2 is the numeric value - TODO confirm.
+ stat_cursor.close()
+ return val
+
+ def test_ooo_tombstone_clear_hs(self):
+ uri = 'file:test_hs31'
+ create_params = 'key_format={},value_format={}'.format(self.key_format, self.value_format)
+ self.session.create(uri, create_params)
+
+ if self.value_format == '8t':  # Only reachable if the disabled 'column-fix' scenario is enabled.
+ value1 = 97
+ value2 = 98
+ else:
+ value1 = 'a' * 500
+ value2 = 'b' * 500
+
+ # Pin oldest and stable to timestamp 1.
+ self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1) +
+ ',stable_timestamp=' + self.timestamp_str(1))
+
+ # Apply a series of updates from timestamps 10-14.
+ cursor = self.session.open_cursor(uri)
+ for ts in range(10, 15):
+ for i in range(1, self.nrows):
+ self.session.begin_transaction()
+ cursor[self.create_key(i)] = value1
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(ts))
+
+ # Reconcile and flush versions 10-13 to the history store.
+ self.session.checkpoint()
+
+ # Evict the data from the cache.
+ self.session.begin_transaction()
+ cursor2 = self.session.open_cursor(uri, None, "debug=(release_evict=true)")
+ for i in range(1, self.nrows):
+ cursor2.set_key(self.create_key(i))
+ cursor2.search()
+ cursor2.reset()
+ self.session.rollback_transaction()
+
+ if not self.ooo_value:  # Mixed-mode scenario.
+ self.session.breakpoint()  # NOTE(review): looks like a leftover debugging hook - confirm.
+ # Start a long running transaction to stop the oldest id being advanced.
+ session2 = self.conn.open_session()
+ session2.begin_transaction()
+ long_cursor = session2.open_cursor(uri, None)
+ long_cursor[self.create_key(self.nrows + 10)] = value1
+ long_cursor.reset()
+ long_cursor.close()
+
+ # Remove every key, committing at timestamp 5 (out of order) or with no timestamp (mixed mode).
+ for i in range(1, self.nrows):
+ self.session.begin_transaction()
+ cursor.set_key(self.create_key(i))
+ cursor.remove()
+ if self.ooo_value:
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(5))
+ else:
+ self.session.commit_transaction()
+
+ if not self.globally_visible_before_ckpt:
+ # Reconcile to write the stop time window.
+ self.session.checkpoint()
+
+ if not self.ooo_value:  # Mixed-mode scenario.
+ self.session.breakpoint()  # NOTE(review): looks like a leftover debugging hook - confirm.
+ # Ensure that the old reader can still read the history content.
+ long_cursor = session2.open_cursor(uri, None)
+ for i in range(1, self.nrows):
+ long_cursor.set_key(self.create_key(i))
+ self.assertEqual(long_cursor.search(), 0)
+ self.assertEqual(long_cursor.get_value(), value1)
+ long_cursor.reset()
+ long_cursor.close()
+
+ # Rollback the long running transaction.
+ session2.rollback_transaction()
+ session2.close()
+
+ # Pin oldest and stable to timestamp 10 so that the ooo tombstone (at 5) is globally visible.
+ self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(10) +
+ ',stable_timestamp=' + self.timestamp_str(10))
+
+ # Reconcile and remove the obsolete entries.
+ self.session.checkpoint()
+
+ # Evict the data from the cache, checking how each removed key now reads.
+ self.session.begin_transaction()
+ cursor2 = self.session.open_cursor(uri, None, "debug=(release_evict=true)")
+ for i in range(1, self.nrows):
+ cursor2.set_key(self.create_key(i))
+ if self.value_format == '8t':
+ self.assertEqual(cursor2.search(), 0)
+ else:
+ self.assertEqual(cursor2.search(), wiredtiger.WT_NOTFOUND)
+ cursor2.reset()
+ self.session.rollback_transaction()
+
+ # Now apply an insert at timestamp 20.
+ for i in range(1, self.nrows):
+ self.session.begin_transaction()
+ cursor[self.create_key(i)] = value2
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(20))
+
+ # Ensure that we blew away history store content: reads at timestamps 10-14 find nothing.
+ for ts in range(10, 15):
+ self.session.begin_transaction('read_timestamp=' + self.timestamp_str(ts))
+ for i in range(1, self.nrows):
+ cursor.set_key(self.create_key(i))
+ if self.value_format == '8t':
+ self.assertEqual(cursor.search(), 0)
+ self.assertEqual(cursor.get_value(), 0)
+ else:
+ self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+ self.session.rollback_transaction()
+
+ hs_truncate = self.get_stat(stat.conn.cache_hs_key_truncate_onpage_removal)
+ self.assertGreater(hs_truncate, 0)
+