summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2022-03-04 15:15:29 +1100
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-03-04 04:40:24 +0000
commitf6cf7f9a95fa0ddbe7e3e911dd08ff3ba6f82223 (patch)
treeb56e272f7b821144810913335c4446a6a0596d87
parent3e81e8d185306a6d7e322b2a8b60103259a5f6b1 (diff)
downloadmongo-f6cf7f9a95fa0ddbe7e3e911dd08ff3ba6f82223.tar.gz
Import wiredtiger: eee2144e597afa473bd8d08c84bab37ea8ff1234 from branch mongodb-5.3
ref: b1a6788043..eee2144e59 for: 5.3.0-rc3 WT-8362 Remove or rewrite HS entries of a key when OOO tombstone is written to datastore
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/history/hs_rec.c41
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h9
-rw-r--r--src/third_party/wiredtiger/src/include/reconcile.h3
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_col.c39
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_row.c18
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_visibility.c18
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c8
-rw-r--r--src/third_party/wiredtiger/test/suite/test_hs31.py182
9 files changed, 280 insertions, 40 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 417516cfabb..fd92e955b6e 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-5.3",
- "commit": "b1a6788043f564c5fe1956ceb94307cdd47ff9ca"
+ "commit": "eee2144e597afa473bd8d08c84bab37ea8ff1234"
}
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c
index 8b78e203243..6169286024b 100644
--- a/src/third_party/wiredtiger/src/history/hs_rec.c
+++ b/src/third_party/wiredtiger/src/history/hs_rec.c
@@ -9,8 +9,8 @@
#include "wt_internal.h"
static int __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
- uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts,
- uint64_t *hs_counter);
+ uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool ooo_tombstone,
+ bool error_on_ooo_ts, uint64_t *hs_counter);
/*
* __hs_verbose_cache_stats --
@@ -219,8 +219,8 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
}
if (ret == 0)
- WT_ERR(__hs_delete_reinsert_from_pos(
- session, cursor, btree->id, key, tw->start_ts + 1, true, error_on_ooo_ts, &counter));
+ WT_ERR(__hs_delete_reinsert_from_pos(session, cursor, btree->id, key, tw->start_ts + 1,
+ true, false, error_on_ooo_ts, &counter));
#ifdef HAVE_DIAGNOSTIC
/*
@@ -545,7 +545,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult
if (!F_ISSET(fix_ts_upd, WT_UPDATE_FIXED_HS)) {
/* Delete and reinsert any update of the key with a higher timestamp. */
WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, key,
- fix_ts_upd->start_ts + 1, true, error_on_ooo_ts));
+ fix_ts_upd->start_ts + 1, true, false, error_on_ooo_ts));
F_SET(fix_ts_upd, WT_UPDATE_FIXED_HS);
}
}
@@ -790,7 +790,7 @@ err:
*/
int
__wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
- const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts)
+ const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool ooo_tombstone, bool error_on_ooo_ts)
{
WT_DECL_RET;
WT_ITEM hs_key;
@@ -800,10 +800,10 @@ __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint3
bool hs_read_all_flag;
/*
- * If we will delete all the updates of the key from the history store, we should not reinsert
- * any update.
+ * If we delete all the updates of the key from the history store, we should not reinsert any
+ * update except when an out-of-order tombstone is not globally visible yet.
*/
- WT_ASSERT(session, ts > WT_TS_NONE || !reinsert);
+ WT_ASSERT(session, ooo_tombstone || ts > WT_TS_NONE || !reinsert);
hs_read_all_flag = F_ISSET(hs_cursor, WT_CURSTD_HS_READ_ALL);
@@ -823,8 +823,8 @@ __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint3
++hs_counter;
}
- WT_ERR(__hs_delete_reinsert_from_pos(
- session, hs_cursor, btree_id, key, ts, reinsert, error_on_ooo_ts, &hs_counter));
+ WT_ERR(__hs_delete_reinsert_from_pos(session, hs_cursor, btree_id, key, ts, reinsert,
+ ooo_tombstone, error_on_ooo_ts, &hs_counter));
done:
err:
@@ -842,7 +842,8 @@ err:
*/
static int
__hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
- const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts, uint64_t *counter)
+ const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool ooo_tombstone, bool error_on_ooo_ts,
+ uint64_t *counter)
{
WT_CURSOR *hs_insert_cursor;
WT_CURSOR_BTREE *hs_cbt;
@@ -868,9 +869,11 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
WT_UNUSED(key);
#endif
- /* If we will delete all the updates of the key from the history store, we should not reinsert
- * any update. */
- WT_ASSERT(session, ts > WT_TS_NONE || !reinsert);
+ /*
+ * If we delete all the updates of the key from the history store, we should not reinsert any
+ * update except when an out-of-order tombstone is not globally visible yet.
+ */
+ WT_ASSERT(session, ooo_tombstone || ts > WT_TS_NONE || !reinsert);
for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
/* Ignore records that are obsolete. */
@@ -1011,7 +1014,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
* to the new update.
*/
if (hs_cbt->upd_value->tw.start_ts >= ts)
- hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = ts - 1;
+ hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = ooo_tombstone ? ts : ts - 1;
else {
hs_insert_tw.start_ts = hs_cbt->upd_value->tw.start_ts;
hs_insert_tw.durable_start_ts = hs_cbt->upd_value->tw.durable_start_ts;
@@ -1023,13 +1026,17 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
* another moved update OR the update itself triggered the correction. In either case,
* we should preserve the stop transaction id.
*/
- hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = ts - 1;
+ hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = ooo_tombstone ? ts : ts - 1;
hs_insert_tw.stop_txn = hs_cbt->upd_value->tw.stop_txn;
/* Extract the underlying value for reinsertion. */
WT_ERR(hs_cursor->get_value(
hs_cursor, &tw.durable_stop_ts, &tw.durable_start_ts, &hs_upd_type, &hs_value));
+ /* For an out-of-order tombstone, reuse hs_counter when hs_ts equals ts. */
+ if (ooo_tombstone && hs_ts == ts)
+ *counter = hs_counter;
+
/* Insert the value back with different timestamps. */
hs_insert_cursor->set_key(
hs_insert_cursor, 4, btree_id, &hs_key, hs_insert_tw.start_ts, *counter);
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 80ecdfc4908..c1d0b8ddb05 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -774,8 +774,8 @@ extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *
extern int __wt_hs_config(WT_SESSION_IMPL *session, const char **cfg)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
- uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool ooo_tombstone,
+ bool error_on_ooo_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
const char *value_format, uint64_t recno, WT_UPDATE_VALUE *upd_value, WT_ITEM *base_value_buf)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1220,8 +1220,9 @@ extern int __wt_rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_rec_dictionary_lookup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *val,
WT_REC_DICTIONARY **dpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_rec_hs_clear_on_tombstone(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint64_t recno,
- WT_ITEM *rowkey) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_hs_clear_on_tombstone(WT_SESSION_IMPL *session, WT_RECONCILE *r,
+ wt_timestamp_t ts, uint64_t recno, WT_ITEM *rowkey, bool reinsert)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_rec_row_leaf(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref,
diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h
index 02aca0e4e8a..024afebde5e 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.h
+++ b/src/third_party/wiredtiger/src/include/reconcile.h
@@ -309,7 +309,8 @@ typedef struct {
WT_TIME_WINDOW tw;
- bool upd_saved; /* An element on the row's update chain was saved */
+ bool upd_saved; /* An element on the row's update chain was saved */
+ bool ooo_tombstone; /* Out-of-order tombstone */
} WT_UPDATE_SELECT;
/*
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_col.c b/src/third_party/wiredtiger/src/reconcile/rec_col.c
index 0f0b40e0dde..4222195202a 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_col.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_col.c
@@ -753,11 +753,12 @@ __wt_rec_col_fix(
if (upd->type == WT_UPDATE_TOMBSTONE) {
/*
- * When removing a key due to a tombstone with a durable timestamp of "none", also
- * remove the history store contents associated with that key.
+ * When an out-of-order or mixed-mode tombstone is getting written to disk, remove any
+ * historical versions that are greater in the history store for this key.
*/
- if (upd_select.tw.durable_stop_ts == WT_TS_NONE && r->hs_clear_on_tombstone)
- WT_ERR(__wt_rec_hs_clear_on_tombstone(session, r, recno, NULL));
+ if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone)
+ WT_ERR(__wt_rec_hs_clear_on_tombstone(
+ session, r, upd_select.tw.durable_stop_ts, recno, NULL, false));
val = 0;
} else {
@@ -770,9 +771,18 @@ __wt_rec_col_fix(
__bit_setv(r->first_free, recno - curstartrecno, btree->bitcnt, val);
/* Write the time window. */
- if (!WT_TIME_WINDOW_IS_EMPTY(&upd_select.tw))
+ if (!WT_TIME_WINDOW_IS_EMPTY(&upd_select.tw)) {
+ /*
+ * When an out-of-order or mixed-mode tombstone is getting written to disk, remove any
+ * historical versions that are greater in the history store for this key.
+ */
+ if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone)
+ WT_ERR(__wt_rec_hs_clear_on_tombstone(
+ session, r, upd_select.tw.durable_stop_ts, recno, NULL, true));
+
WT_ERR(__wt_rec_col_fix_addtw(
session, r, (uint32_t)(recno - curstartrecno), &upd_select.tw));
+ }
/* If there was an entry in the time windows index for this key, skip over it. */
if (tw < numtws && origstartrecno + page->pg_fix_tws[tw].recno_offset == recno)
@@ -1370,14 +1380,25 @@ record_loop:
case WT_UPDATE_STANDARD:
data = upd->data;
size = upd->size;
+ /*
+ * When an out-of-order or mixed-mode tombstone is getting written to disk,
+ * remove any historical versions that are greater in the history store for this
+ * key.
+ */
+ if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone)
+ WT_ERR(__wt_rec_hs_clear_on_tombstone(
+ session, r, twp->durable_stop_ts, src_recno, NULL, true));
+
break;
case WT_UPDATE_TOMBSTONE:
/*
- * When removing a key due to a tombstone with a durable timestamp of "none",
- * also remove the history store contents associated with that key.
+ * When an out-of-order or mixed-mode tombstone is getting written to disk,
+ * remove any historical versions that are greater in the history store for this
+ * key.
*/
- if (twp->durable_stop_ts == WT_TS_NONE && r->hs_clear_on_tombstone)
- WT_ERR(__wt_rec_hs_clear_on_tombstone(session, r, src_recno, NULL));
+ if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone)
+ WT_ERR(__wt_rec_hs_clear_on_tombstone(
+ session, r, twp->durable_stop_ts, src_recno, NULL, false));
deleted = true;
twp = &clear_tw;
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
index 712c340ee61..a3657631701 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_row.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -807,6 +807,15 @@ __wt_rec_row_leaf(
case WT_UPDATE_STANDARD:
/* Take the value from the update. */
WT_ERR(__wt_rec_cell_build_val(session, r, upd->data, upd->size, twp, 0));
+ /*
+ * When an out-of-order or mixed-mode tombstone is getting written to disk, remove
+ * any historical versions that are greater in the history store for that key.
+ */
+ if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone) {
+ WT_ERR(__wt_row_leaf_key(session, page, rip, tmpkey, true));
+ WT_ERR(__wt_rec_hs_clear_on_tombstone(
+ session, r, twp->durable_stop_ts, WT_RECNO_OOB, tmpkey, true));
+ }
dictionary = true;
break;
case WT_UPDATE_TOMBSTONE:
@@ -831,12 +840,13 @@ __wt_rec_row_leaf(
}
/*
- * When removing a key due to a tombstone with a durable timestamp of "none", also
- * remove the history store contents associated with that key.
+ * When an out-of-order or mixed-mode tombstone is getting written to disk, remove
+ * any historical versions that are greater in the history store for this key.
*/
- if (twp->durable_stop_ts == WT_TS_NONE && r->hs_clear_on_tombstone) {
+ if (upd_select.ooo_tombstone && r->hs_clear_on_tombstone) {
WT_ERR(__wt_row_leaf_key(session, page, rip, tmpkey, true));
- WT_ERR(__wt_rec_hs_clear_on_tombstone(session, r, WT_RECNO_OOB, tmpkey));
+ WT_ERR(__wt_rec_hs_clear_on_tombstone(
+ session, r, twp->durable_stop_ts, WT_RECNO_OOB, tmpkey, false));
}
/*
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 1fc738d109e..8d92a123813 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -386,6 +386,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
*/
upd_select->upd = NULL;
upd_select->upd_saved = false;
+ upd_select->ooo_tombstone = false;
select_tw = &upd_select->tw;
WT_TIME_WINDOW_INIT(select_tw);
@@ -675,6 +676,23 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, W
WT_RET(__rec_validate_upd_chain(session, r, onpage_upd, select_tw, vpack));
/*
+ * Set the flag if the selected tombstone is out-of-order or mixed-mode with respect to an update.
+ * Based on this flag, the calling functions perform the history store truncation for this key.
+ */
+ if (tombstone != NULL &&
+ !F_ISSET(tombstone, WT_UPDATE_RESTORED_FROM_DS | WT_UPDATE_RESTORED_FROM_HS)) {
+ upd = upd_select->upd;
+ while (upd != NULL && upd->txnid == WT_TXN_ABORTED)
+ upd = upd->next;
+
+ if (upd != NULL && upd->start_ts > tombstone->start_ts)
+ upd_select->ooo_tombstone = true;
+
+ if (vpack != NULL && vpack->tw.start_ts > upd->start_ts)
+ upd_select->ooo_tombstone = true;
+ }
+
+ /*
* Fixup any out of order timestamps, assert that checkpoint wasn't running when this round of
* reconciliation started.
*
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 696d12bd777..ece0dd801c2 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -2711,8 +2711,8 @@ err:
* history store contents associated with that key.
*/
int
-__wt_rec_hs_clear_on_tombstone(
- WT_SESSION_IMPL *session, WT_RECONCILE *r, uint64_t recno, WT_ITEM *rowkey)
+__wt_rec_hs_clear_on_tombstone(WT_SESSION_IMPL *session, WT_RECONCILE *r, wt_timestamp_t ts,
+ uint64_t recno, WT_ITEM *rowkey, bool reinsert)
{
WT_BTREE *btree;
WT_ITEM hs_recno_key, *key;
@@ -2743,8 +2743,8 @@ __wt_rec_hs_clear_on_tombstone(
* eviction starting its reconciliation as previous checks done while selecting an update will
* detect that.
*/
- WT_RET(
- __wt_hs_delete_key_from_ts(session, r->hs_cursor, btree->id, key, WT_TS_NONE, false, false));
+ WT_RET(__wt_hs_delete_key_from_ts(session, r->hs_cursor, btree->id, key, ts, reinsert, true,
+ F_ISSET(r, WT_REC_CHECKPOINT_RUNNING)));
/* Fail 0.01% of the time. */
if (F_ISSET(r, WT_REC_EVICT) &&
diff --git a/src/third_party/wiredtiger/test/suite/test_hs31.py b/src/third_party/wiredtiger/test/suite/test_hs31.py
new file mode 100644
index 00000000000..d793ce11c44
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_hs31.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-present MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+from wtscenario import make_scenarios
+from wiredtiger import stat
+
+# test_hs31.py
+# Ensure that a tombstone with an out-of-order timestamp clears the history store records.
+class test_hs31(wttest.WiredTigerTestCase):
+ conn_config = 'cache_size=5MB,statistics=(all)'
+ format_values = [
+ ('column', dict(key_format='r', value_format='S')),
+ ('column-fix', dict(key_format='r', value_format='8t')),
+ ('integer-row', dict(key_format='i', value_format='S')),
+ ('string-row', dict(key_format='S', value_format='S')),
+ ]
+
+ ooo_values = [
+ ('out-of-order', dict(ooo_value=True)),
+ ('mixed-mode', dict(ooo_value=False)),
+ ]
+
+ globally_visible_before_ckpt_values = [
+ ('globally_visible_before_ckpt', dict(globally_visible_before_ckpt=True)),
+ ('no_globally_visible_before_ckpt', dict(globally_visible_before_ckpt=False)),
+ ]
+
+ scenarios = make_scenarios(format_values, ooo_values, globally_visible_before_ckpt_values)
+ nrows = 1000
+
+ def create_key(self, i):
+ if self.key_format == 'S':
+ return str(i)
+ return i
+
+ def get_stat(self, stat):
+ stat_cursor = self.session.open_cursor('statistics:')
+ val = stat_cursor[stat][2]
+ stat_cursor.close()
+ return val
+
+ def test_ooo_tombstone_clear_hs(self):
+ uri = 'file:test_hs31'
+ create_params = 'key_format={},value_format={}'.format(self.key_format, self.value_format)
+ self.session.create(uri, create_params)
+
+ if self.value_format == '8t':
+ value1 = 97
+ value2 = 98
+ else:
+ value1 = 'a' * 500
+ value2 = 'b' * 500
+
+ # Pin oldest and stable to timestamp 1.
+ self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(1) +
+ ',stable_timestamp=' + self.timestamp_str(1))
+
+ # Apply a series of updates from timestamps 10-14.
+ cursor = self.session.open_cursor(uri)
+ for ts in range(10, 15):
+ for i in range(1, self.nrows):
+ self.session.begin_transaction()
+ cursor[self.create_key(i)] = value1
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(ts))
+
+ # Reconcile and flush versions 10-13 to the history store.
+ self.session.checkpoint()
+
+ # Evict the data from the cache.
+ self.session.begin_transaction()
+ cursor2 = self.session.open_cursor(uri, None, "debug=(release_evict=true)")
+ for i in range(1, self.nrows):
+ cursor2.set_key(self.create_key(i))
+ cursor2.search()
+ cursor2.reset()
+ self.session.rollback_transaction()
+
+ if not self.ooo_value:
+ self.session.breakpoint()
+ # Start a long running transaction to stop the oldest id being advanced.
+ session2 = self.conn.open_session()
+ session2.begin_transaction()
+ long_cursor = session2.open_cursor(uri, None)
+ long_cursor[self.create_key(self.nrows + 10)] = value1
+ long_cursor.reset()
+ long_cursor.close()
+
+ # Remove the key with an ooo or mm timestamp.
+ for i in range(1, self.nrows):
+ self.session.begin_transaction()
+ cursor.set_key(self.create_key(i))
+ cursor.remove()
+ if self.ooo_value:
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(5))
+ else:
+ self.session.commit_transaction()
+
+ if not self.globally_visible_before_ckpt:
+ # Reconcile to write the stop time window.
+ self.session.checkpoint()
+
+ if not self.ooo_value:
+ self.session.breakpoint()
+ # Ensure that old reader can read the history content.
+ long_cursor = session2.open_cursor(uri, None)
+ for i in range(1, self.nrows):
+ long_cursor.set_key(self.create_key(i))
+ self.assertEqual(long_cursor.search(), 0)
+ self.assertEqual(long_cursor.get_value(), value1)
+ long_cursor.reset()
+ long_cursor.close()
+
+ # Rollback the long running transaction.
+ session2.rollback_transaction()
+ session2.close()
+
+ # Pin oldest and stable to timestamp 10 so that the ooo tombstone is globally visible.
+ self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(10) +
+ ',stable_timestamp=' + self.timestamp_str(10))
+
+ # Reconcile and remove the obsolete entries.
+ self.session.checkpoint()
+
+ # Evict the data from the cache.
+ self.session.begin_transaction()
+ cursor2 = self.session.open_cursor(uri, None, "debug=(release_evict=true)")
+ for i in range(1, self.nrows):
+ cursor2.set_key(self.create_key(i))
+ if self.value_format == '8t':
+ self.assertEqual(cursor2.search(), 0)
+ else:
+ self.assertEqual(cursor2.search(), wiredtiger.WT_NOTFOUND)
+ cursor2.reset()
+ self.session.rollback_transaction()
+
+ # Now apply an insert at timestamp 20.
+ for i in range(1, self.nrows):
+ self.session.begin_transaction()
+ cursor[self.create_key(i)] = value2
+ self.session.commit_transaction('commit_timestamp=' + self.timestamp_str(20))
+
+ # Ensure that we blew away history store content.
+ for ts in range(10, 15):
+ self.session.begin_transaction('read_timestamp=' + self.timestamp_str(ts))
+ for i in range(1, self.nrows):
+ cursor.set_key(self.create_key(i))
+ if self.value_format == '8t':
+ self.assertEqual(cursor.search(), 0)
+ self.assertEqual(cursor.get_value(), 0)
+ else:
+ self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+ self.session.rollback_transaction()
+
+ hs_truncate = self.get_stat(stat.conn.cache_hs_key_truncate_onpage_removal)
+ self.assertGreater(hs_truncate, 0)
+