summary | refs | log | tree | commit | diff
path: root/src/third_party
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2020-06-12 16:47:32 +1000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-06-12 07:00:55 +0000
commit: 4bd561c80d376130cf851fed23d1261ac36179fc (patch)
tree: 06114c2cd9687df8b66fde29528f3651f47be7b6 /src/third_party
parent: 4c86696d09d6d53af02452f7a557a9c40eddebfe (diff)
download: mongo-4bd561c80d376130cf851fed23d1261ac36179fc.tar.gz
Import wiredtiger: f650b1124b18cb4bccd61ca822ed19157206cc7e from branch mongodb-4.4
ref: cd0eca9e68..f650b1124b for: 4.5.1 WT-6347 Clear history store contents for mixed mode updates
Diffstat (limited to 'src/third_party')
-rw-r--r--  src/third_party/wiredtiger/import.data  2
-rw-r--r--  src/third_party/wiredtiger/src/history/hs.c  156
-rw-r--r--  src/third_party/wiredtiger/src/include/btmem.h  11
-rw-r--r--  src/third_party/wiredtiger/src/reconcile/rec_visibility.c  3
-rw-r--r--  src/third_party/wiredtiger/test/suite/test_hs11.py  29
5 files changed, 135 insertions, 66 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index e62088151eb..c657e8107df 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "cd0eca9e685e9ac2446e8e6d5b398b366a7ba420"
+ "commit": "f650b1124b18cb4bccd61ca822ed19157206cc7e"
}
diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c
index b295ac730b4..8dd46f6889e 100644
--- a/src/third_party/wiredtiger/src/history/hs.c
+++ b/src/third_party/wiredtiger/src/history/hs.c
@@ -475,7 +475,7 @@ err:
static int
__hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
const WT_ITEM *key, const WT_UPDATE *upd, const uint8_t type, const WT_ITEM *hs_value,
- WT_HS_TIME_POINT *stop_time_point)
+ WT_HS_TIME_POINT *stop_time_point, bool clear_hs)
{
WT_DECL_RET;
@@ -500,38 +500,27 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
*/
WT_ASSERT(session, type == WT_UPDATE_STANDARD || type == WT_UPDATE_MODIFY);
- /*
- * If the time points are out of order (which can happen if the application performs updates
- * with out-of-order timestamps), so this value can never be seen, don't bother inserting it.
- */
- if (stop_time_point->ts < upd->start_ts ||
- (stop_time_point->ts == upd->start_ts && stop_time_point->txnid <= upd->txnid)) {
- char ts_string[2][WT_TS_INT_STRING_SIZE];
- __wt_verbose(session, WT_VERB_TIMESTAMP,
- "Warning: fixing out-of-order timestamps %s earlier than previous update %s",
- __wt_timestamp_to_string(stop_time_point->ts, ts_string[0]),
- __wt_timestamp_to_string(upd->start_ts, ts_string[1]));
- return (0);
- }
-
/* The tree structure can change while we try to insert the mod list, retry if that happens. */
while ((ret = __hs_insert_record_with_btree_int(
session, cursor, btree, key, upd, type, hs_value, stop_time_point)) == WT_RESTART)
WT_STAT_CONN_INCR(session, cache_hs_insert_restart);
WT_ERR(ret);
- /* If we inserted a timestamped update, we don't need to delete any history store records. */
- if (upd->start_ts != WT_TS_NONE)
+ /* Done if we don't need to clear the history store content. */
+ if (!clear_hs)
goto done;
/*
- * If we inserted an update with no timestamp, we need to delete all history records for that
+ * We can only insert update without timestamp into the history store if we need to clear the
+ * history store record.
+ */
+ WT_ASSERT(session, upd->start_ts == WT_TS_NONE);
+
+ /*
+ * If we need to clear the history store content, we need to delete all history records for that
* key that are further in the history table than us (the key is lexicographically greater). For
* timestamped tables that are occasionally getting a non-timestamped update, that means that
- * all timestamped updates should get removed. In the case of non-timestamped tables, that means
- * that all updates with higher transaction ids will get removed (which could happen at some
- * more relaxed isolation levels). We're pointing at the newly inserted update, iterate once
- * more to avoid deleting it.
+ * all timestamped updates should get removed.
*/
WT_ERR_NOTFOUND_OK(cursor->next(cursor), true);
@@ -540,7 +529,6 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
ret = 0;
goto done;
}
-
while ((ret = __hs_delete_key_from_pos(session, cursor, btree->id, key)) == WT_RESTART)
WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts_restart);
WT_ERR(ret);
@@ -561,14 +549,14 @@ err:
static int
__hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, const WT_ITEM *key,
const WT_UPDATE *upd, const uint8_t type, const WT_ITEM *hs_value,
- WT_HS_TIME_POINT *stop_time_point)
+ WT_HS_TIME_POINT *stop_time_point, bool clear_hs)
{
WT_CURSOR_BTREE *cbt;
WT_DECL_RET;
cbt = (WT_CURSOR_BTREE *)cursor;
WT_WITH_BTREE(session, CUR2BT(cbt), ret = __hs_insert_record_with_btree(session, cursor, btree,
- key, upd, type, hs_value, stop_time_point));
+ key, upd, type, hs_value, stop_time_point, clear_hs));
return (ret);
}
@@ -578,19 +566,12 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
*/
static inline int
__hs_next_upd_full_value(WT_SESSION_IMPL *session, WT_MODIFY_VECTOR *modifies,
- WT_ITEM *older_full_value, uint32_t btree_id, const WT_ITEM *key, WT_ITEM *full_value,
- WT_UPDATE **updp)
+ WT_ITEM *older_full_value, WT_ITEM *full_value, WT_UPDATE **updp)
{
WT_UPDATE *upd;
*updp = NULL;
__wt_modify_vector_pop(modifies, &upd);
if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (upd->start_ts == WT_TS_NONE) {
- /* We can only delete history store entries that have timestamps. */
- WT_RET(__wt_hs_delete_key_from_ts(session, btree_id, key, 1));
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
- }
-
if (modifies->size == 0) {
WT_ASSERT(session, older_full_value == NULL);
*updp = upd;
@@ -634,14 +615,14 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
WT_MODIFY entries[MAX_REVERSE_MODIFY_NUM];
WT_MODIFY_VECTOR modifies;
WT_SAVE_UPD *list;
- WT_UPDATE *prev_upd, *upd;
+ WT_UPDATE *first_non_ts_upd, *oldest_upd, *prev_upd, *upd;
WT_HS_TIME_POINT stop_time_point;
wt_off_t hs_size;
uint64_t insert_cnt, max_hs_size;
uint32_t i;
uint8_t *p;
int nentries;
- bool enable_reverse_modify, squashed;
+ bool clear_hs, enable_reverse_modify, squashed, ts_updates_in_hs;
btree = S2BT(session);
cursor = session->hs_cursor;
@@ -694,6 +675,9 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
session, btree, upd = __wt_update_obsolete_check(session, page, list->onpage_upd, true));
__wt_free_update_list(session, &upd);
upd = list->onpage_upd;
+
+ first_non_ts_upd = NULL;
+ ts_updates_in_hs = false;
enable_reverse_modify = true;
/*
@@ -745,6 +729,19 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
if (prev_upd != NULL && prev_upd->start_ts < upd->start_ts)
enable_reverse_modify = false;
+ /* Find the first update without timestamp. */
+ if (first_non_ts_upd == NULL && upd->start_ts == WT_TS_NONE) {
+ first_non_ts_upd = upd;
+ } else if (first_non_ts_upd != NULL && upd->start_ts != WT_TS_NONE) {
+ /*
+ * Don't insert updates with timestamps after updates without timestamps to the
+ * history store.
+ */
+ F_SET(upd, WT_UPDATE_MASKED_BY_NON_TS_UPDATE);
+ if (F_ISSET(upd, WT_UPDATE_HS))
+ ts_updates_in_hs = true;
+ }
+
/*
* If we've reached a full update and it's in the history store we don't need to
* continue as anything beyond this point won't help with calculating deltas.
@@ -758,14 +755,35 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
/* Construct the oldest full update. */
WT_ASSERT(session, modifies.size > 0);
-#ifdef HAVE_DIAGNOSTIC
- __wt_modify_vector_peek(&modifies, &upd);
+ __wt_modify_vector_peek(&modifies, &oldest_upd);
- WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD || upd->type == WT_UPDATE_TOMBSTONE);
-#endif
+ WT_ASSERT(session,
+ oldest_upd->type == WT_UPDATE_STANDARD || oldest_upd->type == WT_UPDATE_TOMBSTONE);
- WT_ERR(
- __hs_next_upd_full_value(session, &modifies, NULL, btree->id, key, full_value, &upd));
+ /*
+ * Clear the history store here if the oldest update is a tombstone and it is the first
+ * update without timestamp on the update chain because we don't have the cursor placed at
+ * the correct place to delete the history store records when inserting the first update and
+ * it may be skipped if there is nothing to insert to the history store.
+ */
+ if (oldest_upd->type == WT_UPDATE_TOMBSTONE && oldest_upd == first_non_ts_upd) {
+ /* We can only delete history store entries that have timestamps. */
+ WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1));
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
+ clear_hs = false;
+ } else
+ /*
+ * Clear the content with timestamps in the history store if we see updates without
+ * timestamps on the update chain.
+ *
+ * We don't need to clear the history store records if everything is still on the insert
+ * list and there are no updates moved to the history store by checkpoint or a failed
+ * eviction.
+ */
+ clear_hs = first_non_ts_upd != NULL && !F_ISSET(first_non_ts_upd, WT_UPDATE_HS) &&
+ (list->ins == NULL || ts_updates_in_hs);
+
+ WT_ERR(__hs_next_upd_full_value(session, &modifies, NULL, full_value, &upd));
squashed = false;
@@ -804,8 +822,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
stop_time_point.txnid = prev_upd->txnid;
}
- WT_ERR(__hs_next_upd_full_value(
- session, &modifies, full_value, btree->id, key, prev_full_value, &prev_upd));
+ WT_ERR(
+ __hs_next_upd_full_value(session, &modifies, full_value, prev_full_value, &prev_upd));
/* Squash the updates from the same transaction. */
if (upd->start_ts == prev_upd->start_ts && upd->txnid == prev_upd->txnid) {
@@ -813,22 +831,42 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
continue;
}
- if (F_ISSET(upd, WT_UPDATE_HS))
+ /* Skip updates already in the history store or masked by updates without timestamps. */
+ if (F_ISSET(upd, WT_UPDATE_HS | WT_UPDATE_MASKED_BY_NON_TS_UPDATE))
continue;
- /* Calculate reverse modify. */
+ /*
+ * If the time points are out of order (which can happen if the application performs
+ * updates with out-of-order timestamps), so this value can never be seen, don't bother
+ * inserting it.
+ */
+ if (stop_time_point.ts < upd->start_ts ||
+ (stop_time_point.ts == upd->start_ts && stop_time_point.txnid <= upd->txnid)) {
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+ __wt_verbose(session, WT_VERB_TIMESTAMP,
+ "Warning: fixing out-of-order timestamps %s earlier than previous update %s",
+ __wt_timestamp_to_string(stop_time_point.ts, ts_string[0]),
+ __wt_timestamp_to_string(upd->start_ts, ts_string[1]));
+ continue;
+ }
+
+ /*
+ * Calculate reverse modify and clear the history store records with timestamps when
+ * inserting the first update.
+ */
nentries = MAX_REVERSE_MODIFY_NUM;
if (upd->type == WT_UPDATE_MODIFY && enable_reverse_modify &&
__wt_calc_modify(session, prev_full_value, full_value, prev_full_value->size / 10,
entries, &nentries) == 0) {
WT_ERR(__wt_modify_pack(cursor, entries, nentries, &modify_value));
WT_ERR(__hs_insert_record(session, cursor, btree, key, upd, WT_UPDATE_MODIFY,
- modify_value, &stop_time_point));
+ modify_value, &stop_time_point, clear_hs));
__wt_scr_free(session, &modify_value);
} else
WT_ERR(__hs_insert_record(session, cursor, btree, key, upd, WT_UPDATE_STANDARD,
- full_value, &stop_time_point));
+ full_value, &stop_time_point, clear_hs));
+ clear_hs = false;
/* Flag the update as now in the history store. */
F_SET(upd, WT_UPDATE_HS);
++insert_cnt;
@@ -840,6 +878,28 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
if (modifies.size > 0)
WT_STAT_CONN_INCR(session, cache_hs_write_squash);
+
+ /*
+ * We need to clear the history store if we haven't inserted anything into the history store
+ * and there are updates without timestamps in the middle of the update chain.
+ *
+ * e.g., U@10 -> T@0 -> U@5.
+ *
+ * But we don't need to clear the history store if we write an update without timestamp to
+ * the data store because we don't insert any update with timestamp to the history store and
+ * we will clear the history store again once that update is moved to the history store.
+ *
+ * e.g., U@0 -> U@10 -> U@5 and U@1 in the history store. U@10 and U@5 are not inserted to
+ * the history store as they are flagged as WT_UPDATE_MASKED_BY_NON_TS_UPDATE and U@1 is not
+ * removed from the history store. U@1 will be removed from the history store once U@0 is
+ * moved to the history store.
+ */
+ if (clear_hs && (first_non_ts_upd->txnid != list->onpage_upd->txnid ||
+ first_non_ts_upd->start_ts != list->onpage_upd->start_ts)) {
+ /* We can only delete history store entries that have timestamps. */
+ WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1));
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
+ }
}
WT_ERR(__wt_block_manager_named_size(session, WT_HS_FILE, &hs_size));
@@ -1202,6 +1262,8 @@ __hs_delete_key_from_ts_int(
WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
if (cmp != 0)
goto done;
+
+ WT_ASSERT(session, ts == WT_TS_NONE || hs_start_ts != WT_TS_NONE);
WT_ERR(__hs_delete_key_from_pos(session, hs_cursor, btree_id, key));
done:
ret = 0;
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index c9711020d89..c79316217dc 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -1073,11 +1073,12 @@ struct __wt_update {
volatile uint8_t prepare_state; /* prepare state */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_UPDATE_HS 0x1u /* Update has been written to history store. */
-#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x2u /* Prepared update restored from data store. */
-#define WT_UPDATE_RESTORED_FROM_DS 0x4u /* Update restored from data store. */
-#define WT_UPDATE_RESTORED_FROM_HS 0x8u /* Update restored from history store. */
- /* AUTOMATIC FLAG VALUE GENERATION STOP */
+#define WT_UPDATE_HS 0x01u /* Update has been written to history store. */
+#define WT_UPDATE_MASKED_BY_NON_TS_UPDATE 0x02u /* Update masked by updates without timestamp. */
+#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x04u /* Prepared update restored from data store. */
+#define WT_UPDATE_RESTORED_FROM_DS 0x08u /* Update restored from data store. */
+#define WT_UPDATE_RESTORED_FROM_HS 0x10u /* Update restored from history store. */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
uint8_t flags;
/*
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 56d3f711f10..197a3a920cb 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -140,9 +140,6 @@ __rec_append_orig_value(
* timestamped globally visible tombstone because even if its timestamp is smaller than
* the entries in the history store, we can't change the history store entries. This is
* not correct but we hope we can get away with it.
- *
- * FIXME-WT-6171: remove this once we get rid of out of order timestamps and mixed mode
- * transactions.
*/
if (unpack->tw.durable_stop_ts != WT_TS_NONE && tombstone_globally_visible)
return (0);
diff --git a/src/third_party/wiredtiger/test/suite/test_hs11.py b/src/third_party/wiredtiger/test/suite/test_hs11.py
index efc9d02401c..eaa991557ee 100644
--- a/src/third_party/wiredtiger/test/suite/test_hs11.py
+++ b/src/third_party/wiredtiger/test/suite/test_hs11.py
@@ -27,18 +27,22 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
+from wtscenario import make_scenarios
def timestamp_str(t):
return '%x' % t
# test_hs11.py
-# Ensure that when we delete a key due to a tombstone being globally visible, we delete its
-# associated history store content.
+# Ensure that updates without timestamps clear the history store records.
class test_hs11(wttest.WiredTigerTestCase):
conn_config = 'cache_size=50MB'
session_config = 'isolation=snapshot'
+ scenarios = make_scenarios([
+ ('deletion', dict(update_type='deletion')),
+ ('update', dict(update_type='update')),
+ ])
- def test_key_deletion_clears_hs(self):
+ def test_non_ts_updates_clears_hs(self):
uri = 'table:test_hs11'
create_params = 'key_format=S,value_format=S'
self.session.create(uri, create_params)
@@ -58,14 +62,16 @@ class test_hs11(wttest.WiredTigerTestCase):
# Reconcile and flush versions 1-3 to the history store.
self.session.checkpoint()
- # Apply a non-timestamped tombstone. When the pages get evicted, the keys will get deleted
- # since the tombstone is globally visible.
+ # Apply an update without timestamp.
for i in range(1, 10000):
if i % 2 == 0:
- cursor.set_key(str(i))
- cursor.remove()
+ if self.update_type == 'deletion':
+ cursor.set_key(str(i))
+ cursor.remove()
+ else:
+ cursor[str(i)] = value2
- # Now apply an update at timestamp 10 to recreate each key.
+ # Now apply an update at timestamp 10.
for i in range(1, 10000):
self.session.begin_transaction()
cursor[str(i)] = value2
@@ -76,8 +82,11 @@ class test_hs11(wttest.WiredTigerTestCase):
self.session.begin_transaction('read_timestamp=' + timestamp_str(ts))
for i in range(1, 10000):
if i % 2 == 0:
- cursor.set_key(str(i))
- self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+ if self.update_type == 'deletion':
+ cursor.set_key(str(i))
+ self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+ else:
+ self.assertEqual(cursor[str(i)], value2)
else:
self.assertEqual(cursor[str(i)], value1)
self.session.rollback_transaction()