summary | refs | log | tree | commit | diff
path: root/src/third_party/wiredtiger/src/history/hs.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/history/hs.c')
-rw-r--r-- src/third_party/wiredtiger/src/history/hs.c | 148
1 files changed, 49 insertions, 99 deletions
diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c
index 6d1c63c5fdf..398b871ea57 100644
--- a/src/third_party/wiredtiger/src/history/hs.c
+++ b/src/third_party/wiredtiger/src/history/hs.c
@@ -202,6 +202,9 @@ __wt_hs_cursor_open(WT_SESSION_IMPL *session)
session, ret = __wt_open_cursor(session, WT_HS_URI, NULL, open_cursor_cfg, &cursor));
WT_RET(ret);
+ /* History store cursors should always ignore tombstones. */
+ F_SET(cursor, WT_CURSTD_IGNORE_TOMBSTONE);
+
session->hs_cursor = cursor;
F_SET(session, WT_SESSION_HS_CURSOR);
@@ -215,7 +218,6 @@ __wt_hs_cursor_open(WT_SESSION_IMPL *session)
int
__wt_hs_cursor(WT_SESSION_IMPL *session, uint32_t *session_flags, bool *is_owner)
{
-
/*
* We don't want to get tapped for eviction after we start using the history store cursor; save
* a copy of the current eviction state, we'll turn eviction off before we return.
@@ -288,27 +290,12 @@ int
__wt_hs_modify(WT_CURSOR_BTREE *hs_cbt, WT_UPDATE *hs_upd)
{
WT_DECL_RET;
- WT_PAGE_MODIFY *mod;
- WT_SESSION_IMPL *session;
- WT_UPDATE *last_upd;
-
- session = CUR2S(hs_cbt);
-
- /* If there are existing updates, append them after the new updates. */
- if (hs_cbt->compare == 0) {
- for (last_upd = hs_upd; last_upd->next != NULL; last_upd = last_upd->next)
- ;
- if (hs_cbt->ins != NULL)
- last_upd->next = hs_cbt->ins->upd;
- else if ((mod = hs_cbt->ref->page->modify) != NULL && mod->mod_row_update != NULL)
- last_upd->next = mod->mod_row_update[hs_cbt->slot];
- }
/*
* We don't have exclusive access to the history store page so we need to pass "false" here to
* ensure that we're locking when inserting new keys to an insert list.
*/
- WT_WITH_BTREE(session, CUR2BT(hs_cbt),
+ WT_WITH_BTREE(CUR2S(hs_cbt), CUR2BT(hs_cbt),
ret = __wt_row_modify(hs_cbt, &hs_cbt->iface.key, NULL, hs_upd, WT_UPDATE_INVALID, false));
return (ret);
}
@@ -518,8 +505,6 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
*/
WT_ERR(__wt_cursor_key_order_init((WT_CURSOR_BTREE *)cursor));
#endif
- F_SET(cursor, WT_CURSTD_IGNORE_TOMBSTONE);
-
/* We're pointing at the newly inserted update. Iterate once more to avoid deleting it. */
WT_ERR_NOTFOUND_OK(cursor->next(cursor), true);
@@ -536,8 +521,6 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
done:
err:
- F_CLR(cursor, WT_CURSTD_IGNORE_TOMBSTONE);
-
/* We did a row search, release the cursor so that the page doesn't continue being held. */
cursor->reset(cursor);
@@ -609,7 +592,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
uint32_t i;
uint8_t *p;
int nentries;
- bool squashed, track_prepare, updates_in_hs, updates_older_than_onpage;
+ bool squashed, track_prepare;
uint8_t upd_count;
btree = S2BT(session);
@@ -664,7 +647,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
__wt_free_update_list(session, &upd);
upd = list->onpage_upd;
second_older_than_prepare = NULL;
- track_prepare = updates_in_hs = updates_older_than_onpage = false;
+ track_prepare = false;
upd_count = 0;
/*
@@ -724,15 +707,12 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
}
}
- if (F_ISSET(upd, WT_UPDATE_HS)) {
- updates_in_hs = true;
- /*
- * If we've reached a full update and its in the history store we don't need to
- * continue as anything beyond this point won't help with calculating deltas.
- */
- if (upd->type == WT_UPDATE_STANDARD)
- break;
- }
+ /*
+ * If we've reached a full update and its in the history store we don't need to continue
+ * as anything beyond this point won't help with calculating deltas.
+ */
+ if (upd->type == WT_UPDATE_STANDARD && F_ISSET(upd, WT_UPDATE_HS))
+ break;
}
upd = NULL;
@@ -742,16 +722,10 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
__wt_modify_vector_pop(&modifies, &upd);
WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD || upd->type == WT_UPDATE_TOMBSTONE);
-
/* Skip TOMBSTONE at the end of the update chain. */
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (modifies.size > 0) {
- /*
- * We don't need to delete the history store records if everything is still on the
- * insert list and there are no updates moved to the history store by checkpoint or
- * a failed eviction.
- */
- if ((list->ins == NULL || updates_in_hs) && upd->start_ts == WT_TS_NONE) {
+ if (upd->start_ts == WT_TS_NONE) {
/* We can only delete history store entries that have timestamps. */
WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1));
WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
@@ -777,7 +751,6 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
tmp = full_value, full_value = prev_full_value, prev_full_value = tmp,
upd = prev_upd) {
WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD || upd->type == WT_UPDATE_MODIFY);
- updates_older_than_onpage = true;
__wt_modify_vector_pop(&modifies, &prev_upd);
@@ -803,24 +776,13 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
stop_time_point.txnid = prev_upd->txnid;
}
- /*
- * Delete the history store records if we detect a mixed mode update. We don't need to
- * do that if everything is still on the insert list and there are no updates moved to
- * the history store by checkpoint or a failed eviction.
- *
- * Note that if the update is restored from data store or history store, we may have
- * cleared its timestamp, remove the history store contents anyway in this case.
- */
- if ((list->ins == NULL || updates_in_hs) && prev_upd->start_ts == WT_TS_NONE &&
- (upd->start_ts != WT_TS_NONE ||
- F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DS | WT_UPDATE_RESTORED_FROM_HS))) {
- /* We can only delete history store entries that have timestamps. */
- WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1));
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
- }
-
if (prev_upd->type == WT_UPDATE_TOMBSTONE) {
WT_ASSERT(session, modifies.size > 0);
+ if (prev_upd->start_ts == WT_TS_NONE) {
+ /* We can only delete history store entries that have timestamps. */
+ WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1));
+ WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
+ }
__wt_modify_vector_pop(&modifies, &prev_upd);
WT_ASSERT(session, prev_upd->type == WT_UPDATE_STANDARD);
prev_full_value->data = prev_upd->data;
@@ -874,24 +836,8 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi)
squashed = true;
}
- WT_ASSERT(session,
- upd->txnid == list->onpage_upd->txnid && upd->start_ts == list->onpage_upd->start_ts);
-
if (modifies.size > 0)
WT_STAT_CONN_INCR(session, cache_hs_write_squash);
-
- /*
- * Delete the history store records if the onpage update's timestamp is WT_TS_NONE and we
- * don't see any update older than it. We don't need to do that if everything is still on
- * the insert list and there are no updates moved to the history store by checkpoint or a
- * failed eviction.
- */
- if (!updates_older_than_onpage && (list->ins == NULL || updates_in_hs) &&
- upd->start_ts == WT_TS_NONE) {
- /* We can only delete history store entries that have timestamps. */
- WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, key, 1));
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts);
- }
}
WT_ERR(__wt_block_manager_named_size(session, WT_HS_FILE, &hs_size));
@@ -984,6 +930,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf)
{
WT_CURSOR *hs_cursor;
+ WT_CURSOR_BTREE *hs_cbt;
WT_DECL_ITEM(hs_value);
WT_DECL_ITEM(orig_hs_value_buf);
WT_DECL_RET;
@@ -1030,6 +977,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
/* Open a history store table cursor. */
WT_ERR(__wt_hs_cursor(session, &session_flags, &is_owner));
hs_cursor = session->hs_cursor;
+ hs_cbt = (WT_CURSOR_BTREE *)hs_cursor;
/*
* After positioning our cursor, we're stepping backwards to find the correct update. Since the
@@ -1043,19 +991,37 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
ret = 0;
goto done;
}
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
+ for (;; ret = hs_cursor->prev(hs_cursor)) {
+ WT_ERR_NOTFOUND_OK(ret, true);
+ /* If we hit the end of the table, let's get out of here. */
+ if (ret == WT_NOTFOUND) {
+ ret = 0;
+ goto done;
+ }
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
- /* Stop before crossing over to the next btree */
- if (hs_btree_id != S2BT(session)->id)
- goto done;
+ /* Stop before crossing over to the next btree */
+ if (hs_btree_id != S2BT(session)->id)
+ goto done;
- /*
- * Keys are sorted in an order, skip the ones before the desired key, and bail out if we have
- * crossed over the desired key and not found the record we are looking for.
- */
- WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
- if (cmp != 0)
- goto done;
+ /*
+ * Keys are sorted in an order, skip the ones before the desired key, and bail out if we
+ * have crossed over the desired key and not found the record we are looking for.
+ */
+ WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
+ if (cmp != 0)
+ goto done;
+
+ /*
+ * If the stop time point of a record is visible to us, we won't be able to see anything for
+ * this entire key. Just jump straight to the end.
+ */
+ if (__wt_txn_tw_stop_visible(session, &hs_cbt->upd_value->tw))
+ goto done;
+ /* If the start time point is visible to us, let's return that record. */
+ if (__wt_txn_tw_start_visible(session, &hs_cbt->upd_value->tw))
+ break;
+ }
WT_ERR(hs_cursor->get_value(
hs_cursor, &hs_stop_durable_ts, &durable_timestamp, &upd_type_full, hs_value));
@@ -1086,7 +1052,6 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
* visibility checks when reading in order to construct the modify chain, so we can create
* the value we expect.
*/
- F_SET(session, WT_SESSION_HS_IGNORE_VISIBILITY);
while (upd_type == WT_UPDATE_MODIFY) {
WT_ERR(__wt_upd_alloc(session, hs_value, upd_type, &mod_upd, NULL));
WT_ERR(__wt_modify_vector_push(&modifies, mod_upd));
@@ -1129,7 +1094,6 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
&upd_type_full, hs_value));
upd_type = (uint8_t)upd_type_full;
}
- F_CLR(session, WT_SESSION_HS_IGNORE_VISIBILITY);
WT_ASSERT(session, upd_type == WT_UPDATE_STANDARD);
while (modifies.size > 0) {
__wt_modify_vector_pop(&modifies, &mod_upd);
@@ -1153,8 +1117,6 @@ skip_buf:
done:
err:
- F_CLR(session, WT_SESSION_HS_IGNORE_VISIBILITY);
-
if (orig_hs_value_buf != NULL)
__wt_scr_free(session, &orig_hs_value_buf);
else
@@ -1270,20 +1232,10 @@ __wt_hs_delete_key_from_ts(
WT_RET(__wt_hs_cursor(session, &session_flags, &is_owner));
- /*
- * In order to delete a key range, we need to be able to inspect all history store records
- * regardless of their stop time points and the visibility of their values.
- */
- F_SET(session->hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE);
- F_SET(session, WT_SESSION_HS_IGNORE_VISIBILITY);
-
/* The tree structure can change while we try to insert the mod list, retry if that happens. */
while ((ret = __hs_delete_key_from_ts_int(session, btree_id, key, ts)) == WT_RESTART)
WT_STAT_CONN_INCR(session, cache_hs_insert_restart);
- F_CLR(session, WT_SESSION_HS_IGNORE_VISIBILITY);
- F_CLR(session->hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE);
-
WT_TRET(__wt_hs_cursor_close(session, session_flags, is_owner));
return (ret);
}
@@ -1375,7 +1327,6 @@ __verify_history_store_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint32
* visible tombstones in the data table to verify the corresponding entries in the history store
* are too present in the data store.
*/
- F_SET(hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE);
F_SET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE);
/*
@@ -1426,7 +1377,6 @@ __verify_history_store_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint32
WT_ERR_NOTFOUND_OK(ret, true);
err:
F_CLR(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE);
- F_CLR(hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE);
WT_ASSERT(session, hs_key.mem == NULL && hs_key.memsize == 0);
__wt_scr_free(session, &prev_hs_key);
return (ret);