summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/history/hs_rec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/history/hs_rec.c')
-rw-r--r--src/third_party/wiredtiger/src/history/hs_rec.c285
1 files changed, 152 insertions, 133 deletions
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c
index 236f8856257..eb3697423be 100644
--- a/src/third_party/wiredtiger/src/history/hs_rec.c
+++ b/src/third_party/wiredtiger/src/history/hs_rec.c
@@ -61,65 +61,6 @@ __hs_verbose_cache_stats(WT_SESSION_IMPL *session, WT_BTREE *btree)
}
/*
- * __hs_delete_record --
- * Delete the update left in the history store
- */
-static int
-__hs_delete_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM *key,
- WT_UPDATE *delete_upd, WT_UPDATE *delete_tombstone)
-{
- WT_DECL_RET;
- bool hs_read_committed;
-#ifdef HAVE_DIAGNOSTIC
- WT_TIME_WINDOW *hs_tw;
-#endif
-
- hs_read_committed = F_ISSET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
- F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
-
- /* No need to delete from the history store if it is already obsolete. */
- if (delete_tombstone != NULL && __wt_txn_upd_visible_all(session, delete_tombstone)) {
- ret = 0;
- goto done;
- }
-
- hs_cursor->set_key(hs_cursor, 4, S2BT(session)->id, key, WT_TS_MAX, UINT64_MAX);
- WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_before(session, hs_cursor), true);
- /* It's possible the value in the history store becomes obsolete concurrently. */
- if (ret == WT_NOTFOUND) {
- WT_ASSERT(
- session, delete_tombstone != NULL && __wt_txn_upd_visible_all(session, delete_tombstone));
- ret = 0;
- goto done;
- }
-
-#ifdef HAVE_DIAGNOSTIC
- __wt_hs_upd_time_window(hs_cursor, &hs_tw);
- WT_ASSERT(session, hs_tw->start_txn == WT_TXN_NONE || hs_tw->start_txn == delete_upd->txnid);
- WT_ASSERT(session, hs_tw->start_ts == WT_TS_NONE || hs_tw->start_ts == delete_upd->start_ts);
- WT_ASSERT(session,
- hs_tw->durable_start_ts == WT_TS_NONE || hs_tw->durable_start_ts == delete_upd->durable_ts);
- if (delete_tombstone != NULL) {
- WT_ASSERT(session, hs_tw->stop_txn == delete_tombstone->txnid);
- WT_ASSERT(session, hs_tw->stop_ts == delete_tombstone->start_ts);
- WT_ASSERT(session, hs_tw->durable_stop_ts == delete_tombstone->durable_ts);
- } else
- WT_ASSERT(session, !WT_TIME_WINDOW_HAS_STOP(hs_tw));
-#endif
-
- WT_ERR(hs_cursor->remove(hs_cursor));
-done:
- if (delete_tombstone != NULL)
- F_CLR(delete_tombstone, WT_UPDATE_TO_DELETE_FROM_HS | WT_UPDATE_HS);
- F_CLR(delete_upd, WT_UPDATE_TO_DELETE_FROM_HS | WT_UPDATE_HS);
-
-err:
- if (!hs_read_committed)
- F_CLR(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
- return (ret);
-}
-
-/*
* __hs_insert_record --
* A helper function to insert the record into the history store including stop time point.
*/
@@ -345,9 +286,45 @@ __hs_next_upd_full_value(WT_SESSION_IMPL *session, WT_UPDATE_VECTOR *updates,
}
/*
+ * __hs_pack_key --
+ * Pack the history store key
+ */
+static inline int
+__hs_pack_key(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_RECONCILE *r, WT_INSERT *ins,
+ WT_ROW *rip, WT_ITEM *key)
+{
+ WT_DECL_RET;
+ uint8_t *p;
+
+ switch (r->page->type) {
+ case WT_PAGE_COL_FIX:
+ case WT_PAGE_COL_VAR:
+ p = key->mem;
+ WT_RET(__wt_vpack_uint(&p, 0, WT_INSERT_RECNO(ins)));
+ key->size = WT_PTRDIFF(p, key->data);
+ break;
+ case WT_PAGE_ROW_LEAF:
+ if (ins == NULL) {
+ WT_WITH_BTREE(
+ session, btree, ret = __wt_row_leaf_key(session, r->page, rip, key, false));
+ WT_RET(ret);
+ } else {
+ key->data = WT_INSERT_KEY(ins);
+ key->size = WT_INSERT_KEY_SIZE(ins);
+ }
+ break;
+ default:
+ WT_RET(__wt_illegal_value(session, r->page->type));
+ }
+
+ return (ret);
+}
+
+/*
* __wt_hs_insert_updates --
- * Copy one set of saved updates into the database's history store table. Whether the function
- * fails or succeeds, if there is a successful write to history, cache_write_hs is set to true.
+ * Copy one set of saved updates into the database's history store table if they haven't been
+ * moved there. Whether the function fails or succeeds, if there is a successful write to
+ * history, cache_write_hs is set to true.
*/
int
__wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *multi)
@@ -367,14 +344,12 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult
WT_MODIFY entries[MAX_REVERSE_MODIFY_NUM];
WT_UPDATE_VECTOR updates;
WT_SAVE_UPD *list;
- WT_UPDATE *delete_tombstone, *delete_upd, *newest_hs, *no_ts_upd, *oldest_upd, *prev_upd,
- *ref_upd, *tombstone, *upd;
+ WT_UPDATE *newest_hs, *no_ts_upd, *oldest_upd, *prev_upd, *ref_upd, *tombstone, *upd;
WT_TIME_WINDOW tw;
wt_off_t hs_size;
uint64_t insert_cnt, max_hs_size, modify_cnt;
uint64_t cache_hs_insert_full_update, cache_hs_insert_reverse_modify, cache_hs_write_squash;
uint32_t i;
- uint8_t *p;
int nentries;
bool enable_reverse_modify, error_on_ts_ordering, hs_inserted, squashed;
@@ -407,65 +382,6 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult
if (list->onpage_upd == NULL)
continue;
- /* History store table key component: source key. */
- switch (r->page->type) {
- case WT_PAGE_COL_FIX:
- case WT_PAGE_COL_VAR:
- p = key->mem;
- WT_ERR(__wt_vpack_uint(&p, 0, WT_INSERT_RECNO(list->ins)));
- key->size = WT_PTRDIFF(p, key->data);
- break;
- case WT_PAGE_ROW_LEAF:
- if (list->ins == NULL) {
- WT_WITH_BTREE(
- session, btree, ret = __wt_row_leaf_key(session, r->page, list->rip, key, false));
- WT_ERR(ret);
- } else {
- key->data = WT_INSERT_KEY(list->ins);
- key->size = WT_INSERT_KEY_SIZE(list->ins);
- }
- break;
- default:
- WT_ERR(__wt_illegal_value(session, r->page->type));
- }
-
- no_ts_upd = newest_hs = NULL;
- ref_upd = list->onpage_upd;
- delete_tombstone = delete_upd = NULL;
-
- __wt_update_vector_clear(&updates);
-
- /*
- * Reverse deltas are only supported on 'S' and 'u' value formats.
- */
- enable_reverse_modify =
- (WT_STREQ(btree->value_format, "S") || WT_STREQ(btree->value_format, "u"));
-
- /*
- * Delete the update that is both on the update chain and the history store from the history
- * store. Otherwise, we will trigger out of order fix when the update is inserted to the
- * history store again.
- */
- for (upd = list->onpage_tombstone != NULL ? list->onpage_tombstone : list->onpage_upd;
- upd != NULL; upd = upd->next) {
- if (upd->txnid == WT_TXN_ABORTED)
- continue;
-
- if (F_ISSET(upd, WT_UPDATE_TO_DELETE_FROM_HS)) {
- WT_ASSERT_ALWAYS(session, F_ISSET(upd, WT_UPDATE_HS | WT_UPDATE_RESTORED_FROM_HS),
- "Attempting to remove an update from the history store in WiredTiger, but the "
- "update was missing.");
- if (upd->type == WT_UPDATE_TOMBSTONE)
- delete_tombstone = upd;
- else {
- delete_upd = upd;
- WT_ERR(
- __hs_delete_record(session, hs_cursor, key, delete_upd, delete_tombstone));
- break;
- }
- }
- }
-
/* Skip aborted updates. */
for (upd = list->onpage_upd->next; upd != NULL && upd->txnid == WT_TXN_ABORTED;
upd = upd->next)
@@ -479,6 +395,20 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult
if (F_ISSET(upd, WT_UPDATE_HS))
continue;
+ /* History store table key component: source key. */
+ WT_ERR(__hs_pack_key(session, btree, r, list->ins, list->rip, key));
+
+ no_ts_upd = newest_hs = NULL;
+ ref_upd = list->onpage_upd;
+
+ __wt_update_vector_clear(&updates);
+
+ /*
+ * Reverse deltas are only supported on 'S' and 'u' value formats.
+ */
+ enable_reverse_modify =
+ (WT_STREQ(btree->value_format, "S") || WT_STREQ(btree->value_format, "u"));
+
/*
* The algorithm assumes the oldest update on the update chain in memory is either a full
* update or a tombstone.
@@ -831,7 +761,7 @@ __wt_hs_delete_key(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btre
{
WT_DECL_RET;
WT_ITEM hs_key;
- wt_timestamp_t hs_ts;
+ wt_timestamp_t hs_start_ts;
uint64_t hs_counter;
uint32_t hs_btree_id;
bool hs_read_all_flag;
@@ -850,7 +780,7 @@ __wt_hs_delete_key(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btre
ret = 0;
goto done;
} else {
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
++hs_counter;
}
@@ -880,8 +810,8 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
WT_CURSOR_BTREE *hs_cbt;
WT_DECL_RET;
WT_ITEM hs_key, hs_value;
- WT_TIME_WINDOW hs_insert_tw, tw, *twp;
- wt_timestamp_t hs_ts;
+ WT_TIME_WINDOW hs_insert_tw, *twp;
+ wt_timestamp_t hs_durable_start_ts, hs_durable_stop_ts, hs_start_ts;
uint64_t cache_hs_order_lose_durable_timestamp, cache_hs_order_reinsert, cache_hs_order_remove;
uint64_t hs_counter, hs_upd_type;
uint32_t hs_btree_id;
@@ -958,7 +888,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
continue;
/* We shouldn't have crossed the btree and user key search space. */
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
WT_ASSERT(session, hs_btree_id == btree_id);
#ifdef HAVE_DIAGNOSTIC
WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
@@ -970,7 +900,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
* the cell. The cell's start timestamp can be cleared during reconciliation if it is
* globally visible.
*/
- if (hs_ts >= ts || twp->stop_ts >= ts)
+ if (hs_start_ts >= ts || twp->stop_ts >= ts)
break;
}
if (ret == WT_NOTFOUND)
@@ -1026,7 +956,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
*/
for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
/* We shouldn't have crossed the btree and user key search space. */
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
WT_ASSERT(session, hs_btree_id == btree_id);
#ifdef HAVE_DIAGNOSTIC
WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
@@ -1044,7 +974,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
* by ignoring them.
*/
__wt_hs_upd_time_window(hs_cursor, &twp);
- if (hs_ts < ts && twp->stop_ts < ts)
+ if (hs_start_ts < ts && twp->stop_ts < ts)
continue;
if (reinsert) {
@@ -1098,10 +1028,10 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
/* Extract the underlying value for reinsertion. */
WT_ERR(hs_cursor->get_value(
- hs_cursor, &tw.durable_stop_ts, &tw.durable_start_ts, &hs_upd_type, &hs_value));
+ hs_cursor, &hs_durable_stop_ts, &hs_durable_start_ts, &hs_upd_type, &hs_value));
/* Reinsert the update with corrected timestamps. */
- if (no_ts_tombstone && hs_ts == ts)
+ if (no_ts_tombstone && hs_start_ts == ts)
*counter = hs_counter;
/* Insert the value back with different timestamps. */
@@ -1132,3 +1062,92 @@ err:
return (ret);
}
+
+/*
+ * __hs_delete_record --
+ * Delete an update from the history store if it is not obsolete
+ */
+static int
+__hs_delete_record(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ITEM *key, WT_UPDATE *upd, WT_UPDATE *tombstone)
+{
+ WT_DECL_RET;
+ bool hs_read_committed;
+#ifdef HAVE_DIAGNOSTIC
+ WT_TIME_WINDOW *hs_tw;
+#endif
+
+ if (r->hs_cursor == NULL)
+ WT_RET(__wt_curhs_open(session, NULL, &r->hs_cursor));
+ hs_read_committed = F_ISSET(r->hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
+ /* Ensure we can see all the content in the history store. */
+ F_SET(r->hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
+
+ /* No need to delete from the history store if it is already obsolete. */
+ if (tombstone != NULL && __wt_txn_upd_visible_all(session, tombstone))
+ goto done;
+
+ r->hs_cursor->set_key(r->hs_cursor, 4, S2BT(session)->id, key, WT_TS_MAX, UINT64_MAX);
+ WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_before(session, r->hs_cursor), true);
+ /* It's possible the value in the history store becomes obsolete concurrently. */
+ if (ret == WT_NOTFOUND) {
+ WT_ASSERT(session, tombstone != NULL && __wt_txn_upd_visible_all(session, tombstone));
+ ret = 0;
+ } else {
+#ifdef HAVE_DIAGNOSTIC
+ __wt_hs_upd_time_window(r->hs_cursor, &hs_tw);
+ WT_ASSERT(session, hs_tw->start_txn == WT_TXN_NONE || hs_tw->start_txn == upd->txnid);
+ WT_ASSERT(session, hs_tw->start_ts == WT_TS_NONE || hs_tw->start_ts == upd->start_ts);
+ WT_ASSERT(session,
+ hs_tw->durable_start_ts == WT_TS_NONE || hs_tw->durable_start_ts == upd->durable_ts);
+ if (tombstone != NULL) {
+ WT_ASSERT(session, hs_tw->stop_txn == tombstone->txnid);
+ WT_ASSERT(session, hs_tw->stop_ts == tombstone->start_ts);
+ WT_ASSERT(session, hs_tw->durable_stop_ts == tombstone->durable_ts);
+ } else
+ WT_ASSERT(session, !WT_TIME_WINDOW_HAS_STOP(hs_tw));
+#endif
+
+ WT_ERR(r->hs_cursor->remove(r->hs_cursor));
+ }
+done:
+ if (tombstone != NULL)
+ F_CLR(tombstone, WT_UPDATE_TO_DELETE_FROM_HS | WT_UPDATE_HS);
+ F_CLR(upd, WT_UPDATE_TO_DELETE_FROM_HS | WT_UPDATE_HS);
+
+err:
+ if (!hs_read_committed)
+ F_CLR(r->hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
+ return (ret);
+}
+
+/*
+ * __wt_hs_delete_updates --
+ * Delete the updates from the history store
+ */
+int
+__wt_hs_delete_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r)
+{
+ WT_BTREE *btree;
+ WT_DECL_ITEM(key);
+ WT_DECL_RET;
+ WT_DELETE_HS_UPD *delete_hs_upd;
+ uint32_t i;
+
+ /* Nothing to delete from the history store. */
+ if (r->delete_hs_upd == NULL)
+ return (0);
+
+ btree = S2BT(session);
+
+ WT_RET(__wt_scr_alloc(session, WT_INTPACK64_MAXSIZE, &key));
+
+ for (delete_hs_upd = r->delete_hs_upd, i = 0; i < r->delete_hs_upd_next; ++delete_hs_upd, ++i) {
+ WT_ERR(__hs_pack_key(session, btree, r, delete_hs_upd->ins, delete_hs_upd->rip, key));
+ WT_ERR(__hs_delete_record(session, r, key, delete_hs_upd->upd, delete_hs_upd->tombstone));
+ }
+
+err:
+ __wt_scr_free(session, &key);
+ return (ret);
+}