summary | refs | log | tree | commit | diff
path: root/src/third_party/wiredtiger/src/history/hs_rec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/history/hs_rec.c')
-rw-r--r-- src/third_party/wiredtiger/src/history/hs_rec.c | 83
1 files changed, 37 insertions, 46 deletions
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c
index 1f927309f94..777c2a74863 100644
--- a/src/third_party/wiredtiger/src/history/hs_rec.c
+++ b/src/third_party/wiredtiger/src/history/hs_rec.c
@@ -9,7 +9,8 @@
#include "wt_internal.h"
static int __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
- uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, uint64_t *hs_counter);
+ uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts,
+ uint64_t *hs_counter);
/*
* __hs_verbose_cache_stats --
@@ -63,7 +64,7 @@ __hs_verbose_cache_stats(WT_SESSION_IMPL *session, WT_BTREE *btree)
*/
static int
__hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, const WT_ITEM *key,
- const uint8_t type, const WT_ITEM *hs_value, WT_TIME_WINDOW *tw, bool checkpoint_running)
+ const uint8_t type, const WT_ITEM *hs_value, WT_TIME_WINDOW *tw, bool error_on_ooo_ts)
{
#ifdef HAVE_DIAGNOSTIC
WT_CURSOR_BTREE *hs_cbt;
@@ -137,10 +138,6 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
cursor->set_key(cursor, 4, btree->id, key, tw->start_ts, UINT64_MAX);
WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_before(session, cursor), true);
- /* Only clear the flag if it wasn't set when we entered the function. */
- if (!hs_read_all_flag)
- F_CLR(cursor, WT_CURSTD_HS_READ_ALL);
-
if (ret == 0) {
WT_ERR(cursor->get_key(cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
@@ -189,28 +186,12 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
if (ret == 0)
WT_ERR_NOTFOUND_OK(cursor->next(cursor), true);
else {
- F_SET(cursor, WT_CURSTD_HS_READ_ALL);
-
cursor->set_key(cursor, 3, btree->id, key, tw->start_ts + 1);
WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_after(session, cursor), true);
-
- if (!hs_read_all_flag)
- F_CLR(cursor, WT_CURSTD_HS_READ_ALL);
}
- if (ret == 0) {
- /*
- * Fail the eviction if we detect out of order timestamp when checkpoint is running. We
- * cannot modify the history store to fix the out of order timestamp updates as it may make
- * the history store checkpoint inconsistent.
- */
- if (checkpoint_running) {
- ret = EBUSY;
- WT_STAT_CONN_INCR(session, cache_eviction_fail_checkpoint_out_of_order_ts);
- goto err;
- }
+ if (ret == 0)
WT_ERR(__hs_delete_reinsert_from_pos(
- session, cursor, btree->id, key, tw->start_ts + 1, true, &counter));
- }
+ session, cursor, btree->id, key, tw->start_ts + 1, true, error_on_ooo_ts, &counter));
#ifdef HAVE_DIAGNOSTIC
/*
@@ -312,9 +293,9 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult
uint32_t i;
uint8_t *p;
int nentries;
- bool checkpoint_running, enable_reverse_modify, hs_inserted, squashed;
+ bool enable_reverse_modify, error_on_ooo_ts, hs_inserted, squashed;
- checkpoint_running = F_ISSET(r, WT_REC_CHECKPOINT_RUNNING);
+ error_on_ooo_ts = F_ISSET(r, WT_REC_CHECKPOINT_RUNNING);
r->cache_write_hs = false;
btree = S2BT(session);
prev_upd = NULL;
@@ -434,11 +415,11 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult
if (min_ts_upd != NULL && min_ts_upd->start_ts < upd->start_ts &&
out_of_order_ts_upd != min_ts_upd) {
/*
- * Fail the eviction if we detect out of order timestamp when checkpoint is running.
+ * Fail the eviction if we detect out of order timestamps and the error flag is set.
* We cannot modify the history store to fix the out of order timestamp updates as
* it may make the history store checkpoint inconsistent.
*/
- if (checkpoint_running) {
+ if (error_on_ooo_ts) {
ret = EBUSY;
WT_STAT_CONN_INCR(session, cache_eviction_fail_checkpoint_out_of_order_ts);
goto err;
@@ -513,7 +494,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult
if (!F_ISSET(fix_ts_upd, WT_UPDATE_FIXED_HS)) {
/* Delete and reinsert any update of the key with a higher timestamp. */
WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, key,
- fix_ts_upd->start_ts + 1, true, checkpoint_running));
+ fix_ts_upd->start_ts + 1, true, error_on_ooo_ts));
F_SET(fix_ts_upd, WT_UPDATE_FIXED_HS);
}
}
@@ -670,13 +651,13 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult
entries, &nentries) == 0) {
WT_ERR(__wt_modify_pack(hs_cursor, entries, nentries, &modify_value));
ret = __hs_insert_record(session, hs_cursor, btree, key, WT_UPDATE_MODIFY,
- modify_value, &tw, checkpoint_running);
+ modify_value, &tw, error_on_ooo_ts);
__wt_scr_free(session, &modify_value);
++modify_cnt;
} else {
modify_cnt = 0;
ret = __hs_insert_record(session, hs_cursor, btree, key, WT_UPDATE_STANDARD,
- full_value, &tw, checkpoint_running);
+ full_value, &tw, error_on_ooo_ts);
}
/*
@@ -763,7 +744,7 @@ err:
*/
int
__wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
- const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool checkpoint_running)
+ const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts)
{
WT_DECL_RET;
WT_ITEM hs_key;
@@ -781,6 +762,10 @@ __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint3
hs_read_all_flag = F_ISSET(hs_cursor, WT_CURSTD_HS_READ_ALL);
hs_cursor->set_key(hs_cursor, 3, btree_id, key, ts);
+ /*
+ * Setting the flag WT_CURSTD_HS_READ_ALL before searching the history store optimizes the
+ * search routine as we do not skip globally visible tombstones during the search.
+ */
F_SET(hs_cursor, WT_CURSTD_HS_READ_ALL);
WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_after(session, hs_cursor), true);
/* Empty history store is fine. */
@@ -792,19 +777,9 @@ __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint3
++hs_counter;
}
- /*
- * Fail the eviction if we detect out of order timestamp when checkpoint is running. We cannot
- * modify the history store to fix the out of order timestamp updates as it may make the history
- * store checkpoint inconsistent.
- */
- if (checkpoint_running) {
- ret = EBUSY;
- WT_STAT_CONN_INCR(session, cache_eviction_fail_checkpoint_out_of_order_ts);
- goto err;
- }
+ WT_ERR(__hs_delete_reinsert_from_pos(
+ session, hs_cursor, btree_id, key, ts, reinsert, error_on_ooo_ts, &hs_counter));
- WT_ERR(
- __hs_delete_reinsert_from_pos(session, hs_cursor, btree_id, key, ts, reinsert, &hs_counter));
done:
err:
if (!hs_read_all_flag)
@@ -821,13 +796,13 @@ err:
*/
static int
__hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
- const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, uint64_t *counter)
+ const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts, uint64_t *counter)
{
WT_CURSOR *hs_insert_cursor;
WT_CURSOR_BTREE *hs_cbt;
WT_DECL_RET;
WT_ITEM hs_key, hs_value;
- WT_TIME_WINDOW tw, hs_insert_tw;
+ WT_TIME_WINDOW hs_insert_tw, tw, *twp;
wt_timestamp_t hs_ts;
uint64_t hs_counter, hs_upd_type;
uint32_t hs_btree_id;
@@ -850,6 +825,11 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
WT_ASSERT(session, ts > WT_TS_NONE || !reinsert);
for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
+ /* Ignore records that are obsolete. */
+ __wt_hs_upd_time_window(hs_cursor, &twp);
+ if (__wt_txn_tw_stop_visible_all(session, twp))
+ continue;
+
/* We shouldn't have crossed the btree and user key search space. */
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
WT_ASSERT(session, hs_btree_id == btree_id);
@@ -866,6 +846,17 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui
WT_ERR(ret);
/*
+ * Fail the eviction if we detect out of order timestamps and the caller set error_on_ooo_ts.
+ * We cannot modify the history store to fix the out of order timestamp updates as it may make
+ * the history store checkpoint inconsistent.
+ */
+ if (error_on_ooo_ts) {
+ ret = EBUSY;
+ WT_STAT_CONN_INCR(session, cache_eviction_fail_checkpoint_out_of_order_ts);
+ goto err;
+ }
+
+ /*
* The goal of this function is to move out-of-order content to maintain ordering in the
* history store. We do this by removing content with higher timestamps and reinserting it
* behind (from search's point of view) the newly inserted update. Even though these updates