diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/history/hs_rec.c')
-rw-r--r-- | src/third_party/wiredtiger/src/history/hs_rec.c | 83 |
1 files changed, 37 insertions, 46 deletions
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c index 1f927309f94..777c2a74863 100644 --- a/src/third_party/wiredtiger/src/history/hs_rec.c +++ b/src/third_party/wiredtiger/src/history/hs_rec.c @@ -9,7 +9,8 @@ #include "wt_internal.h" static int __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, - uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, uint64_t *hs_counter); + uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts, + uint64_t *hs_counter); /* * __hs_verbose_cache_stats -- @@ -63,7 +64,7 @@ __hs_verbose_cache_stats(WT_SESSION_IMPL *session, WT_BTREE *btree) */ static int __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, const WT_ITEM *key, - const uint8_t type, const WT_ITEM *hs_value, WT_TIME_WINDOW *tw, bool checkpoint_running) + const uint8_t type, const WT_ITEM *hs_value, WT_TIME_WINDOW *tw, bool error_on_ooo_ts) { #ifdef HAVE_DIAGNOSTIC WT_CURSOR_BTREE *hs_cbt; @@ -137,10 +138,6 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, cursor->set_key(cursor, 4, btree->id, key, tw->start_ts, UINT64_MAX); WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_before(session, cursor), true); - /* Only clear the flag if it wasn't set when we entered the function. */ - if (!hs_read_all_flag) - F_CLR(cursor, WT_CURSTD_HS_READ_ALL); - if (ret == 0) { WT_ERR(cursor->get_key(cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter)); @@ -189,28 +186,12 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, if (ret == 0) WT_ERR_NOTFOUND_OK(cursor->next(cursor), true); else { - F_SET(cursor, WT_CURSTD_HS_READ_ALL); - cursor->set_key(cursor, 3, btree->id, key, tw->start_ts + 1); WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_after(session, cursor), true); - - if (!hs_read_all_flag) - F_CLR(cursor, WT_CURSTD_HS_READ_ALL); } - if (ret == 0) { - /* - * Fail the eviction if we detect out of order timestamp when checkpoint is running. We - * cannot modify the history store to fix the out of order timestamp updates as it may make - * the history store checkpoint inconsistent. - */ - if (checkpoint_running) { - ret = EBUSY; - WT_STAT_CONN_INCR(session, cache_eviction_fail_checkpoint_out_of_order_ts); - goto err; - } + if (ret == 0) WT_ERR(__hs_delete_reinsert_from_pos( - session, cursor, btree->id, key, tw->start_ts + 1, true, &counter)); - } + session, cursor, btree->id, key, tw->start_ts + 1, true, error_on_ooo_ts, &counter)); #ifdef HAVE_DIAGNOSTIC /* @@ -312,9 +293,9 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult uint32_t i; uint8_t *p; int nentries; - bool checkpoint_running, enable_reverse_modify, hs_inserted, squashed; + bool enable_reverse_modify, error_on_ooo_ts, hs_inserted, squashed; - checkpoint_running = F_ISSET(r, WT_REC_CHECKPOINT_RUNNING); + error_on_ooo_ts = F_ISSET(r, WT_REC_CHECKPOINT_RUNNING); r->cache_write_hs = false; btree = S2BT(session); prev_upd = NULL; @@ -434,11 +415,11 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult if (min_ts_upd != NULL && min_ts_upd->start_ts < upd->start_ts && out_of_order_ts_upd != min_ts_upd) { /* - * Fail the eviction if we detect out of order timestamp when checkpoint is running. + * Fail the eviction if we detect out of order timestamps and the error flag is set. * We cannot modify the history store to fix the out of order timestamp updates as * it may make the history store checkpoint inconsistent. */ - if (checkpoint_running) { + if (error_on_ooo_ts) { ret = EBUSY; WT_STAT_CONN_INCR(session, cache_eviction_fail_checkpoint_out_of_order_ts); goto err; @@ -513,7 +494,7 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult if (!F_ISSET(fix_ts_upd, WT_UPDATE_FIXED_HS)) { /* Delete and reinsert any update of the key with a higher timestamp. */ WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, key, - fix_ts_upd->start_ts + 1, true, checkpoint_running)); + fix_ts_upd->start_ts + 1, true, error_on_ooo_ts)); F_SET(fix_ts_upd, WT_UPDATE_FIXED_HS); } } @@ -670,13 +651,13 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult entries, &nentries) == 0) { WT_ERR(__wt_modify_pack(hs_cursor, entries, nentries, &modify_value)); ret = __hs_insert_record(session, hs_cursor, btree, key, WT_UPDATE_MODIFY, - modify_value, &tw, checkpoint_running); + modify_value, &tw, error_on_ooo_ts); __wt_scr_free(session, &modify_value); ++modify_cnt; } else { modify_cnt = 0; ret = __hs_insert_record(session, hs_cursor, btree, key, WT_UPDATE_STANDARD, - full_value, &tw, checkpoint_running); + full_value, &tw, error_on_ooo_ts); } /* @@ -763,7 +744,7 @@ err: */ int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id, - const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool checkpoint_running) + const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts) { WT_DECL_RET; WT_ITEM hs_key; @@ -781,6 +762,10 @@ __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint3 hs_read_all_flag = F_ISSET(hs_cursor, WT_CURSTD_HS_READ_ALL); hs_cursor->set_key(hs_cursor, 3, btree_id, key, ts); + /* + * Setting the flag WT_CURSTD_HS_READ_ALL before searching the history store optimizes the + * search routine as we do not skip globally visible tombstones during the search. + */ F_SET(hs_cursor, WT_CURSTD_HS_READ_ALL); WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_after(session, hs_cursor), true); /* Empty history store is fine. */ @@ -792,19 +777,9 @@ __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint3 ++hs_counter; } - /* - * Fail the eviction if we detect out of order timestamp when checkpoint is running. We cannot - * modify the history store to fix the out of order timestamp updates as it may make the history - * store checkpoint inconsistent. - */ - if (checkpoint_running) { - ret = EBUSY; - WT_STAT_CONN_INCR(session, cache_eviction_fail_checkpoint_out_of_order_ts); - goto err; - } + WT_ERR(__hs_delete_reinsert_from_pos( + session, hs_cursor, btree_id, key, ts, reinsert, error_on_ooo_ts, &hs_counter)); - WT_ERR( - __hs_delete_reinsert_from_pos(session, hs_cursor, btree_id, key, ts, reinsert, &hs_counter)); done: err: if (!hs_read_all_flag) @@ -821,13 +796,13 @@ err: */ static int __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id, - const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, uint64_t *counter) + const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, bool error_on_ooo_ts, uint64_t *counter) { WT_CURSOR *hs_insert_cursor; WT_CURSOR_BTREE *hs_cbt; WT_DECL_RET; WT_ITEM hs_key, hs_value; - WT_TIME_WINDOW tw, hs_insert_tw; + WT_TIME_WINDOW hs_insert_tw, tw, *twp; wt_timestamp_t hs_ts; uint64_t hs_counter, hs_upd_type; uint32_t hs_btree_id; @@ -850,6 +825,11 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui WT_ASSERT(session, ts > WT_TS_NONE || !reinsert); for (; ret == 0; ret = hs_cursor->next(hs_cursor)) { + /* Ignore records that are obsolete. */ + __wt_hs_upd_time_window(hs_cursor, &twp); + if (__wt_txn_tw_stop_visible_all(session, twp)) + continue; + /* We shouldn't have crossed the btree and user key search space. */ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter)); WT_ASSERT(session, hs_btree_id == btree_id); @@ -866,6 +846,17 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui WT_ERR(ret); /* + * Fail the eviction if we detect out of order timestamps when we've passed the error return + * flag. We cannot modify the history store to fix the out of order timestamp updates as it may + * make the history store checkpoint inconsistent. + */ + if (error_on_ooo_ts) { + ret = EBUSY; + WT_STAT_CONN_INCR(session, cache_eviction_fail_checkpoint_out_of_order_ts); + goto err; + } + + /* * The goal of this function is to move out-of-order content to maintain ordering in the * history store. We do this by removing content with higher timestamps and reinserting it * behind (from search's point of view) the newly inserted update. Even though these updates |