diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/history/hs_rec.c')
-rw-r--r-- | src/third_party/wiredtiger/src/history/hs_rec.c | 285 |
1 files changed, 152 insertions, 133 deletions
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c index 236f8856257..eb3697423be 100644 --- a/src/third_party/wiredtiger/src/history/hs_rec.c +++ b/src/third_party/wiredtiger/src/history/hs_rec.c @@ -61,65 +61,6 @@ __hs_verbose_cache_stats(WT_SESSION_IMPL *session, WT_BTREE *btree) } /* - * __hs_delete_record -- - * Delete the update left in the history store - */ -static int -__hs_delete_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM *key, - WT_UPDATE *delete_upd, WT_UPDATE *delete_tombstone) -{ - WT_DECL_RET; - bool hs_read_committed; -#ifdef HAVE_DIAGNOSTIC - WT_TIME_WINDOW *hs_tw; -#endif - - hs_read_committed = F_ISSET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED); - F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED); - - /* No need to delete from the history store if it is already obsolete. */ - if (delete_tombstone != NULL && __wt_txn_upd_visible_all(session, delete_tombstone)) { - ret = 0; - goto done; - } - - hs_cursor->set_key(hs_cursor, 4, S2BT(session)->id, key, WT_TS_MAX, UINT64_MAX); - WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_before(session, hs_cursor), true); - /* It's possible the value in the history store becomes obsolete concurrently. */ - if (ret == WT_NOTFOUND) { - WT_ASSERT( - session, delete_tombstone != NULL && __wt_txn_upd_visible_all(session, delete_tombstone)); - ret = 0; - goto done; - } - -#ifdef HAVE_DIAGNOSTIC - __wt_hs_upd_time_window(hs_cursor, &hs_tw); - WT_ASSERT(session, hs_tw->start_txn == WT_TXN_NONE || hs_tw->start_txn == delete_upd->txnid); - WT_ASSERT(session, hs_tw->start_ts == WT_TS_NONE || hs_tw->start_ts == delete_upd->start_ts); - WT_ASSERT(session, - hs_tw->durable_start_ts == WT_TS_NONE || hs_tw->durable_start_ts == delete_upd->durable_ts); - if (delete_tombstone != NULL) { - WT_ASSERT(session, hs_tw->stop_txn == delete_tombstone->txnid); - WT_ASSERT(session, hs_tw->stop_ts == delete_tombstone->start_ts); - WT_ASSERT(session, hs_tw->durable_stop_ts == delete_tombstone->durable_ts); - } else - WT_ASSERT(session, !WT_TIME_WINDOW_HAS_STOP(hs_tw)); -#endif - - WT_ERR(hs_cursor->remove(hs_cursor)); -done: - if (delete_tombstone != NULL) - F_CLR(delete_tombstone, WT_UPDATE_TO_DELETE_FROM_HS | WT_UPDATE_HS); - F_CLR(delete_upd, WT_UPDATE_TO_DELETE_FROM_HS | WT_UPDATE_HS); - -err: - if (!hs_read_committed) - F_CLR(hs_cursor, WT_CURSTD_HS_READ_COMMITTED); - return (ret); -} - -/* * __hs_insert_record -- * A helper function to insert the record into the history store including stop time point. */ @@ -345,9 +286,45 @@ __hs_next_upd_full_value(WT_SESSION_IMPL *session, WT_UPDATE_VECTOR *updates, } /* + * __hs_pack_key -- + * Pack the history store key + */ +static inline int +__hs_pack_key(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_RECONCILE *r, WT_INSERT *ins, + WT_ROW *rip, WT_ITEM *key) +{ + WT_DECL_RET; + uint8_t *p; + + switch (r->page->type) { + case WT_PAGE_COL_FIX: + case WT_PAGE_COL_VAR: + p = key->mem; + WT_RET(__wt_vpack_uint(&p, 0, WT_INSERT_RECNO(ins))); + key->size = WT_PTRDIFF(p, key->data); + break; + case WT_PAGE_ROW_LEAF: + if (ins == NULL) { + WT_WITH_BTREE( + session, btree, ret = __wt_row_leaf_key(session, r->page, rip, key, false)); + WT_RET(ret); + } else { + key->data = WT_INSERT_KEY(ins); + key->size = WT_INSERT_KEY_SIZE(ins); + } + break; + default: + WT_RET(__wt_illegal_value(session, r->page->type)); + } + + return (ret); +} + +/* * __wt_hs_insert_updates -- - * Copy one set of saved updates into the database's history store table. Whether the function - * fails or succeeds, if there is a successful write to history, cache_write_hs is set to true. + * Copy one set of saved updates into the database's history store table if they haven't been + * moved there. Whether the function fails or succeeds, if there is a successful write to + * history, cache_write_hs is set to true. */ int __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *multi) @@ -367,14 +344,12 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult WT_MODIFY entries[MAX_REVERSE_MODIFY_NUM]; WT_UPDATE_VECTOR updates; WT_SAVE_UPD *list; - WT_UPDATE *delete_tombstone, *delete_upd, *newest_hs, *no_ts_upd, *oldest_upd, *prev_upd, - *ref_upd, *tombstone, *upd; + WT_UPDATE *newest_hs, *no_ts_upd, *oldest_upd, *prev_upd, *ref_upd, *tombstone, *upd; WT_TIME_WINDOW tw; wt_off_t hs_size; uint64_t insert_cnt, max_hs_size, modify_cnt; uint64_t cache_hs_insert_full_update, cache_hs_insert_reverse_modify, cache_hs_write_squash; uint32_t i; - uint8_t *p; int nentries; bool enable_reverse_modify, error_on_ts_ordering, hs_inserted, squashed; @@ -407,65 +382,6 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult if (list->onpage_upd == NULL) continue; - /* History store table key component: source key. */ - switch (r->page->type) { - case WT_PAGE_COL_FIX: - case WT_PAGE_COL_VAR: - p = key->mem; - WT_ERR(__wt_vpack_uint(&p, 0, WT_INSERT_RECNO(list->ins))); - key->size = WT_PTRDIFF(p, key->data); - break; - case WT_PAGE_ROW_LEAF: - if (list->ins == NULL) { - WT_WITH_BTREE( - session, btree, ret = __wt_row_leaf_key(session, r->page, list->rip, key, false)); - WT_ERR(ret); - } else { - key->data = WT_INSERT_KEY(list->ins); - key->size = WT_INSERT_KEY_SIZE(list->ins); - } - break; - default: - WT_ERR(__wt_illegal_value(session, r->page->type)); - } - - no_ts_upd = newest_hs = NULL; - ref_upd = list->onpage_upd; - delete_tombstone = delete_upd = NULL; - - __wt_update_vector_clear(&updates); - - /* - * Reverse deltas are only supported on 'S' and 'u' value formats. - */ - enable_reverse_modify = - (WT_STREQ(btree->value_format, "S") || WT_STREQ(btree->value_format, "u")); - - /* - * Delete the update that is both on the update chain and the history store from the history - * store. Otherwise, we will trigger out of order fix when the update is inserted to the - * history store again. - */ - for (upd = list->onpage_tombstone != NULL ? list->onpage_tombstone : list->onpage_upd; - upd != NULL; upd = upd->next) { - if (upd->txnid == WT_TXN_ABORTED) - continue; - - if (F_ISSET(upd, WT_UPDATE_TO_DELETE_FROM_HS)) { - WT_ASSERT_ALWAYS(session, F_ISSET(upd, WT_UPDATE_HS | WT_UPDATE_RESTORED_FROM_HS), - "Attempting to remove an update from the history store in WiredTiger, but the " - "update was missing."); - if (upd->type == WT_UPDATE_TOMBSTONE) - delete_tombstone = upd; - else { - delete_upd = upd; - WT_ERR( - __hs_delete_record(session, hs_cursor, key, delete_upd, delete_tombstone)); - break; - } - } - } - /* Skip aborted updates. */ for (upd = list->onpage_upd->next; upd != NULL && upd->txnid == WT_TXN_ABORTED; upd = upd->next) @@ -479,6 +395,20 @@ __wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_MULTI *mult if (F_ISSET(upd, WT_UPDATE_HS)) continue; + /* History store table key component: source key. */ + WT_ERR(__hs_pack_key(session, btree, r, list->ins, list->rip, key)); + + no_ts_upd = newest_hs = NULL; + ref_upd = list->onpage_upd; + + __wt_update_vector_clear(&updates); + + /* + * Reverse deltas are only supported on 'S' and 'u' value formats. + */ + enable_reverse_modify = + (WT_STREQ(btree->value_format, "S") || WT_STREQ(btree->value_format, "u")); + /* * The algorithm assumes the oldest update on the update chain in memory is either a full * update or a tombstone. @@ -831,7 +761,7 @@ __wt_hs_delete_key(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btre { WT_DECL_RET; WT_ITEM hs_key; - wt_timestamp_t hs_ts; + wt_timestamp_t hs_start_ts; uint64_t hs_counter; uint32_t hs_btree_id; bool hs_read_all_flag; @@ -850,7 +780,7 @@ __wt_hs_delete_key(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btre ret = 0; goto done; } else { - WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter)); + WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter)); ++hs_counter; } @@ -880,8 +810,8 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui WT_CURSOR_BTREE *hs_cbt; WT_DECL_RET; WT_ITEM hs_key, hs_value; - WT_TIME_WINDOW hs_insert_tw, tw, *twp; - wt_timestamp_t hs_ts; + WT_TIME_WINDOW hs_insert_tw, *twp; + wt_timestamp_t hs_durable_start_ts, hs_durable_stop_ts, hs_start_ts; uint64_t cache_hs_order_lose_durable_timestamp, cache_hs_order_reinsert, cache_hs_order_remove; uint64_t hs_counter, hs_upd_type; uint32_t hs_btree_id; @@ -958,7 +888,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui continue; /* We shouldn't have crossed the btree and user key search space. */ - WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter)); + WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter)); WT_ASSERT(session, hs_btree_id == btree_id); #ifdef HAVE_DIAGNOSTIC WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp)); @@ -970,7 +900,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui * the cell. The cell's start timestamp can be cleared during reconciliation if it is * globally visible. */ - if (hs_ts >= ts || twp->stop_ts >= ts) + if (hs_start_ts >= ts || twp->stop_ts >= ts) break; } if (ret == WT_NOTFOUND) @@ -1026,7 +956,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui */ for (; ret == 0; ret = hs_cursor->next(hs_cursor)) { /* We shouldn't have crossed the btree and user key search space. */ - WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter)); + WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter)); WT_ASSERT(session, hs_btree_id == btree_id); #ifdef HAVE_DIAGNOSTIC WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp)); @@ -1044,7 +974,7 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui * by ignoring them. */ __wt_hs_upd_time_window(hs_cursor, &twp); - if (hs_ts < ts && twp->stop_ts < ts) + if (hs_start_ts < ts && twp->stop_ts < ts) continue; if (reinsert) { @@ -1098,10 +1028,10 @@ __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, ui /* Extract the underlying value for reinsertion. */ WT_ERR(hs_cursor->get_value( - hs_cursor, &tw.durable_stop_ts, &tw.durable_start_ts, &hs_upd_type, &hs_value)); + hs_cursor, &hs_durable_stop_ts, &hs_durable_start_ts, &hs_upd_type, &hs_value)); /* Reinsert the update with corrected timestamps. */ - if (no_ts_tombstone && hs_ts == ts) + if (no_ts_tombstone && hs_start_ts == ts) *counter = hs_counter; /* Insert the value back with different timestamps. */ @@ -1132,3 +1062,92 @@ err: return (ret); } + +/* + * __hs_delete_record -- + * Delete an update from the history store if it is not obsolete + */ +static int +__hs_delete_record( + WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ITEM *key, WT_UPDATE *upd, WT_UPDATE *tombstone) +{ + WT_DECL_RET; + bool hs_read_committed; +#ifdef HAVE_DIAGNOSTIC + WT_TIME_WINDOW *hs_tw; +#endif + + if (r->hs_cursor == NULL) + WT_RET(__wt_curhs_open(session, NULL, &r->hs_cursor)); + hs_read_committed = F_ISSET(r->hs_cursor, WT_CURSTD_HS_READ_COMMITTED); + /* Ensure we can see all the content in the history store. */ + F_SET(r->hs_cursor, WT_CURSTD_HS_READ_COMMITTED); + + /* No need to delete from the history store if it is already obsolete. */ + if (tombstone != NULL && __wt_txn_upd_visible_all(session, tombstone)) + goto done; + + r->hs_cursor->set_key(r->hs_cursor, 4, S2BT(session)->id, key, WT_TS_MAX, UINT64_MAX); + WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_before(session, r->hs_cursor), true); + /* It's possible the value in the history store becomes obsolete concurrently. */ + if (ret == WT_NOTFOUND) { + WT_ASSERT(session, tombstone != NULL && __wt_txn_upd_visible_all(session, tombstone)); + ret = 0; + } else { +#ifdef HAVE_DIAGNOSTIC + __wt_hs_upd_time_window(r->hs_cursor, &hs_tw); + WT_ASSERT(session, hs_tw->start_txn == WT_TXN_NONE || hs_tw->start_txn == upd->txnid); + WT_ASSERT(session, hs_tw->start_ts == WT_TS_NONE || hs_tw->start_ts == upd->start_ts); + WT_ASSERT(session, + hs_tw->durable_start_ts == WT_TS_NONE || hs_tw->durable_start_ts == upd->durable_ts); + if (tombstone != NULL) { + WT_ASSERT(session, hs_tw->stop_txn == tombstone->txnid); + WT_ASSERT(session, hs_tw->stop_ts == tombstone->start_ts); + WT_ASSERT(session, hs_tw->durable_stop_ts == tombstone->durable_ts); + } else + WT_ASSERT(session, !WT_TIME_WINDOW_HAS_STOP(hs_tw)); +#endif + + WT_ERR(r->hs_cursor->remove(r->hs_cursor)); + } +done: + if (tombstone != NULL) + F_CLR(tombstone, WT_UPDATE_TO_DELETE_FROM_HS | WT_UPDATE_HS); + F_CLR(upd, WT_UPDATE_TO_DELETE_FROM_HS | WT_UPDATE_HS); + +err: + if (!hs_read_committed) + F_CLR(r->hs_cursor, WT_CURSTD_HS_READ_COMMITTED); + return (ret); +} + +/* + * __wt_hs_delete_updates -- + * Delete the updates from the history store + */ +int +__wt_hs_delete_updates(WT_SESSION_IMPL *session, WT_RECONCILE *r) +{ + WT_BTREE *btree; + WT_DECL_ITEM(key); + WT_DECL_RET; + WT_DELETE_HS_UPD *delete_hs_upd; + uint32_t i; + + /* Nothing to delete from the history store. */ + if (r->delete_hs_upd == NULL) + return (0); + + btree = S2BT(session); + + WT_RET(__wt_scr_alloc(session, WT_INTPACK64_MAXSIZE, &key)); + + for (delete_hs_upd = r->delete_hs_upd, i = 0; i < r->delete_hs_upd_next; ++delete_hs_upd, ++i) { + WT_ERR(__hs_pack_key(session, btree, r, delete_hs_upd->ins, delete_hs_upd->rip, key)); + WT_ERR(__hs_delete_record(session, r, key, delete_hs_upd->upd, delete_hs_upd->tombstone)); + } + +err: + __wt_scr_free(session, &key); + return (ret); +} |