diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/history/hs.c')
-rw-r--r-- | src/third_party/wiredtiger/src/history/hs.c | 244 |
1 files changed, 141 insertions, 103 deletions
diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c index 2770c48ad53..ed1db846793 100644 --- a/src/third_party/wiredtiger/src/history/hs.c +++ b/src/third_party/wiredtiger/src/history/hs.c @@ -9,10 +9,19 @@ #include "wt_internal.h" /* + * WT_HS_TIME_PAIR -- + * A pair containing a timestamp and transaction id. + */ +typedef struct { + wt_timestamp_t timestamp; + uint64_t txnid; +} WT_HS_TIME_PAIR; + +/* * When an operation is accessing the history store table, it should ignore the cache size (since - * the cache is already full), and the operation can't reenter reconciliation. + * the cache is already full). */ -#define WT_HS_SESSION_FLAGS (WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_RECONCILE) +#define WT_HS_SESSION_FLAGS WT_SESSION_IGNORE_CACHE_SIZE static int __hs_delete_key_from_pos( WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id, const WT_ITEM *key); @@ -282,7 +291,7 @@ __wt_hs_modify(WT_CURSOR_BTREE *hs_cbt, WT_UPDATE *hs_upd) WT_SESSION_IMPL *session; WT_UPDATE *last_upd; - session = (WT_SESSION_IMPL *)hs_cbt->iface.session; + session = CUR2S(hs_cbt); /* If there are existing updates, append them after the new updates. */ if (hs_cbt->compare == 0) { @@ -355,15 +364,14 @@ __hs_insert_updates_verbose(WT_SESSION_IMPL *session, WT_BTREE *btree) static int __hs_insert_record_with_btree_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, const WT_ITEM *key, const WT_UPDATE *upd, const uint8_t type, const WT_ITEM *hs_value, - WT_TIME_PAIR stop_ts_pair) + WT_HS_TIME_PAIR stop_ts_pair) { WT_CURSOR_BTREE *cbt; WT_DECL_RET; - WT_UPDATE *hs_upd; - uint32_t session_flags; + WT_UPDATE *hs_upd, *upd_local; cbt = (WT_CURSOR_BTREE *)cursor; - hs_upd = NULL; + hs_upd = upd_local = NULL; /* * Use WT_CURSOR.set_key and WT_CURSOR.set_value to create key and value items, then use them to @@ -373,23 +381,32 @@ __hs_insert_record_with_btree_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, W cursor, btree->id, key, upd->start_ts, __wt_atomic_add64(&btree->hs_counter, 1)); cursor->set_value(cursor, stop_ts_pair.timestamp, upd->durable_ts, (uint64_t)type, hs_value); - /* - * Insert a delete record to represent stop time pair for the actual record to be inserted. Set - * the stop time pair as the commit time pair of the history store delete record. - */ - WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL)); - hs_upd->start_ts = stop_ts_pair.timestamp; - hs_upd->durable_ts = stop_ts_pair.timestamp; - hs_upd->txnid = stop_ts_pair.txnid; + /* Allocate a tombstone only when there is a valid stop time pair. */ + if (stop_ts_pair.timestamp != WT_TS_MAX || stop_ts_pair.txnid != WT_TXN_MAX) { + /* + * Insert a delete record to represent stop time pair for the actual record to be inserted. + * Set the stop time pair as the commit time pair of the history store delete record. + */ + WT_ERR(__wt_upd_alloc_tombstone(session, &hs_upd, NULL)); + hs_upd->start_ts = stop_ts_pair.timestamp; + hs_upd->durable_ts = stop_ts_pair.timestamp; + hs_upd->txnid = stop_ts_pair.txnid; + } /* * Append to the delete record, the actual record to be inserted into the history store. Set the * current update start time pair as the commit time pair to the history store record. */ - WT_ERR(__wt_upd_alloc(session, &cursor->value, WT_UPDATE_STANDARD, &hs_upd->next, NULL)); - hs_upd->next->start_ts = upd->start_ts; - hs_upd->next->durable_ts = upd->durable_ts; - hs_upd->next->txnid = upd->txnid; + WT_ERR(__wt_upd_alloc(session, &cursor->value, WT_UPDATE_STANDARD, &upd_local, NULL)); + upd_local->start_ts = upd->start_ts; + upd_local->durable_ts = upd->durable_ts; + upd_local->txnid = upd->txnid; + + /* Insert the standard update as next update if there is a tombstone. */ + if (hs_upd != NULL) + hs_upd->next = upd_local; + else + hs_upd = upd_local; /* * Search the page and insert the updates. We expect there will be no existing data: assert that @@ -425,8 +442,7 @@ err: */ WT_TRET(__wt_cursor_key_order_init(cbt)); #endif - session_flags = session->flags; - F_SET(session, WT_SESSION_IGNORE_HS_TOMBSTONE); + F_SET(cursor, WT_CURSTD_IGNORE_TOMBSTONE); /* We're pointing at the newly inserted update. Iterate once more to avoid deleting it. */ ret = cursor->next(cursor); if (ret == WT_NOTFOUND) @@ -435,8 +451,7 @@ err: WT_TRET(__hs_delete_key_from_pos(session, cursor, btree->id, key)); WT_STAT_CONN_INCR(session, cache_hs_key_truncate_mix_ts); } - if (!FLD_ISSET(session_flags, WT_SESSION_IGNORE_HS_TOMBSTONE)) - F_CLR(session, WT_SESSION_IGNORE_HS_TOMBSTONE); + F_CLR(cursor, WT_CURSTD_IGNORE_TOMBSTONE); } /* We did a row search, release the cursor so that the page doesn't continue being held. */ cursor->reset(cursor); @@ -452,7 +467,7 @@ err: static int __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, const WT_ITEM *key, const WT_UPDATE *upd, const uint8_t type, const WT_ITEM *hs_value, - WT_TIME_PAIR stop_ts_pair) + WT_HS_TIME_PAIR stop_ts_pair) { WT_DECL_RET; @@ -505,7 +520,7 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT */ static int __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree, const WT_ITEM *key, - const WT_UPDATE *upd, const uint8_t type, const WT_ITEM *hs_value, WT_TIME_PAIR stop_ts_pair) + const WT_UPDATE *upd, const uint8_t type, const WT_ITEM *hs_value, WT_HS_TIME_PAIR stop_ts_pair) { WT_CURSOR_BTREE *cbt; WT_DECL_RET; @@ -526,7 +541,7 @@ __hs_calculate_full_value(WT_SESSION_IMPL *session, WT_ITEM *full_value, WT_UPDA { if (upd->type == WT_UPDATE_MODIFY) { WT_RET(__wt_buf_set(session, full_value, base_full_value, size)); - WT_RET(__wt_modify_apply_item(session, full_value, upd->data, false)); + WT_RET(__wt_modify_apply_item(session, S2BT(session)->value_format, full_value, upd->data)); } else { WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD); full_value->data = upd->data; @@ -541,8 +556,10 @@ __hs_calculate_full_value(WT_SESSION_IMPL *session, WT_ITEM *full_value, WT_UPDA * Copy one set of saved updates into the database's history store table. */ int -__wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MULTI *multi) +__wt_hs_insert_updates(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi) { + WT_BTREE *btree; + WT_CURSOR *cursor; WT_DECL_ITEM(full_value); WT_DECL_ITEM(key); WT_DECL_ITEM(modify_value); @@ -554,9 +571,8 @@ __wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MUL WT_MODIFY entries[MAX_REVERSE_MODIFY_NUM]; WT_MODIFY_VECTOR modifies; WT_SAVE_UPD *list; - WT_SESSION_IMPL *session; WT_UPDATE *prev_upd, *upd; - WT_TIME_PAIR stop_ts_pair; + WT_HS_TIME_PAIR stop_ts_pair; wt_off_t hs_size; uint64_t insert_cnt, max_hs_size; uint32_t i; @@ -564,8 +580,9 @@ __wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MUL int nentries; bool squashed; + btree = S2BT(session); + cursor = session->hs_cursor; prev_upd = NULL; - session = (WT_SESSION_IMPL *)cursor->session; insert_cnt = 0; __wt_modify_vector_init(session, &modifies); @@ -585,9 +602,6 @@ __wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MUL if (list->onpage_upd == NULL) continue; - /* onpage_upd now is always from the update chain */ - WT_ASSERT(session, !F_ISSET(list->onpage_upd, WT_UPDATE_RESTORED_FROM_DISK)); - /* History store table key component: source key. */ switch (page->type) { case WT_PAGE_COL_FIX: @@ -696,13 +710,25 @@ __wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MUL __wt_modify_vector_pop(&modifies, &prev_upd); /* - * Set the stop timestamp from durable timestamp instead of commit timestamp. The - * Garbage collection of history store removes the history values once the stop - * timestamp is globally visible. i.e. durable timestamp of data store version. + * For any uncommitted prepared updates written to disk, the stop timestamp of the last + * update moved into the history store should be with max visibility to protect its + * removal by checkpoint garbage collection until the data store update is committed. */ - WT_ASSERT(session, prev_upd->start_ts <= prev_upd->durable_ts); - stop_ts_pair.timestamp = prev_upd->durable_ts; - stop_ts_pair.txnid = prev_upd->txnid; + if (prev_upd->prepare_state == WT_PREPARE_INPROGRESS) { + WT_ASSERT(session, + list->onpage_upd == prev_upd || list->onpage_upd->txnid == prev_upd->txnid); + stop_ts_pair.timestamp = WT_TS_MAX; + stop_ts_pair.txnid = WT_TXN_MAX; + } else { + /* + * Set the stop timestamp from durable timestamp instead of commit timestamp. The + * garbage collection of history store removes the history values once the stop + * timestamp is globally visible. i.e. durable timestamp of data store version. + */ + WT_ASSERT(session, prev_upd->start_ts <= prev_upd->durable_ts); + stop_ts_pair.timestamp = prev_upd->durable_ts; + stop_ts_pair.txnid = prev_upd->txnid; + } if (prev_upd->type == WT_UPDATE_TOMBSTONE) { WT_ASSERT(session, modifies.size > 0); @@ -764,10 +790,9 @@ __wt_hs_insert_updates(WT_CURSOR *cursor, WT_BTREE *btree, WT_PAGE *page, WT_MUL WT_STAT_CONN_SET(session, cache_hs_ondisk, hs_size); max_hs_size = ((WT_CURSOR_BTREE *)cursor)->btree->file_max; if (max_hs_size != 0 && (uint64_t)hs_size > max_hs_size) - WT_PANIC_ERR(session, WT_PANIC, "WiredTigerHS: file size of %" PRIu64 - " exceeds maximum " - "size %" PRIu64, - (uint64_t)hs_size, max_hs_size); + WT_ERR_PANIC(session, WT_PANIC, + "WiredTigerHS: file size of %" PRIu64 " exceeds maximum size %" PRIu64, (uint64_t)hs_size, + max_hs_size); err: if (ret == 0 && insert_cnt > 0) @@ -810,9 +835,6 @@ __wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t bt * Note that we need to compare the raw key off the cursor to determine where we are in the * history store as opposed to comparing the embedded data store key since the ordering is not * guaranteed to be the same. - * - * FIXME: We should be repeatedly moving the cursor backwards within the loop instead of doing a - * search near operation each time as it is cheaper. */ cursor->set_key( cursor, btree_id, key, timestamp != WT_TS_NONE ? timestamp : WT_TS_MAX, UINT64_MAX); @@ -863,15 +885,14 @@ __hs_restore_read_timestamp(WT_SESSION_IMPL *session) * prepare conflict will be returned upon reading a prepared update. */ int -__wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDATE **updp, - bool allow_prepare, WT_ITEM *on_disk_buf) +__wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, uint64_t recno, + WT_UPDATE_VALUE *upd_value, bool allow_prepare, WT_ITEM *on_disk_buf) { WT_CURSOR *hs_cursor; - WT_DECL_ITEM(hs_key); WT_DECL_ITEM(hs_value); WT_DECL_ITEM(orig_hs_value_buf); WT_DECL_RET; - WT_ITEM recno_key; + WT_ITEM hs_key, recno_key; WT_MODIFY_VECTOR modifies; WT_TXN *txn; WT_UPDATE *mod_upd, *upd; @@ -883,11 +904,10 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA int cmp; bool is_owner, modify; - *updp = NULL; - hs_cursor = NULL; mod_upd = upd = NULL; orig_hs_value_buf = NULL; + WT_CLEAR(hs_key); __wt_modify_vector_init(session, &modifies); txn = session->txn; hs_btree_id = S2BT(session)->id; @@ -914,8 +934,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA key->size = WT_PTRDIFF(p, recno_key_buf); } - /* Allocate buffers for the history store key/value. */ - WT_ERR(__wt_scr_alloc(session, 0, &hs_key)); + /* Allocate buffer for the history store value. */ WT_ERR(__wt_scr_alloc(session, 0, &hs_value)); /* Open a history store table cursor. */ @@ -934,7 +953,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA ret = 0; goto done; } - WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter)); + WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter)); /* Stop before crossing over to the next btree */ if (hs_btree_id != S2BT(session)->id) @@ -944,7 +963,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA * Keys are sorted in an order, skip the ones before the desired key, and bail out if we have * crossed over the desired key and not found the record we are looking for. */ - WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp)); + WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp)); if (cmp != 0) goto done; @@ -956,6 +975,13 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA WT_ASSERT(session, upd_type != WT_UPDATE_TOMBSTONE); /* + * If the caller has signalled they don't need the value buffer, don't bother reconstructing a + * modify update or copying the contents into the value buffer. + */ + if (upd_value->skip_buf) + goto skip_buf; + + /* * Keep walking until we get a non-modify update. Once we get to that point, squash the updates * together. */ @@ -1008,9 +1034,9 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA * reverse deltas on top of. */ WT_ERR(hs_cursor->get_key( - hs_cursor, &hs_btree_id, hs_key, &hs_start_ts_tmp, &hs_counter_tmp)); + hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts_tmp, &hs_counter_tmp)); - WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp)); + WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp)); if (cmp != 0) { /* Fallback to the onpage value as the base value. */ @@ -1028,7 +1054,7 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA WT_ASSERT(session, upd_type == WT_UPDATE_STANDARD); while (modifies.size > 0) { __wt_modify_vector_pop(&modifies, &mod_upd); - WT_ERR(__wt_modify_apply_item(session, hs_value, mod_upd->data, false)); + WT_ERR(__wt_modify_apply_item(session, value_format, hs_value, mod_upd->data)); __wt_free_update_list(session, &mod_upd); mod_upd = NULL; } @@ -1037,19 +1063,18 @@ __wt_find_hs_upd(WT_SESSION_IMPL *session, WT_ITEM *key, uint64_t recno, WT_UPDA WT_STAT_CONN_INCR(session, cache_hs_read_squash); } - /* Allocate an update structure for the record found. */ - WT_ERR(__wt_upd_alloc(session, hs_value, upd_type, &upd, NULL)); - upd->txnid = WT_TXN_NONE; - upd->durable_ts = durable_timestamp; - upd->start_ts = hs_start_ts; - upd->prepare_state = upd->start_ts == upd->durable_ts ? WT_PREPARE_INIT : WT_PREPARE_RESOLVED; - /* - * We're not keeping this in our update list as we want to get rid of it after the read has been - * dealt with. Mark this update as external and to be discarded when not needed. + * Potential optimization: We can likely get rid of this copy and the update allocation above. + * We already have buffers containing the modify values so there's no good reason to allocate an + * update other than to work with our modify vector implementation. */ - F_SET(upd, WT_UPDATE_RESTORED_FROM_DISK); - *updp = upd; + WT_ERR(__wt_buf_set(session, &upd_value->buf, hs_value->data, hs_value->size)); +skip_buf: + upd_value->start_ts = hs_start_ts; + upd_value->txnid = WT_TXN_NONE; + upd_value->type = upd_type; + upd_value->prepare_state = + (hs_start_ts == durable_timestamp) ? WT_PREPARE_INIT : WT_PREPARE_RESOLVED; done: err: @@ -1059,7 +1084,7 @@ err: __wt_scr_free(session, &orig_hs_value_buf); else __wt_scr_free(session, &hs_value); - __wt_scr_free(session, &hs_key); + WT_ASSERT(session, hs_key.mem == NULL && hs_key.memsize == 0); /* * Restore the read timestamp if we encountered an error while processing a modify. There's no @@ -1172,17 +1197,19 @@ __wt_hs_delete_key(WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *k return (0); WT_RET(__wt_hs_cursor(session, &session_flags, &is_owner)); + /* * In order to delete a key range, we need to be able to inspect all history store records * regardless of their stop time pairs. */ - F_SET(session, WT_SESSION_IGNORE_HS_TOMBSTONE); + F_SET(session->hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE); + /* The tree structure can change while we try to insert the mod list, retry if that happens. */ while ((ret = __hs_delete_key_int(session, btree_id, key)) == WT_RESTART) ; - if (!FLD_ISSET(session_flags, WT_SESSION_IGNORE_HS_TOMBSTONE)) - F_CLR(session, WT_SESSION_IGNORE_HS_TOMBSTONE); + F_CLR(session->hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE); + WT_TRET(__wt_hs_cursor_close(session, session_flags, is_owner)); return (ret); } @@ -1252,29 +1279,38 @@ err: static int __verify_history_store_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint32_t this_btree_id) { - WT_CURSOR *cursor; - WT_DECL_ITEM(hs_key); + WT_CURSOR *hs_cursor; WT_DECL_ITEM(prev_hs_key); - WT_DECL_ITEM(tmp); WT_DECL_RET; + WT_ITEM hs_key; wt_timestamp_t hs_start_ts; uint64_t hs_counter; uint32_t btree_id; int cmp; bool found; - cursor = session->hs_cursor; + hs_cursor = session->hs_cursor; + WT_CLEAR(hs_key); - WT_ERR(__wt_scr_alloc(session, 0, &hs_key)); WT_ERR(__wt_scr_alloc(session, 0, &prev_hs_key)); /* + * We need to be able to iterate over the history store content for another table. In order to + * do this, we must ignore non-globally visible tombstones in the history store since every + * history store record is succeeded with a tombstone. We also need to skip the non-globally + * visible tombstones in the data table to verify the corresponding entries in the history store + * are too present in the data store. + */ + F_SET(hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE); + F_SET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE); + + /* * The caller is responsible for positioning the history store cursor at the first record to * verify. When we return after moving to a new key the caller is responsible for keeping the * cursor there or deciding they're done. */ - for (; ret == 0; ret = cursor->next(cursor)) { - WT_ERR(cursor->get_key(cursor, &btree_id, hs_key, &hs_start_ts, &hs_counter)); + for (; ret == 0; ret = hs_cursor->next(hs_cursor)) { + WT_ERR(hs_cursor->get_key(hs_cursor, &btree_id, &hs_key, &hs_start_ts, &hs_counter)); /* * If the btree id does not match the preview one, we're done. It is up to the caller to set @@ -1290,34 +1326,34 @@ __verify_history_store_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint32 * If we have already checked against this key, keep going to the next key. We only need to * check the key once. */ - WT_ERR(__wt_compare(session, NULL, hs_key, prev_hs_key, &cmp)); + WT_ERR(__wt_compare(session, NULL, &hs_key, prev_hs_key, &cmp)); if (cmp == 0) continue; - WT_WITH_PAGE_INDEX(session, ret = __wt_row_search(cbt, hs_key, false, NULL, false, NULL)); + WT_WITH_PAGE_INDEX(session, ret = __wt_row_search(cbt, &hs_key, false, NULL, false, NULL)); WT_ERR(ret); -/* FIXME: temporarily disable hs verification. */ -#if 0 found = cbt->compare == 0; -#else - found = true; -#endif WT_ERR(__cursor_reset(cbt)); - if (!found) - WT_ERR_MSG(session, WT_PANIC, + if (!found) { + F_SET(S2C(session), WT_CONN_DATA_CORRUPTION); + WT_ERR_PANIC(session, WT_PANIC, "the associated history store key %s was not found in the data store %s", - __wt_buf_set_printable(session, hs_key->data, hs_key->size, prev_hs_key), + __wt_buf_set_printable(session, hs_key.data, hs_key.size, prev_hs_key), session->dhandle->name); + } - /* Swap current/previous buffers. */ - tmp = hs_key; - hs_key = prev_hs_key; - prev_hs_key = tmp; + /* + * Copy the key memory into our scratch buffer. The key will get invalidated on our next + * cursor iteration. + */ + WT_ERR(__wt_buf_set(session, prev_hs_key, hs_key.data, hs_key.size)); } WT_ERR_NOTFOUND_OK(ret, true); err: - __wt_scr_free(session, &hs_key); + F_CLR(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE); + F_CLR(hs_cursor, WT_CURSTD_IGNORE_TOMBSTONE); + WT_ASSERT(session, hs_key.mem == NULL && hs_key.memsize == 0); __wt_scr_free(session, &prev_hs_key); return (ret); } @@ -1370,8 +1406,8 @@ __wt_history_store_verify(WT_SESSION_IMPL *session) { WT_CURSOR *cursor, *data_cursor; WT_DECL_ITEM(buf); - WT_DECL_ITEM(hs_key); WT_DECL_RET; + WT_ITEM hs_key; wt_timestamp_t hs_start_ts; uint64_t hs_counter; uint32_t btree_id, session_flags; @@ -1382,13 +1418,13 @@ __wt_history_store_verify(WT_SESSION_IMPL *session) WT_ASSERT(session, S2C(session)->default_session != session); cursor = data_cursor = NULL; + WT_CLEAR(hs_key); btree_id = WT_BTREE_ID_INVALID; session_flags = 0; /* [-Wconditional-uninitialized] */ uri_data = NULL; is_owner = false; /* [-Wconditional-uninitialized] */ WT_ERR(__wt_scr_alloc(session, 0, &buf)); - WT_ERR(__wt_scr_alloc(session, 0, &hs_key)); WT_ERR(__wt_hs_cursor(session, &session_flags, &is_owner)); cursor = session->hs_cursor; ret = cursor->next(cursor); @@ -1405,12 +1441,14 @@ __wt_history_store_verify(WT_SESSION_IMPL *session) * The cursor is positioned either from above or left over from the internal call on the * first key of a new btree id. */ - WT_ERR(cursor->get_key(cursor, &btree_id, hs_key, &hs_start_ts, &hs_counter)); - if ((ret = __wt_metadata_btree_id_to_uri(session, btree_id, &uri_data)) != 0) - WT_ERR_MSG(session, WT_PANIC, + WT_ERR(cursor->get_key(cursor, &btree_id, &hs_key, &hs_start_ts, &hs_counter)); + if ((ret = __wt_metadata_btree_id_to_uri(session, btree_id, &uri_data)) != 0) { + F_SET(S2C(session), WT_CONN_DATA_CORRUPTION); + WT_ERR_PANIC(session, WT_PANIC, "Unable to find btree id %" PRIu32 " in the metadata file for the associated history store key %s", - btree_id, __wt_buf_set_printable(session, hs_key->data, hs_key->size, buf)); + btree_id, __wt_buf_set_printable(session, hs_key.data, hs_key.size, buf)); + } WT_ERR(__wt_open_cursor(session, uri_data, NULL, NULL, &data_cursor)); F_SET(data_cursor, WT_CURSOR_RAW_OK); ret = __verify_history_store_id(session, (WT_CURSOR_BTREE *)data_cursor, btree_id); @@ -1423,7 +1461,7 @@ err: WT_TRET(__wt_hs_cursor_close(session, session_flags, is_owner)); __wt_scr_free(session, &buf); - __wt_scr_free(session, &hs_key); + WT_ASSERT(session, hs_key.mem == NULL && hs_key.memsize == 0); __wt_free(session, uri_data); return (ret); } |