From 1ad10d62ad7ec005efb592abcb9d566ebccadf62 Mon Sep 17 00:00:00 2001 From: Luke Pearson Date: Tue, 24 Aug 2021 22:40:23 +1000 Subject: Apply fixes --- .../wiredtiger/src/include/cell_inline.h | 18 --------- src/third_party/wiredtiger/src/include/extern.h | 2 + .../wiredtiger/src/include/txn_inline.h | 23 ----------- src/third_party/wiredtiger/src/meta/meta_ckpt.c | 34 +++++++++++++++++ src/third_party/wiredtiger/src/reconcile/rec_col.c | 1 - src/third_party/wiredtiger/src/reconcile/rec_row.c | 5 +-- .../wiredtiger/src/reconcile/rec_write.c | 37 +++++++++--------- src/third_party/wiredtiger/src/txn/txn_ckpt.c | 8 ++++ src/third_party/wiredtiger/src/txn/txn_recover.c | 44 ++-------------------- .../wiredtiger/src/txn/txn_rollback_to_stable.c | 2 +- 10 files changed, 69 insertions(+), 105 deletions(-) diff --git a/src/third_party/wiredtiger/src/include/cell_inline.h b/src/third_party/wiredtiger/src/include/cell_inline.h index 2d91b9a8ee6..97b856e8a62 100644 --- a/src/third_party/wiredtiger/src/include/cell_inline.h +++ b/src/third_party/wiredtiger/src/include/cell_inline.h @@ -1044,24 +1044,6 @@ __cell_unpack_window_cleanup(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk __cell_kv_window_cleanup(session, unpack_kv); } -/* - * __cell_pack_kv_window_cleanup -- - * Clean up cells loaded from a previous run while writing to disk. - */ -static inline void -__cell_pack_kv_window_cleanup( - WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK_KV *unpack_kv) -{ - /* - * If the page came from a previous run, reset the transaction ids to "none" and timestamps to 0 - * as appropriate when the cell information is used for packing the new cell. - */ - if (F_ISSET(S2C(session), WT_CONN_RECOVERING) && - dsk->write_gen > S2BT(session)->base_write_gen && - dsk->write_gen < S2BT(session)->run_write_gen) - __cell_kv_window_cleanup(session, unpack_kv); -} - /* * __wt_cell_unpack_addr -- * Unpack an address WT_CELL into a structure. 
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 38d8f72a052..f4963f46fd9 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -1090,6 +1090,8 @@ extern int __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_metadata_btree_id_to_uri(WT_SESSION_IMPL *session, uint32_t btree_id, char **uri) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_correct_base_write_gen(WT_SESSION_IMPL *session) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_metadata_cursor_close(WT_SESSION_IMPL *session) diff --git a/src/third_party/wiredtiger/src/include/txn_inline.h b/src/third_party/wiredtiger/src/include/txn_inline.h index 97fea21a14c..7390279687d 100644 --- a/src/third_party/wiredtiger/src/include/txn_inline.h +++ b/src/third_party/wiredtiger/src/include/txn_inline.h @@ -449,13 +449,10 @@ err: static inline uint64_t __wt_txn_oldest_id(WT_SESSION_IMPL *session) { - WT_BTREE *btree; WT_TXN_GLOBAL *txn_global; uint64_t checkpoint_pinned, oldest_id; - bool include_checkpoint_txn; txn_global = &S2C(session)->txn_global; - btree = S2BT_SAFE(session); /* * The metadata is tracked specially because of optimizations for checkpoints. @@ -467,10 +464,6 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) * Take a local copy of these IDs in case they are updated while we are checking visibility. 
*/ oldest_id = txn_global->oldest_id; - include_checkpoint_txn = - btree == NULL || (btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT)); - if (!include_checkpoint_txn) - return (oldest_id); /* * The read of the transaction ID pinned by a checkpoint needs to be carefully ordered: if a @@ -501,14 +494,11 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) static inline void __wt_txn_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *pinned_tsp) { - WT_BTREE *btree; WT_TXN_GLOBAL *txn_global; wt_timestamp_t checkpoint_ts, pinned_ts; - bool include_checkpoint_txn; *pinned_tsp = WT_TS_NONE; - btree = S2BT_SAFE(session); txn_global = &S2C(session)->txn_global; /* @@ -519,19 +509,6 @@ __wt_txn_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *pinned_tsp) *pinned_tsp = pinned_ts = txn_global->pinned_timestamp; - /* - * Checkpoint transactions often fall behind ordinary application threads. Take special effort - * to not keep changes pinned in cache if they are only required for the checkpoint and it has - * already seen them. - * - * If there is no active checkpoint or this handle is up to date with the active checkpoint then - * it's safe to ignore the checkpoint ID in the visibility check. 
- */ include_checkpoint_txn = btree == NULL || (btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT)); - if (!include_checkpoint_txn) - return; - /* * The read of checkpoint timestamp needs to be carefully ordered: it needs to be after we have * read the pinned timestamp and the checkpoint generation, otherwise, we may read earlier diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c index b4c7c933c62..d58d0149658 100644 --- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c +++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c @@ -973,6 +973,40 @@ err: return (ret); } +/* + * __wt_metadata_correct_base_write_gen -- + * Update the connection's base write generation from all files in metadata at the end of the + * recovery checkpoint. + */ +int +__wt_metadata_correct_base_write_gen(WT_SESSION_IMPL *session) +{ + WT_CURSOR *cursor; + WT_DECL_RET; + char *config, *uri; + + uri = NULL; + WT_RET(__wt_metadata_cursor(session, &cursor)); + while ((ret = cursor->next(cursor)) == 0) { + WT_ERR(cursor->get_key(cursor, &uri)); + + if (!WT_PREFIX_MATCH(uri, "file:") && !WT_PREFIX_MATCH(uri, "tiered:")) + continue; + + WT_ERR(cursor->get_value(cursor, &config)); + + /* Update base write gen to the write gen. */ + WT_ERR(__wt_metadata_update_base_write_gen(session, config)); + } + WT_ERR_NOTFOUND_OK(ret, false); + +err: + if (ret != 0 && uri != NULL) + __wt_err(session, ret, "unable to correct write gen for %s", uri); + WT_TRET(__wt_metadata_cursor_release(session, &cursor)); + return (ret); +} + /* * __wt_meta_ckptlist_to_meta -- * Convert a checkpoint list into its metadata representation. 
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_col.c b/src/third_party/wiredtiger/src/reconcile/rec_col.c index 417bafebb50..2735d6513a3 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_col.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_col.c @@ -722,7 +722,6 @@ record_loop: twp = &clear_tw; goto compare; } - __cell_pack_kv_window_cleanup(session, page->dsk, vpack); twp = &vpack->tw; /* diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c index a72bc170245..99d887da573 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_row.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c @@ -808,10 +808,9 @@ __wt_rec_row_leaf( upd = upd_select.upd; /* Take the timestamp from the update or the cell. */ - if (upd == NULL) { - __cell_pack_kv_window_cleanup(session, page->dsk, vpack); + if (upd == NULL) twp = &vpack->tw; - } else + else twp = &upd_select.tw; /* diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 667e8c119b7..f66d898e268 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -1516,7 +1516,7 @@ err: * Initialize the page write generation number. */ static void -__rec_set_page_write_gen(WT_PAGE_HEADER *dsk, WT_BTREE *btree) +__rec_set_page_write_gen(WT_BTREE *btree, WT_PAGE_HEADER *dsk) { /* * We increment the block's write generation so it's easy to identify newer versions of blocks @@ -1553,7 +1553,7 @@ __rec_split_write_header(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_CHUNK dsk->recno = btree->type == BTREE_ROW ? 
WT_RECNO_OOB : multi->key.recno; - __rec_set_page_write_gen(dsk, btree); + __rec_set_page_write_gen(btree, dsk); dsk->mem_size = multi->size; dsk->u.entries = chunk->entries; dsk->type = page->type; @@ -2087,6 +2087,22 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) ref = r->ref; WT_TIME_AGGREGATE_INIT(&ta); + /* + * If using the history store table eviction path and we found updates that weren't globally + * visible when reconciling this page, copy them into the database's history store. This can + * fail, so try before clearing the page's previous reconciliation state. + */ + if (F_ISSET(r, WT_REC_HS)) + WT_RET(__rec_hs_wrapup(session, r)); + + /* + * Wrap up overflow tracking. If we are about to create a checkpoint, the system must be + * entirely consistent at that point (the underlying block manager is presumably going to do + * some action to resolve the list of allocated/free/whatever blocks that are associated with + * the checkpoint). + */ + WT_RET(__wt_ovfl_track_wrapup(session, page)); + /* * This page may have previously been reconciled, and that information is now about to be * replaced. Make sure it's discarded at some point, and clear the underlying modification @@ -2137,21 +2153,6 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) /* Reset the reconciliation state. */ mod->rec_result = 0; - /* - * If using the history store table eviction path and we found updates that weren't globally - * visible when reconciling this page, copy them into the database's history store. - */ - if (F_ISSET(r, WT_REC_HS)) - WT_RET(__rec_hs_wrapup(session, r)); - - /* - * Wrap up overflow tracking. If we are about to create a checkpoint, the system must be - * entirely consistent at that point (the underlying block manager is presumably going to do - * some action to resolve the list of allocated/free/whatever blocks that are associated with - * the checkpoint). 
- */ - WT_RET(__wt_ovfl_track_wrapup(session, page)); - __wt_verbose(session, WT_VERB_RECONCILE, "%p reconciled into %" PRIu32 " pages", (void *)ref, r->multi_next); @@ -2367,7 +2368,7 @@ __wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *k dsk = tmp->mem; memset(dsk, 0, WT_PAGE_HEADER_SIZE); dsk->type = WT_PAGE_OVFL; - __rec_set_page_write_gen(dsk, btree); + __rec_set_page_write_gen(btree, dsk); dsk->u.datalen = (uint32_t)kv->buf.size; memcpy(WT_PAGE_HEADER_BYTE(btree, dsk), kv->buf.data, kv->buf.size); dsk->mem_size = WT_PAGE_HEADER_BYTE_SIZE(btree) + (uint32_t)kv->buf.size; diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index d6b683dd4f2..63d343c1749 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -921,6 +921,14 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_STAT_CONN_SET(session, txn_hs_ckpt_duration, hs_ckpt_duration_usecs); } + /* + * As part of recovery, rollback to stable may have left out clearing stale transaction ids. + * Update the connection base write generation based on the latest checkpoint write generations + * to reset these transaction ids present on the pages when reading them. + */ + if (F_ISSET(conn, WT_CONN_RECOVERING)) + WT_ERR(__wt_metadata_correct_base_write_gen(session)); + /* * Clear the dhandle so the visibility check doesn't get confused about the snap min. Don't * bother restoring the handle since it doesn't make sense to carry a handle across a diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c index 9f32e9346f0..c76e2af3597 100644 --- a/src/third_party/wiredtiger/src/txn/txn_recover.c +++ b/src/third_party/wiredtiger/src/txn/txn_recover.c @@ -547,39 +547,6 @@ err: return (ret); } -/* - * __recovery_correct_write_gen -- - * Update the connection's base write generation from all files in metadata. 
- */ -static int -__recovery_correct_write_gen(WT_SESSION_IMPL *session) -{ - WT_CURSOR *cursor; - WT_DECL_RET; - char *config, *uri; - - uri = NULL; - WT_RET(__wt_metadata_cursor(session, &cursor)); - while ((ret = cursor->next(cursor)) == 0) { - WT_ERR(cursor->get_key(cursor, &uri)); - - if (!WT_PREFIX_MATCH(uri, "file:") && !WT_PREFIX_MATCH(uri, "tiered:")) - continue; - - WT_ERR(cursor->get_value(cursor, &config)); - - /* Update base write gen to the write gen. */ - WT_ERR(__wt_metadata_update_base_write_gen(session, config)); - } - WT_ERR_NOTFOUND_OK(ret, false); - -err: - if (ret != 0 && uri != NULL) - __wt_err(session, ret, "unable to correct write gen for %s", uri); - WT_TRET(__wt_metadata_cursor_release(session, &cursor)); - return (ret); -} - /* * __recovery_setup_file -- * Set up the recovery slot for a file, track the largest file ID, and update the base write gen @@ -1054,17 +1021,12 @@ done: */ WT_ERR(session->iface.checkpoint(&session->iface, "force=1")); - /* - * Rollback to stable may have left out clearing stale transaction ids. Update the connection - * base write generation based on the latest checkpoint write generations to reset them. - */ - if (rts_executed) - WT_ERR(__recovery_correct_write_gen(session)); - /* * Update the open dhandles write generations and base write generation with the connection's * base write generation because the recovery checkpoint writes the pages to disk with new write - * generation number which contains transaction ids that are needed to reset later. + * generation number which contains transaction ids that are needed to reset later. The + * connection level base write generation number is updated at the end of the recovery + * checkpoint. 
*/ __wt_dhandle_update_write_gens(session); diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index 0a6b7e52cf9..2f2c0ebdf94 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -311,7 +311,7 @@ __rollback_check_if_txnid_non_committed(WT_SESSION_IMPL *session, uint64_t txnid */ if (txnid < conn->recovery_ckpt_snap_min) return (false); - else if (txnid > conn->recovery_ckpt_snap_max) + else if (txnid >= conn->recovery_ckpt_snap_max) return (true); /* -- cgit v1.2.1