diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-06-25 17:34:00 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2020-06-25 17:34:00 +1000 |
commit | f4273b8a9d14ed989477748cd46d51eaccf65140 (patch) | |
tree | 42a43637efac222acf72538c2a53e8c177afc0a6 | |
parent | dd10c6328d7466a8ec21097094233afb5349d844 (diff) | |
download | mongo-f4273b8a9d14ed989477748cd46d51eaccf65140.tar.gz |
Import wiredtiger: eafb0cea2157f288e027824b12506e83fe2f432d from branch mongodb-4.4 (tag: r4.4.0-rc11)
ref: 5a74e438ea..eafb0cea21
for: 4.4.0-rc11
WT-6448 Rollback to stable to read all required pages as part of tree walk
WT-6476 Block running rebalance with timestamp set in test format
8 files changed, 54 insertions, 112 deletions
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 10e3fa0e7a5..b89f2bf5553 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -659,7 +659,7 @@ connection_stats = [ TxnStat('txn_rts_keys_removed', 'rollback to stable keys removed'), TxnStat('txn_rts_keys_restored', 'rollback to stable keys restored'), TxnStat('txn_rts_pages_visited', 'rollback to stable pages visited'), - TxnStat('txn_rts_skip_interal_pages_walk', 'rollback to stable skipping internal pages tree walk'), + TxnStat('txn_rts_tree_walk_skip_pages', 'rollback to stable tree walk skipping pages'), TxnStat('txn_rts_sweep_hs_keys', 'rollback to stable sweeping history store keys'), TxnStat('txn_rts_upd_aborted', 'rollback to stable updates aborted'), TxnStat('txn_set_ts', 'set timestamp calls'), diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 93efc061937..f4fd781f422 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.4", - "commit": "5a74e438ea34cc6737438f6c99ce2e5e25519a0e" + "commit": "eafb0cea2157f288e027824b12506e83fe2f432d" } diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 0e662c0a6d4..5b868ad9000 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -719,8 +719,8 @@ struct __wt_connection_stats { int64_t txn_rts_keys_restored; int64_t txn_rts_pages_visited; int64_t txn_rts_hs_restore_tombstones; - int64_t txn_rts_skip_interal_pages_walk; int64_t txn_rts_sweep_hs_keys; + int64_t txn_rts_tree_walk_skip_pages; int64_t txn_rts_upd_aborted; int64_t txn_rts_hs_removed; int64_t txn_set_ts; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in 
b/src/third_party/wiredtiger/src/include/wiredtiger.in index 2114928a10d..856e82d43ba 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -5924,10 +5924,10 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1422 /*! transaction: rollback to stable restored tombstones from history store */ #define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1423 -/*! transaction: rollback to stable skipping internal pages tree walk */ -#define WT_STAT_CONN_TXN_RTS_SKIP_INTERAL_PAGES_WALK 1424 /*! transaction: rollback to stable sweeping history store keys */ -#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1425 +#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1424 +/*! transaction: rollback to stable tree walk skipping pages */ +#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1425 /*! transaction: rollback to stable updates aborted */ #define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1426 /*! 
transaction: rollback to stable updates removed from history store */ diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index bfcb7f43a0c..f3b90b2c734 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -1065,8 +1065,8 @@ static const char *const __stats_connection_desc[] = { "transaction: rollback to stable keys removed", "transaction: rollback to stable keys restored", "transaction: rollback to stable pages visited", "transaction: rollback to stable restored tombstones from history store", - "transaction: rollback to stable skipping internal pages tree walk", "transaction: rollback to stable sweeping history store keys", + "transaction: rollback to stable tree walk skipping pages", "transaction: rollback to stable updates aborted", "transaction: rollback to stable updates removed from history store", "transaction: set timestamp calls", "transaction: set timestamp durable calls", @@ -1565,8 +1565,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->txn_rts_keys_restored = 0; stats->txn_rts_pages_visited = 0; stats->txn_rts_hs_restore_tombstones = 0; - stats->txn_rts_skip_interal_pages_walk = 0; stats->txn_rts_sweep_hs_keys = 0; + stats->txn_rts_tree_walk_skip_pages = 0; stats->txn_rts_upd_aborted = 0; stats->txn_rts_hs_removed = 0; stats->txn_set_ts = 0; @@ -2075,8 +2075,8 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS * to->txn_rts_keys_restored += WT_STAT_READ(from, txn_rts_keys_restored); to->txn_rts_pages_visited += WT_STAT_READ(from, txn_rts_pages_visited); to->txn_rts_hs_restore_tombstones += WT_STAT_READ(from, txn_rts_hs_restore_tombstones); - to->txn_rts_skip_interal_pages_walk += WT_STAT_READ(from, txn_rts_skip_interal_pages_walk); to->txn_rts_sweep_hs_keys += WT_STAT_READ(from, txn_rts_sweep_hs_keys); + to->txn_rts_tree_walk_skip_pages += WT_STAT_READ(from, 
txn_rts_tree_walk_skip_pages); to->txn_rts_upd_aborted += WT_STAT_READ(from, txn_rts_upd_aborted); to->txn_rts_hs_removed += WT_STAT_READ(from, txn_rts_hs_removed); to->txn_set_ts += WT_STAT_READ(from, txn_set_ts); diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index bb4a96431f0..55a455c96f7 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -766,31 +766,6 @@ __rollback_page_needs_abort( return (result); } -#ifdef HAVE_DIAGNOSTIC -/* - * __rollback_verify_ondisk_page -- - * Verify the on-disk page that it doesn't have updates newer than the timestamp. - */ -static void -__rollback_verify_ondisk_page( - WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t rollback_timestamp) -{ - WT_CELL_UNPACK_KV *vpack, _vpack; - WT_ROW *rip; - uint32_t i; - - vpack = &_vpack; - - /* Review updates that belong to keys that are on the disk image. */ - WT_ROW_FOREACH (page, rip, i) { - __wt_row_leaf_value_cell(session, page, rip, NULL, vpack); - WT_ASSERT(session, vpack->tw.durable_start_ts <= rollback_timestamp); - WT_ASSERT(session, vpack->tw.durable_stop_ts == WT_TS_NONE || - vpack->tw.durable_stop_ts <= rollback_timestamp); - } -} -#endif - /* * __rollback_abort_newer_updates -- * Abort updates on this page newer than the timestamp. @@ -799,11 +774,7 @@ static int __rollback_abort_newer_updates( WT_SESSION_IMPL *session, WT_REF *ref, wt_timestamp_t rollback_timestamp) { - WT_DECL_RET; WT_PAGE *page; - bool local_read; - - local_read = false; /* Review deleted page saved to the ref. */ if (ref->page_del != NULL && rollback_timestamp < ref->page_del->durable_timestamp) { @@ -812,32 +783,18 @@ __rollback_abort_newer_updates( } /* - * If we have a ref with no page, or the page is clean, find out whether the page has any - * modifications that are newer than the given timestamp. 
As eviction writes the newest version - * to page, even a clean page may also contain modifications that need rollback. Such pages are - * read back into memory and processed like other modified pages. + * If we have a ref with clean page, find out whether the page has any modifications that are + * newer than the given timestamp. As eviction writes the newest version to page, even a clean + * page may also contain modifications that need rollback. */ - if ((page = ref->page) == NULL || !__wt_page_is_modified(page)) { - if (!__rollback_page_needs_abort(session, ref, rollback_timestamp)) { - __wt_verbose(session, WT_VERB_RTS, "%p: page skipped", (void *)ref); -#ifdef HAVE_DIAGNOSTIC - if (ref->page == NULL && !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) { - WT_RET(__wt_page_in(session, ref, 0)); - if (ref->page->type == WT_PAGE_ROW_LEAF) - __rollback_verify_ondisk_page(session, ref->page, rollback_timestamp); - WT_TRET_BUSY_OK(__wt_page_release_evict(session, ref, WT_READ_NO_SPLIT)); - } -#endif - return (0); - } - - /* Page needs rollback, read it into cache. */ - if (page == NULL) { - WT_RET(__wt_page_in(session, ref, 0)); - local_read = true; - } - page = ref->page; + WT_ASSERT(session, ref->page != NULL); + page = ref->page; + if (!__wt_page_is_modified(page) && + !__rollback_page_needs_abort(session, ref, rollback_timestamp)) { + __wt_verbose(session, WT_VERB_RTS, "%p: page skipped", (void *)ref); + return (0); } + WT_STAT_CONN_INCR(session, txn_rts_pages_visited); __wt_verbose(session, WT_VERB_RTS, "%p: page rolled back when page is modified: %s", (void *)ref, __wt_page_is_modified(page) ? 
"true" : "false"); @@ -858,16 +815,30 @@ __rollback_abort_newer_updates( */ break; case WT_PAGE_ROW_LEAF: - WT_ERR(__rollback_abort_newer_row_leaf(session, page, rollback_timestamp)); + WT_RET(__rollback_abort_newer_row_leaf(session, page, rollback_timestamp)); break; default: - WT_ERR(__wt_illegal_value(session, page->type)); + WT_RET(__wt_illegal_value(session, page->type)); } -err: - if (local_read) - WT_TRET_BUSY_OK(__wt_page_release_evict(session, ref, WT_READ_NO_SPLIT)); - return (ret); + return (0); +} + +/* + * __rollback_abort_fast_truncate -- + * Abort fast truncate on this page newer than the timestamp. + */ +static int +__rollback_abort_fast_truncate( + WT_SESSION_IMPL *session, WT_REF *ref, wt_timestamp_t rollback_timestamp) +{ + /* Review deleted page saved to the ref. */ + if (ref->page_del != NULL && rollback_timestamp < ref->page_del->durable_timestamp) { + __wt_verbose(session, WT_VERB_RTS, "%p: deleted page rolled back", (void *)ref); + WT_RET(__wt_delete_page_rollback(session, ref)); + } + + return (0); } /* @@ -882,23 +853,15 @@ __wt_rts_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *s rollback_timestamp = *(wt_timestamp_t *)(context); *skipp = false; /* Default to reading */ - /* If the page is in-memory, we want to look at it. */ + /* If the page state is other than on disk, we want to look at it. */ if (ref->state != WT_REF_DISK) return (0); - /* - * Rollback to stable doesn't read leaf pages into memory as part of the tree walk. The leaf - * page is loaded into memory in the caller functions if it has newer updates that are need to - * be aborted. Don't process further on leaf pages as part of tree walk function. - */ - if (!F_ISSET(ref, WT_REF_FLAG_INTERNAL)) - return (0); - /* Check whether this ref has any possible updates to be aborted. 
*/ if (!__rollback_page_needs_abort(session, ref, rollback_timestamp)) { *skipp = true; - __wt_verbose(session, WT_VERB_RTS, "%p: internal page walk skipped", (void *)ref); - WT_STAT_CONN_INCR(session, txn_rts_skip_interal_pages_walk); + __wt_verbose(session, WT_VERB_RTS, "%p: page walk skipped", (void *)ref); + WT_STAT_CONN_INCR(session, txn_rts_tree_walk_skip_pages); } return (0); @@ -917,33 +880,20 @@ __rollback_to_stable_btree_walk(WT_SESSION_IMPL *session, wt_timestamp_t rollbac /* Walk the tree, marking commits aborted where appropriate. */ ref = NULL; while ((ret = __wt_tree_walk_custom_skip(session, &ref, __wt_rts_page_skip, &rollback_timestamp, - WT_READ_CACHE_LEAF | WT_READ_NO_EVICT | WT_READ_WONT_NEED)) == 0 && + WT_READ_NO_EVICT | WT_READ_WONT_NEED)) == 0 && ref != NULL) if (F_ISSET(ref, WT_REF_FLAG_INTERNAL)) { WT_INTL_FOREACH_BEGIN (session, ref->page, child_ref) { - WT_RET(__rollback_abort_newer_updates(session, child_ref, rollback_timestamp)); + WT_RET(__rollback_abort_fast_truncate(session, child_ref, rollback_timestamp)); } WT_INTL_FOREACH_END; - } + } else + WT_RET(__rollback_abort_newer_updates(session, ref, rollback_timestamp)); return (ret); } /* - * __rollback_eviction_drain -- - * Wait for eviction to drain from a tree. - */ -static int -__rollback_eviction_drain(WT_SESSION_IMPL *session, const char *cfg[]) -{ - WT_UNUSED(cfg); - - WT_RET(__wt_evict_file_exclusive_on(session)); - __wt_evict_file_exclusive_off(session); - return (0); -} - -/* * __rollback_to_stable_btree -- * Called for each object handle - choose to either skip or wipe the commits */ @@ -983,15 +933,8 @@ __rollback_to_stable_btree(WT_SESSION_IMPL *session, wt_timestamp_t rollback_tim /* There is nothing to do on an empty tree. */ if (btree->root.page == NULL) return (0); - /* - * Ensure the eviction server is out of the file - we don't want it messing with us. This step - * shouldn't be required, but it simplifies some of the reasoning about what state trees can be - * in. 
- */ - WT_RET(__wt_evict_file_exclusive_on(session)); - WT_WITH_PAGE_INDEX(session, ret = __rollback_to_stable_btree_walk(session, rollback_timestamp)); - __wt_evict_file_exclusive_off(session); + WT_WITH_PAGE_INDEX(session, ret = __rollback_to_stable_btree_walk(session, rollback_timestamp)); return (ret); } @@ -1302,16 +1245,13 @@ err: * Rollback all modifications with timestamps more recent than the passed in timestamp. */ static int -__rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[]) +__rollback_to_stable(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; conn = S2C(session); - /* Mark that a rollback operation is in progress and wait for eviction to drain. */ - WT_RET(__wt_conn_btree_apply(session, NULL, __rollback_eviction_drain, NULL, cfg)); - WT_RET(__rollback_to_stable_check(session)); /* @@ -1333,6 +1273,8 @@ __wt_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[], bool no_ckp { WT_DECL_RET; + WT_UNUSED(cfg); + /* * Don't use the connection's default session: we are working on data handles and (a) don't want * to cache all of them forever, plus (b) can't guarantee that no other method will be called @@ -1347,7 +1289,7 @@ __wt_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[], bool no_ckp * entire table sequentially. */ F_SET(session, WT_SESSION_ROLLBACK_TO_STABLE); - ret = __rollback_to_stable(session, cfg); + ret = __rollback_to_stable(session); F_CLR(session, WT_SESSION_ROLLBACK_TO_STABLE); WT_RET(ret); diff --git a/src/third_party/wiredtiger/test/format/format.sh b/src/third_party/wiredtiger/test/format/format.sh index de47afbcf36..e607fd1e942 100755 --- a/src/third_party/wiredtiger/test/format/format.sh +++ b/src/third_party/wiredtiger/test/format/format.sh @@ -57,8 +57,8 @@ smoke_list=( # Temporarily disabled # "$smoke_base_1 file_type=row data_source=lsm" - # Force tree rebalance and the statistics server. 
- "$smoke_base_1 file_type=row statistics_server=1 rebalance=1" + # Force the statistics server. + "$smoke_base_1 file_type=row statistics_server=1" # Overflow testing. "$smoke_base_2 file_type=row key_min=256" diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable12.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable12.py index b4fa7a9087b..4ac28066596 100755 --- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable12.py +++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable12.py @@ -132,7 +132,7 @@ class test_rollback_to_stable12(test_rollback_to_stable_base): keys_removed = stat_cursor[stat.conn.txn_rts_keys_removed][2] keys_restored = stat_cursor[stat.conn.txn_rts_keys_restored][2] pages_visited = stat_cursor[stat.conn.txn_rts_pages_visited][2] - pages_walk_skipped = stat_cursor[stat.conn.txn_rts_skip_interal_pages_walk][2] + pages_walk_skipped = stat_cursor[stat.conn.txn_rts_tree_walk_skip_pages][2] upd_aborted = stat_cursor[stat.conn.txn_rts_upd_aborted][2] stat_cursor.close() |