20 files changed, 1104 insertions, 280 deletions
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 23ba8dc737e..689aa30c192 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -336,6 +336,7 @@ RLEs
 RMW
 RNG
 RPC
+RTS
 RUNDIR
 RWLOCK
 RXB
@@ -1251,6 +1252,7 @@ srch
 ssize
 startup
 statlog
+stb
 stderr
 stdin
 stdout
@@ -1308,6 +1310,7 @@ timedwait
 timestamp
 timestamped
 timestamps
+tinfo
 tmp
 todo
 tokenizer
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index f4400cc8bdf..c7bfc07b404 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -651,10 +651,11 @@ connection_stats = [
     TxnStat('txn_rollback', 'transactions rolled back'),
     TxnStat('txn_rts', 'rollback to stable calls'),
     TxnStat('txn_rts_hs_removed', 'rollback to stable updates removed from history store'),
-    TxnStat('txn_rts_sweep_hs_keys', 'rollback to stable sweeping history store keys'),
     TxnStat('txn_rts_keys_removed', 'rollback to stable keys removed'),
     TxnStat('txn_rts_keys_restored', 'rollback to stable keys restored'),
     TxnStat('txn_rts_pages_visited', 'rollback to stable pages visited'),
+    TxnStat('txn_rts_skip_interal_pages_walk', 'rollback to stable skipping internal pages tree walk'),
+    TxnStat('txn_rts_sweep_hs_keys', 'rollback to stable sweeping history store keys'),
     TxnStat('txn_rts_upd_aborted', 'rollback to stable updates aborted'),
     TxnStat('txn_set_ts', 'set timestamp calls'),
     TxnStat('txn_set_ts_durable', 'set timestamp durable calls'),
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 3c04510d191..f36da07c129 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
     "vendor": "wiredtiger",
     "github": "wiredtiger/wiredtiger.git",
     "branch": "mongodb-4.4",
-    "commit": "930bbacc3761a10483875585dbd4ecb58271d57e"
+    "commit": "ab40833d9130b71f4b36a1a03fd8f4f137d11bdd"
 }
diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c
index 03d36bf5d07..78b47ecd6aa 100644
--- a/src/third_party/wiredtiger/src/history/hs.c
+++ b/src/third_party/wiredtiger/src/history/hs.c
@@ -306,10 +306,38 @@ __wt_hs_cursor_close(WT_SESSION_IMPL *session, uint32_t session_flags, bool is_o
 static int
 __hs_row_search(WT_CURSOR_BTREE *hs_cbt, WT_ITEM *srch_key, bool insert)
 {
+    WT_CURSOR *hs_cursor;
     WT_DECL_RET;
+    bool leaf_found;
+
+    hs_cursor = &hs_cbt->iface;
+    leaf_found = false;
+
+    /*
+     * Check whether the search key can be find in the provided leaf page, if exists. Otherwise
+     * perform a full search.
+     */
+    if (hs_cbt->ref != NULL) {
+        WT_WITH_BTREE(CUR2S(hs_cbt), CUR2BT(hs_cbt),
+          ret = __wt_row_search(hs_cbt, srch_key, insert, hs_cbt->ref, false, &leaf_found));
+        WT_RET(ret);
+
+        /*
+         * Only use the pinned page search results if search returns an exact match or a slot other
+         * than the page's boundary slots, if that's not the case, the record might belong on an
+         * entirely different page.
+         */
+        if (leaf_found && (hs_cbt->compare != 0 &&
+                            (hs_cbt->slot == 0 || hs_cbt->slot == hs_cbt->ref->page->entries - 1)))
+            leaf_found = false;
+        if (!leaf_found)
+            hs_cursor->reset(hs_cursor);
+    }
+
+    if (!leaf_found)
+        WT_WITH_BTREE(CUR2S(hs_cbt), CUR2BT(hs_cbt),
+          ret = __wt_row_search(hs_cbt, srch_key, insert, NULL, false, NULL));
 
-    WT_WITH_BTREE(CUR2S(hs_cbt), CUR2BT(hs_cbt),
-      ret = __wt_row_search(hs_cbt, srch_key, insert, NULL, false, NULL));
 #ifdef HAVE_DIAGNOSTIC
     WT_TRET(__wt_cursor_key_order_init(hs_cbt));
 #endif
@@ -398,18 +426,49 @@ __hs_insert_record_with_btree_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, W
   WT_HS_TIME_POINT *stop_time_point)
 {
     WT_CURSOR_BTREE *cbt;
+    WT_DECL_ITEM(hs_key);
     WT_DECL_RET;
     WT_UPDATE *hs_upd, *upd_local;
+    wt_timestamp_t hs_start_ts;
+    uint64_t counter, hs_counter;
+    uint32_t hs_btree_id;
+    int cmp;
 
     cbt = (WT_CURSOR_BTREE *)cursor;
     hs_upd = upd_local = NULL;
+    counter = 0;
+
+    /* Allocate buffers for the data store and history store key. */
+    WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
+
+    /*
+     * Adjust counter if there exists an update in the history store with same btree id, key and
+     * timestamp. Otherwise the newly inserting history store record may fall behind the existing
+     * one can lead to wrong order.
+     */
+    WT_ERR_NOTFOUND_OK(
+      __wt_hs_cursor_position(session, cursor, btree->id, key, upd->start_ts), true);
+    if (ret == 0) {
+        WT_ERR(cursor->get_key(cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
+
+        /*
+         * Check the whether the existing record is also from the same timestamp.
+         *
+         * Verify simple checks first to confirm whether the retrieved update same or not before
+         * performing the expensive key comparison.
+         */
+        if (hs_btree_id == btree->id && upd->start_ts == hs_start_ts) {
+            WT_ERR(__wt_compare(session, NULL, hs_key, key, &cmp));
+            if (cmp == 0)
+                counter = hs_counter + 1;
+        }
+    }
 
     /*
      * Use WT_CURSOR.set_key and WT_CURSOR.set_value to create key and value items, then use them to
      * create an update chain for a direct insertion onto the history store page.
      */
-    cursor->set_key(
-      cursor, btree->id, key, upd->start_ts, __wt_atomic_add64(&btree->hs_counter, 1));
+    cursor->set_key(cursor, btree->id, key, upd->start_ts, counter);
     cursor->set_value(
       cursor, stop_time_point->durable_ts, upd->durable_ts, (uint64_t)type, hs_value);
 
@@ -452,6 +511,7 @@ __hs_insert_record_with_btree_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, W
     WT_STAT_CONN_INCR(session, cache_hs_insert);
 
 err:
+    __wt_scr_free(session, &hs_key);
     if (ret != 0) {
         __wt_free_update_list(session, &hs_upd);
 
@@ -950,7 +1010,7 @@ err:
  */
 int
 __wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id,
-  WT_ITEM *key, wt_timestamp_t timestamp)
+  const WT_ITEM *key, wt_timestamp_t timestamp)
 {
     WT_DECL_ITEM(srch_key);
     WT_DECL_RET;
diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h
index abf19855a3f..b691552dc1f 100644
--- a/src/third_party/wiredtiger/src/include/btree.h
+++ b/src/third_party/wiredtiger/src/include/btree.h
@@ -169,7 +169,6 @@ struct __wt_btree {
     uint64_t write_gen;   /* Write generation */
     uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */
     wt_timestamp_t rec_max_timestamp;
-    uint64_t hs_counter; /* History store counter */
 
     uint64_t checkpoint_gen;       /* Checkpoint generation */
     WT_SESSION_IMPL *sync_session; /* Syncing session */
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 9ada3107728..a55267f1bc0 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -763,7 +763,7 @@ extern int __wt_hs_cursor_close(WT_SESSION_IMPL *session, uint32_t session_flags
 extern int __wt_hs_cursor_open(WT_SESSION_IMPL *session)
   WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id,
-  WT_ITEM *key, wt_timestamp_t timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+  const WT_ITEM *key, wt_timestamp_t timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, uint32_t btree_id,
   const WT_ITEM *key, wt_timestamp_t ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_hs_get_btree(WT_SESSION_IMPL *session, WT_BTREE **hs_btreep)
@@ -1239,6 +1239,8 @@ extern int __wt_row_modify(WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_IT
   WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_row_search(WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key, bool insert, WT_REF *leaf,
   bool leaf_safe, bool *leaf_foundp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rts_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
+  WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l)
   WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[])
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 3df5aa605d8..50cc22f6d4f 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -714,6 +714,7 @@ struct __wt_connection_stats {
     int64_t txn_rts_keys_removed;
     int64_t txn_rts_keys_restored;
     int64_t txn_rts_pages_visited;
+    int64_t txn_rts_skip_interal_pages_walk;
     int64_t txn_rts_sweep_hs_keys;
     int64_t txn_rts_upd_aborted;
     int64_t txn_rts_hs_removed;
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index a3abb6c26af..efc658938d0 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -5886,106 +5886,108 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
 #define	WT_STAT_CONN_TXN_RTS_KEYS_RESTORED		1417
 /*! transaction: rollback to stable pages visited */
 #define	WT_STAT_CONN_TXN_RTS_PAGES_VISITED		1418
+/*! transaction: rollback to stable skipping internal pages tree walk */
+#define	WT_STAT_CONN_TXN_RTS_SKIP_INTERAL_PAGES_WALK	1419
 /*! transaction: rollback to stable sweeping history store keys */
-#define	WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS		1419
+#define	WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS		1420
 /*! transaction: rollback to stable updates aborted */
-#define	WT_STAT_CONN_TXN_RTS_UPD_ABORTED		1420
+#define	WT_STAT_CONN_TXN_RTS_UPD_ABORTED		1421
 /*! transaction: rollback to stable updates removed from history store */
-#define	WT_STAT_CONN_TXN_RTS_HS_REMOVED			1421
+#define	WT_STAT_CONN_TXN_RTS_HS_REMOVED			1422
 /*! transaction: set timestamp calls */
-#define	WT_STAT_CONN_TXN_SET_TS				1422
+#define	WT_STAT_CONN_TXN_SET_TS				1423
 /*! transaction: set timestamp durable calls */
-#define	WT_STAT_CONN_TXN_SET_TS_DURABLE			1423
+#define	WT_STAT_CONN_TXN_SET_TS_DURABLE			1424
 /*! transaction: set timestamp durable updates */
-#define	WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD		1424
+#define	WT_STAT_CONN_TXN_SET_TS_DURABLE_UPD		1425
 /*! transaction: set timestamp oldest calls */
-#define	WT_STAT_CONN_TXN_SET_TS_OLDEST			1425
+#define	WT_STAT_CONN_TXN_SET_TS_OLDEST			1426
 /*! transaction: set timestamp oldest updates */
-#define	WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD		1426
+#define	WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD		1427
 /*! transaction: set timestamp stable calls */
-#define	WT_STAT_CONN_TXN_SET_TS_STABLE			1427
+#define	WT_STAT_CONN_TXN_SET_TS_STABLE			1428
 /*! transaction: set timestamp stable updates */
-#define	WT_STAT_CONN_TXN_SET_TS_STABLE_UPD		1428
+#define	WT_STAT_CONN_TXN_SET_TS_STABLE_UPD		1429
 /*! transaction: transaction begins */
-#define	WT_STAT_CONN_TXN_BEGIN				1429
+#define	WT_STAT_CONN_TXN_BEGIN				1430
 /*! transaction: transaction checkpoint currently running */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_RUNNING		1430
+#define	WT_STAT_CONN_TXN_CHECKPOINT_RUNNING		1431
 /*! transaction: transaction checkpoint generation */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_GENERATION		1431
+#define	WT_STAT_CONN_TXN_CHECKPOINT_GENERATION		1432
 /*!
  * transaction: transaction checkpoint history store file duration
  * (usecs)
  */
-#define	WT_STAT_CONN_TXN_HS_CKPT_DURATION		1432
+#define	WT_STAT_CONN_TXN_HS_CKPT_DURATION		1433
 /*! transaction: transaction checkpoint max time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX		1433
+#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX		1434
 /*! transaction: transaction checkpoint min time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN		1434
+#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN		1435
 /*! transaction: transaction checkpoint most recent time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT		1435
+#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT		1436
 /*! transaction: transaction checkpoint prepare currently running */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING	1436
+#define	WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING	1437
 /*! transaction: transaction checkpoint prepare max time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX		1437
+#define	WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX		1438
 /*! transaction: transaction checkpoint prepare min time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN		1438
+#define	WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN		1439
 /*! transaction: transaction checkpoint prepare most recent time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT		1439
+#define	WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT		1440
 /*! transaction: transaction checkpoint prepare total time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL		1440
+#define	WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL		1441
 /*! transaction: transaction checkpoint scrub dirty target */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET	1441
+#define	WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET	1442
 /*! transaction: transaction checkpoint scrub time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME		1442
+#define	WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME		1443
 /*! transaction: transaction checkpoint total time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL		1443
+#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL		1444
 /*! transaction: transaction checkpoints */
-#define	WT_STAT_CONN_TXN_CHECKPOINT			1444
+#define	WT_STAT_CONN_TXN_CHECKPOINT			1445
 /*!
  * transaction: transaction checkpoints skipped because database was
  * clean
  */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED		1445
+#define	WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED		1446
 /*! transaction: transaction failures due to history store */
-#define	WT_STAT_CONN_TXN_FAIL_CACHE			1446
+#define	WT_STAT_CONN_TXN_FAIL_CACHE			1447
 /*!
  * transaction: transaction fsync calls for checkpoint after allocating
  * the transaction ID
  */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST		1447
+#define	WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST		1448
 /*!
  * transaction: transaction fsync duration for checkpoint after
  * allocating the transaction ID (usecs)
  */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION	1448
+#define	WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION	1449
 /*! transaction: transaction range of IDs currently pinned */
-#define	WT_STAT_CONN_TXN_PINNED_RANGE			1449
+#define	WT_STAT_CONN_TXN_PINNED_RANGE			1450
 /*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define	WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE	1450
+#define	WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE	1451
 /*! transaction: transaction range of timestamps currently pinned */
-#define	WT_STAT_CONN_TXN_PINNED_TIMESTAMP		1451
+#define	WT_STAT_CONN_TXN_PINNED_TIMESTAMP		1452
 /*! transaction: transaction range of timestamps pinned by a checkpoint */
-#define	WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT	1452
+#define	WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT	1453
 /*!
  * transaction: transaction range of timestamps pinned by the oldest
  * active read timestamp
  */
-#define	WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER	1453
+#define	WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER	1454
 /*!
  * transaction: transaction range of timestamps pinned by the oldest
  * timestamp
  */
-#define	WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST	1454
+#define	WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST	1455
 /*! transaction: transaction read timestamp of the oldest active reader */
-#define	WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ	1455
+#define	WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ	1456
 /*! transaction: transaction sync calls */
-#define	WT_STAT_CONN_TXN_SYNC				1456
+#define	WT_STAT_CONN_TXN_SYNC				1457
 /*! transaction: transactions committed */
-#define	WT_STAT_CONN_TXN_COMMIT				1457
+#define	WT_STAT_CONN_TXN_COMMIT				1458
 /*! transaction: transactions rolled back */
-#define	WT_STAT_CONN_TXN_ROLLBACK			1458
+#define	WT_STAT_CONN_TXN_ROLLBACK			1459
 /*! transaction: update conflicts */
-#define	WT_STAT_CONN_TXN_UPDATE_CONFLICT		1459
+#define	WT_STAT_CONN_TXN_UPDATE_CONFLICT		1460
 
 /*!
  * @}
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 5e481d850f6..4c0b38d5192 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -1053,6 +1053,7 @@ static const char *const __stats_connection_desc[] = {
   "transaction: read timestamp queue inserts total", "transaction: read timestamp queue length",
   "transaction: rollback to stable calls", "transaction: rollback to stable keys removed",
   "transaction: rollback to stable keys restored", "transaction: rollback to stable pages visited",
+  "transaction: rollback to stable skipping internal pages tree walk",
   "transaction: rollback to stable sweeping history store keys",
   "transaction: rollback to stable updates aborted",
   "transaction: rollback to stable updates removed from history store",
@@ -1547,6 +1548,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
     stats->txn_rts_keys_removed = 0;
     stats->txn_rts_keys_restored = 0;
     stats->txn_rts_pages_visited = 0;
+    stats->txn_rts_skip_interal_pages_walk = 0;
     stats->txn_rts_sweep_hs_keys = 0;
     stats->txn_rts_upd_aborted = 0;
     stats->txn_rts_hs_removed = 0;
@@ -2050,6 +2052,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
     to->txn_rts_keys_removed += WT_STAT_READ(from, txn_rts_keys_removed);
     to->txn_rts_keys_restored += WT_STAT_READ(from, txn_rts_keys_restored);
     to->txn_rts_pages_visited += WT_STAT_READ(from, txn_rts_pages_visited);
+    to->txn_rts_skip_interal_pages_walk += WT_STAT_READ(from, txn_rts_skip_interal_pages_walk);
     to->txn_rts_sweep_hs_keys += WT_STAT_READ(from, txn_rts_sweep_hs_keys);
     to->txn_rts_upd_aborted += WT_STAT_READ(from, txn_rts_upd_aborted);
     to->txn_rts_hs_removed += WT_STAT_READ(from, txn_rts_hs_removed);
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 48dc34a2cf7..13c3725659d 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -819,6 +819,40 @@ err:
 }
 
 /*
+ * __wt_rts_page_skip --
+ *     Skip if rollback to stable doesn't requires to read this page.
+ */
+int
+__wt_rts_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
+{
+    wt_timestamp_t rollback_timestamp;
+
+    rollback_timestamp = *(wt_timestamp_t *)(context);
+    *skipp = false; /* Default to reading */
+
+    /* If the page is in-memory, we want to look at it. */
+    if (ref->state != WT_REF_DISK)
+        return (0);
+
+    /*
+     * Rollback to stable doesn't read leaf pages into memory as part of the tree walk. The leaf
+     * page is loaded into memory in the caller functions if it has newer updates that are need to
+     * be aborted. Don't process further on leaf pages as part of tree walk function.
+     */
+    if (!F_ISSET(ref, WT_REF_FLAG_INTERNAL))
+        return (0);
+
+    /* Check whether this ref has any possible updates to be aborted. */
+    if (!__rollback_page_needs_abort(session, ref, rollback_timestamp)) {
+        *skipp = true;
+        __wt_verbose(session, WT_VERB_RTS, "%p: internal page walk skipped", (void *)ref);
+        WT_STAT_CONN_INCR(session, txn_rts_skip_interal_pages_walk);
+    }
+
+    return (0);
+}
+
+/*
  * __rollback_to_stable_btree_walk --
  *     Called for each open handle - choose to either skip or wipe the commits
  */
@@ -830,8 +864,8 @@ __rollback_to_stable_btree_walk(WT_SESSION_IMPL *session, wt_timestamp_t rollbac
 
     /* Walk the tree, marking commits aborted where appropriate. */
     ref = NULL;
-    while ((ret = __wt_tree_walk(
-              session, &ref, WT_READ_CACHE_LEAF | WT_READ_NO_EVICT | WT_READ_WONT_NEED)) == 0 &&
+    while ((ret = __wt_tree_walk_custom_skip(session, &ref, __wt_rts_page_skip, &rollback_timestamp,
+              WT_READ_CACHE_LEAF | WT_READ_NO_EVICT | WT_READ_WONT_NEED)) == 0 &&
       ref != NULL)
         if (F_ISSET(ref, WT_REF_FLAG_INTERNAL)) {
             WT_INTL_FOREACH_BEGIN (session, ref->page, child_ref) {
diff --git a/src/third_party/wiredtiger/test/format/bulk.c b/src/third_party/wiredtiger/test/format/bulk.c
index 89e0406c3cb..72c17366639 100644
--- a/src/third_party/wiredtiger/test/format/bulk.c
+++ b/src/third_party/wiredtiger/test/format/bulk.c
@@ -59,7 +59,7 @@ bulk_commit_transaction(WT_SESSION *session)
     testutil_check(session->commit_transaction(session, buf));
 
     /* Update the oldest timestamp, otherwise updates are pinned in memory. */
-    timestamp_once(session);
+    timestamp_once(session, false);
 }
 
 /*
diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c
index 1db9cc5854c..374bc470f5b 100644
--- a/src/third_party/wiredtiger/test/format/config.c
+++ b/src/third_party/wiredtiger/test/format/config.c
@@ -942,7 +942,9 @@ config_transaction(void)
         if (g.c_txn_freq != 100 && config_is_perm("transaction.frequency"))
             testutil_die(EINVAL, "snapshot isolation requires transaction frequency set to 100");
     }
-
+    if (g.c_txn_rollback_to_stable && config_is_perm("transaction.rollback_to_stable") &&
+      g.c_isolation_flag != ISOLATION_SNAPSHOT && config_is_perm("transaction.isolation"))
+        testutil_die(EINVAL, "rollback to stable requires snapshot isolation");
     /*
      * The permanent configuration has no incompatible settings, adjust the temporary configuration
      * as necessary. Prepare overrides timestamps, overrides isolation, for no reason other than
@@ -958,6 +960,12 @@ config_transaction(void)
         if (g.c_txn_freq != 100)
             config_single("transaction.frequency=100", false);
     }
+    if (g.c_txn_rollback_to_stable) {
+        if (!g.c_txn_timestamps)
+            config_single("transaction.timestamps=on", false);
+        if (g.c_logging)
+            config_single("logging=off", false);
+    }
     if (g.c_txn_timestamps) {
         if (g.c_isolation_flag != ISOLATION_SNAPSHOT)
             config_single("transaction.isolation=snapshot", false);
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index 30b8f2a09a6..b8f2922021e 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -57,97 +57,93 @@ typedef struct {
     char **vstr;      /* Value for string options */
 } CONFIG;
 
-#define COMPRESSION_LIST "(none | lz4 | snappy | zlib | zstd)"
+#define COMPRESSION_LIST " (none | lz4 | snappy | zlib | zstd)"
 
 static CONFIG c[] = {
   /* 5% */
-  {"assert.commit_timestamp", "if assert commit_timestamp", C_BOOL, 5, 0, 0,
+  {"assert.commit_timestamp", "assert commit_timestamp", C_BOOL, 5, 0, 0,
     &g.c_assert_commit_timestamp, NULL},
 
   /* 5% */
-  {"assert.read_timestamp", "if assert read_timestamp", C_BOOL, 5, 0, 0, &g.c_assert_read_timestamp,
+  {"assert.read_timestamp", "assert read_timestamp", C_BOOL, 5, 0, 0, &g.c_assert_read_timestamp,
     NULL},
 
   /* 20% */
-  {"backup", "if backups are enabled", C_BOOL, 20, 0, 0, &g.c_backups, NULL},
+  {"backup", "configure backups", C_BOOL, 20, 0, 0, &g.c_backups, NULL},
 
-  {"backup.incremental", "type of backup (block | log | off)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
+  {"backup.incremental", "backup type (block | log | off)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
     &g.c_backup_incremental},
 
-  {"backup.incr_granularity", "incremental backup block granularity in KB", 0x0, 4, 16384, 16384,
+  {"backup.incr_granularity", "incremental backup block granularity (KB)", 0x0, 4, 16384, 16384,
     &g.c_backup_incr_granularity, NULL},
 
-  {"btree.bitcnt", "number of bits for fixed-length column-store files", 0x0, 1, 8, 8, &g.c_bitcnt,
-    NULL},
+  {"btree.bitcnt", "fixed-length column-store object size (number of bits)", 0x0, 1, 8, 8,
+    &g.c_bitcnt, NULL},
 
-  {"btree.compression", "type of compression " COMPRESSION_LIST, C_IGNORE | C_STRING, 0, 0, 0, NULL,
+  {"btree.compression", "compression type" COMPRESSION_LIST, C_IGNORE | C_STRING, 0, 0, 0, NULL,
     &g.c_compression},
 
   /* 20% */
-  {"btree.dictionary", "if values are dictionary compressed", C_BOOL, 20, 0, 0, &g.c_dictionary,
+  {"btree.dictionary", "configure dictionary compressed values", C_BOOL, 20, 0, 0, &g.c_dictionary,
     NULL},
 
   /* 20% */
-  {"btree.huffman_key", "if keys are huffman encoded", C_BOOL, 20, 0, 0, &g.c_huffman_key, NULL},
+  {"btree.huffman_key", "configure huffman encoded keys", C_BOOL, 20, 0, 0, &g.c_huffman_key, NULL},
 
   /* 20% */
-  {"btree.huffman_value", "if values are huffman encoded", C_BOOL, 20, 0, 0, &g.c_huffman_value,
+  {"btree.huffman_value", "configure huffman encoded values", C_BOOL, 20, 0, 0, &g.c_huffman_value,
     NULL},
 
   /* 95% */
-  {"btree.internal_key_truncation", "if internal keys are truncated", C_BOOL, 95, 0, 0,
+  {"btree.internal_key_truncation", "truncate internal keys", C_BOOL, 95, 0, 0,
     &g.c_internal_key_truncation, NULL},
 
-  {"btree.internal_page_max", "maximum size of Btree internal nodes", 0x0, 9, 17, 27,
+  {"btree.internal_page_max", "btree internal node maximum size", 0x0, 9, 17, 27,
     &g.c_intl_page_max, NULL},
 
-  {"btree.key_gap", "gap between instantiated keys on a Btree page", 0x0, 0, 20, 20, &g.c_key_gap,
-    NULL},
+  {"btree.key_gap", "btree page instantiated key gap", 0x0, 0, 20, 20, &g.c_key_gap, NULL},
 
-  {"btree.key_max", "maximum size of keys", 0x0, 20, 128, MEGABYTE(10), &g.c_key_max, NULL},
+  {"btree.key_max", "maximum key size", 0x0, 20, 128, MEGABYTE(10), &g.c_key_max, NULL},
 
   /*
    * A minimum key size of 11 is necessary. Row-store keys have a leading 10-digit number and the
    * 11 guarantees we never see a key that we can't convert to a numeric value without formatting
    * it first because there's a trailing non-digit character in every key.
    */
-  {"btree.key_min", "minimum size of keys", 0x0, 11, 32, 256, &g.c_key_min, NULL},
+  {"btree.key_min", "minimum key size", 0x0, 11, 32, 256, &g.c_key_min, NULL},
 
-  {"btree.leaf_page_max", "maximum size of Btree leaf nodes", 0x0, 9, 17, 27, &g.c_leaf_page_max,
-    NULL},
+  {"btree.leaf_page_max", "btree leaf node maximum size", 0x0, 9, 17, 27, &g.c_leaf_page_max, NULL},
 
-  {"btree.memory_page_max", "maximum size of in-memory pages", 0x0, 1, 10, 128,
-    &g.c_memory_page_max, NULL},
+  {"btree.memory_page_max", "maximum cache page size", 0x0, 1, 10, 128, &g.c_memory_page_max, NULL},
 
   /* 80% */
-  {"btree.prefix_compression", "if keys are prefix compressed", C_BOOL, 80, 0, 0,
+  {"btree.prefix_compression", "configure prefix compressed keys", C_BOOL, 80, 0, 0,
     &g.c_prefix_compression, NULL},
 
-  {"btree.prefix_compression_min", "minimum gain before prefix compression is used", 0x0, 0, 8, 256,
-    &g.c_prefix_compression_min, NULL},
+  {"btree.prefix_compression_min", "minimum gain before prefix compression is used (bytes)", 0x0, 0,
+    8, 256, &g.c_prefix_compression_min, NULL},
 
-  {"btree.repeat_data_pct", "percent duplicate values in row- or var-length column-stores", 0x0, 0,
-    90, 90, &g.c_repeat_data_pct, NULL},
+  {"btree.repeat_data_pct", "duplicate values (percentage)", 0x0, 0, 90, 90, &g.c_repeat_data_pct,
+    NULL},
 
   /* 10% */
-  {"btree.reverse", "collate in reverse order", C_BOOL, 10, 0, 0, &g.c_reverse, NULL},
+  {"btree.reverse", "reverse order collation", C_BOOL, 10, 0, 0, &g.c_reverse, NULL},
 
   {"btree.split_pct", "page split size as a percentage of the maximum page size", 0x0, 50, 100, 100,
     &g.c_split_pct, NULL},
 
-  {"btree.value_max", "maximum size of values", 0x0, 32, 4096, MEGABYTE(10), &g.c_value_max, NULL},
+  {"btree.value_max", "maximum value size", 0x0, 32, 4096, MEGABYTE(10), &g.c_value_max, NULL},
 
-  {"btree.value_min", "minimum size of values", 0x0, 0, 20, 4096, &g.c_value_min, NULL},
+  {"btree.value_min", "minimum value size", 0x0, 0, 20, 4096, &g.c_value_min, NULL},
 
-  {"cache", "size of the cache in MB", 0x0, 1, 100, 100 * 1024, &g.c_cache, NULL},
+  {"cache", "cache size (MB)", 0x0, 1, 100, 100 * 1024, &g.c_cache, NULL},
 
-  {"cache.evict_max", "the maximum number of eviction workers", 0x0, 0, 5, 100, &g.c_evict_max,
-    NULL},
+  {"cache.evict_max", "maximum number of eviction workers", 0x0, 0, 5, 100, &g.c_evict_max, NULL},
 
-  {"cache.minimum", "minimum size of the cache in MB", C_IGNORE, 0, 0, 100 * 1024,
-    &g.c_cache_minimum, NULL},
+  {"cache.minimum", "minimum cache size (MB)", C_IGNORE, 0, 0, 100 * 1024, &g.c_cache_minimum,
+    NULL},
 
-  {"checkpoint", "type of checkpoints (on | off | wiredtiger)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
+  {"checkpoint", "checkpoint type (on | off | wiredtiger)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
     &g.c_checkpoint},
 
   {"checkpoint.log_size", "MB of log to wait if wiredtiger checkpoints configured", 0x0, 20, 200,
@@ -156,151 +152,145 @@ static CONFIG c[] = {
   {"checkpoint.wait", "seconds to wait if wiredtiger checkpoints configured", 0x0, 5, 100, 3600,
     &g.c_checkpoint_wait, NULL},
 
-  {"disk.checksum", "type of checksums (on | off | uncompressed)", C_IGNORE | C_STRING, 0, 0, 0,
-    NULL, &g.c_checksum},
+  {"disk.checksum", "checksum type (on | off | uncompressed)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
+    &g.c_checksum},
 
   /* 5% */
-  {"disk.data_extend", "if data files are extended", C_BOOL, 5, 0, 0, &g.c_data_extend, NULL},
+  {"disk.data_extend", "configure data file extension", C_BOOL, 5, 0, 0, &g.c_data_extend, NULL},
 
   /* 0% */
-  {"disk.direct_io", "if direct I/O is configured for data objects", C_IGNORE | C_BOOL, 0, 0, 1,
+  {"disk.direct_io", "configure direct I/O for data objects", C_IGNORE | C_BOOL, 0, 0, 1,
     &g.c_direct_io, NULL},
 
-  {"disk.encryption", "type of encryption (none | rotn-7)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
+  {"disk.encryption", "encryption type (none | rotn-7)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
     &g.c_encryption},
 
   /* 10% */
-  {"disk.firstfit", "if allocation is firstfit", C_BOOL, 10, 0, 0, &g.c_firstfit, NULL},
+  {"disk.firstfit", "configure first-fit allocation", C_BOOL, 10, 0, 0, &g.c_firstfit, NULL},
 
   /* 90% */
-  {"disk.mmap", "configure for mmap operations (readonly)", C_BOOL, 90, 0, 0, &g.c_mmap, NULL},
+  {"disk.mmap", "configure mmap operations (reads only)", C_BOOL, 90, 0, 0, &g.c_mmap, NULL},
 
   /* 5% */
-  {"disk.mmap_all", "configure for mmap operations (read and write)", C_BOOL, 5, 0, 0,
-    &g.c_mmap_all, NULL},
+  {"disk.mmap_all", "configure mmap operations (read and write)", C_BOOL, 5, 0, 0, &g.c_mmap_all,
+    NULL},
 
   /* 0% */
-  {"format.abort", "if timed run should drop core", C_BOOL, 0, 0, 0, &g.c_abort, NULL},
+  {"format.abort", "drop core during timed run", C_BOOL, 0, 0, 0, &g.c_abort, NULL},
 
   /* 75% */
-  {"format.independent_thread_rng", "if thread RNG space is independent", C_BOOL, 75, 0, 0,
+  {"format.independent_thread_rng", "configure independent thread RNG space", C_BOOL, 75, 0, 0,
     &g.c_independent_thread_rng, NULL},
 
-  {"format.major_timeout", "timeout for long-running operations (minutes)", C_IGNORE, 0, 0, 1000,
+  {"format.major_timeout", "long-running operations timeout (minutes)", C_IGNORE, 0, 0, 1000,
     &g.c_major_timeout, NULL},
 
   /* 50% */
-  {"logging", "if logging configured", C_BOOL, 50, 0, 0, &g.c_logging, NULL},
+  {"logging", "configure logging", C_BOOL, 50, 0, 0, &g.c_logging, NULL},
 
   /* 50% */
-  {"logging.archive", "if log file archival configured", C_BOOL, 50, 0, 0, &g.c_logging_archive,
-    NULL},
+  {"logging.archive", "configure log file archival", C_BOOL, 50, 0, 0, &g.c_logging_archive, NULL},
 
-  {"logging.compression", "type of logging compression " COMPRESSION_LIST, C_IGNORE | C_STRING, 0,
-    0, 0, NULL, &g.c_logging_compression},
+  {"logging.compression", "logging compression type" COMPRESSION_LIST, C_IGNORE | C_STRING, 0, 0, 0,
+    NULL, &g.c_logging_compression},
 
-  {"logging.file_max", "maximum log file size in KB", 0x0, 100, 512000, 2097152,
+  {"logging.file_max", "maximum log file size (KB)", 0x0, 100, 512000, 2097152,
     &g.c_logging_file_max, NULL},
 
   /* 50% */
-  {"logging.prealloc", "if log file pre-allocation configured", C_BOOL, 50, 0, 0,
-    &g.c_logging_prealloc, NULL},
+  {"logging.prealloc", "configure log file pre-allocation", C_BOOL, 50, 0, 0, &g.c_logging_prealloc,
+    NULL},
 
   /* 90% */
-  {"lsm.auto_throttle", "if LSM inserts are throttled", C_BOOL, 90, 0, 0, &g.c_auto_throttle, NULL},
+  {"lsm.auto_throttle", "throttle LSM inserts", C_BOOL, 90, 0, 0, &g.c_auto_throttle, NULL},
 
   /* 95% */
-  {"lsm.bloom", "if bloom filters are configured", C_BOOL, 95, 0, 0, &g.c_bloom, NULL},
+  {"lsm.bloom", "configure bloom filters", C_BOOL, 95, 0, 0, &g.c_bloom, NULL},
 
-  {"lsm.bloom_bit_count", "number of bits per item for LSM bloom filters", 0x0, 4, 64, 1000,
+  {"lsm.bloom_bit_count", "number of bits per item for bloom filters", 0x0, 4, 64, 1000,
     &g.c_bloom_bit_count, NULL},
 
-  {"lsm.bloom_hash_count", "number of hash values per item for LSM bloom filters", 0x0, 4, 32, 100,
+  {"lsm.bloom_hash_count", "number of hash values per item for bloom filters", 0x0, 4, 32, 100,
     &g.c_bloom_hash_count, NULL},
 
   /* 10% */
-  {"lsm.bloom_oldest", "if bloom_oldest=true", C_BOOL, 10, 0, 0, &g.c_bloom_oldest, NULL},
+  {"lsm.bloom_oldest", "configure bloom_oldest=true", C_BOOL, 10, 0, 0, &g.c_bloom_oldest, NULL},
 
-  {"lsm.chunk_size", "LSM chunk size in MB", 0x0, 1, 10, 100, &g.c_chunk_size, NULL},
+  {"lsm.chunk_size", "LSM chunk size (MB)", 0x0, 1, 10, 100, &g.c_chunk_size, NULL},
 
-  {"lsm.merge_max", "the maximum number of chunks to include in a merge operation", 0x0, 4, 20, 100,
-    &g.c_merge_max, NULL},
+  {"lsm.merge_max", "maximum number of chunks to include in an LSM merge operation", 0x0, 4, 20,
+    100, &g.c_merge_max, NULL},
 
-  {"lsm.worker_threads", "the number of LSM worker threads", 0x0, 3, 4, 20, &g.c_lsm_worker_threads,
+  {"lsm.worker_threads", "number of LSM worker threads", 0x0, 3, 4, 20, &g.c_lsm_worker_threads,
     NULL},
 
   /* 10% */
-  {"ops.alter", "if altering the table is enabled", C_BOOL, 10, 0, 0, &g.c_alter, NULL},
+  {"ops.alter", "configure table alterations", C_BOOL, 10, 0, 0, &g.c_alter, NULL},
 
   /* 10% */
-  {"ops.compaction", "if compaction is running", C_BOOL, 10, 0, 0, &g.c_compact, NULL},
+  {"ops.compaction", "configure compaction", C_BOOL, 10, 0, 0, &g.c_compact, NULL},
 
   /* 50% */
-  {"ops.hs_cursor", "if history store cursor reads configured", C_BOOL, 50, 0, 0, &g.c_hs_cursor,
-    NULL},
+  {"ops.hs_cursor", "configure history store cursor reads", C_BOOL, 50, 0, 0, &g.c_hs_cursor, NULL},
 
-  {"ops.pct.delete", "percent operations that are deletes", C_IGNORE, 0, 0, 100, &g.c_delete_pct,
-    NULL},
+  {"ops.pct.delete", "delete operations (percentage)", C_IGNORE, 0, 0, 100, &g.c_delete_pct, NULL},
 
-  {"ops.pct.insert", "percent operations that are inserts", C_IGNORE, 0, 0, 100, &g.c_insert_pct,
-    NULL},
+  {"ops.pct.insert", "insert operations (percentage)", C_IGNORE, 0, 0, 100, &g.c_insert_pct, NULL},
 
-  {"ops.pct.modify", "percent operations that are value modifications", C_IGNORE, 0, 0, 100,
-    &g.c_modify_pct, NULL},
+  {"ops.pct.modify", "modify operations (percentage)", C_IGNORE, 0, 0, 100, &g.c_modify_pct, NULL},
 
-  {"ops.pct.read", "percent operations that are reads", C_IGNORE, 0, 0, 100, &g.c_read_pct, NULL},
+  {"ops.pct.read", "read operations (percentage)", C_IGNORE, 0, 0, 100, &g.c_read_pct, NULL},
 
-  {"ops.pct.write", "percent operations that are value updates", C_IGNORE, 0, 0, 100,
-    &g.c_write_pct, NULL},
+  {"ops.pct.write", "update operations (percentage)", C_IGNORE, 0, 0, 100, &g.c_write_pct, NULL},
 
   /* 5% */
   {"ops.prepare", "configure transaction prepare", C_BOOL, 5, 0, 0, &g.c_prepare, NULL},
 
   /* 10% */
-  {"ops.random_cursor", "if random cursor reads configured", C_BOOL, 10, 0, 0, &g.c_random_cursor,
+  {"ops.random_cursor", "configure random cursor reads", C_BOOL, 10, 0, 0, &g.c_random_cursor,
     NULL},
 
   /* 100% */
-  {"ops.rebalance", "rebalance testing", C_BOOL, 100, 1, 0, &g.c_rebalance, NULL},
+  {"ops.rebalance", "configure rebalance", C_BOOL, 100, 1, 0, &g.c_rebalance, NULL},
 
   /* 100% */
-  {"ops.salvage", "salvage testing", C_BOOL, 100, 1, 0, &g.c_salvage, NULL},
+  {"ops.salvage", "configure salvage", C_BOOL, 100, 1, 0, &g.c_salvage, NULL},
 
   /* 100% */
-  {"ops.truncate", "enable truncation", C_BOOL, 100, 0, 0, &g.c_truncate, NULL},
+  {"ops.truncate", "configure truncation", C_BOOL, 100, 0, 0, &g.c_truncate, NULL},
 
   /* 100% */
-  {"ops.verify", "to regularly verify during a run", C_BOOL, 100, 1, 0, &g.c_verify, NULL},
+  {"ops.verify", "configure verify", C_BOOL, 100, 1, 0, &g.c_verify, NULL},
 
   {"quiet", "quiet run (same as -q)", C_IGNORE | C_BOOL, 0, 0, 1, &g.c_quiet, NULL},
 
-  {"runs", "the number of runs", C_IGNORE, 0, 0, UINT_MAX, &g.c_runs, NULL},
+  {"runs", "number of runs", C_IGNORE, 0, 0, UINT_MAX, &g.c_runs, NULL},
 
-  {"runs.in_memory", "if in-memory configured", C_IGNORE | C_BOOL, 0, 0, 1, &g.c_in_memory, NULL},
+  {"runs.in_memory", "configure in-memory", C_IGNORE | C_BOOL, 0, 0, 1, &g.c_in_memory, NULL},
 
-  {"runs.ops", "the number of operations done per run", 0x0, 0, M(2), M(100), &g.c_ops, NULL},
+  {"runs.ops", "operations per run", 0x0, 0, M(2), M(100), &g.c_ops, NULL},
 
-  {"runs.rows", "the number of rows to create", 0x0, 10, M(1), M(100), &g.c_rows, NULL},
+  {"runs.rows", "number of rows", 0x0, 10, M(1), M(100), &g.c_rows, NULL},
 
-  {"runs.source", "data source (file | lsm | table)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
+  {"runs.source", "data source type (file | lsm | table)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
     &g.c_data_source},
 
-  {"runs.threads", "the number of worker threads", 0x0, 1, 32, 128, &g.c_threads, NULL},
+  {"runs.threads", "number of worker threads", 0x0, 1, 32, 128, &g.c_threads, NULL},
 
-  {"runs.timer", "maximum time to run in minutes", C_IGNORE, 0, 0, UINT_MAX, &g.c_timer, NULL},
+  {"runs.timer", "run time (minutes)", C_IGNORE, 0, 0, UINT_MAX, &g.c_timer, NULL},
 
-  {"runs.type", "type of store to create (fix | var | row)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
+  {"runs.type", "object type (fix | var | row)", C_IGNORE | C_STRING, 0, 0, 0, NULL,
     &g.c_file_type},
 
-  {"runs.verify_failure_dump", "attempt page dump on repeatable read error", C_IGNORE | C_BOOL, 0,
+  {"runs.verify_failure_dump", "configure page dump on repeatable read error", C_IGNORE | C_BOOL, 0,
     0, 1, &g.c_verify_failure_dump, NULL},
 
   /* 20% */
-  {"statistics", "maintain statistics", C_BOOL, 20, 0, 0, &g.c_statistics, NULL},
+  {"statistics", "configure statistics", C_BOOL, 20, 0, 0, &g.c_statistics, NULL},
 
   /* 5% */
-  {"statistics.server", "run the statistics server thread", C_BOOL, 5, 0, 0, &g.c_statistics_server,
-    NULL},
+  {"statistics.server", "configure statistics server thread", C_BOOL, 5, 0, 0,
+    &g.c_statistics_server, NULL},
 
   /* 2% */
   {"stress.aggressive_sweep", "stress aggressive sweep", C_BOOL, 2, 0, 0,
@@ -341,25 +331,29 @@ static CONFIG c[] = {
   /* 2% */
   {"stress.split_8", "stress splits (#8)", C_BOOL, 2, 0, 0, &g.c_timing_stress_split_8, NULL},
 
-  {"transaction.frequency", "percent operations done inside an explicit transaction", 0x0, 1, 100,
+  {"transaction.frequency", "operations inside an explicit transaction (percentage)", 0x0, 1, 100,
     100, &g.c_txn_freq, NULL},
 
   {"transaction.isolation",
     "isolation level (random | read-uncommitted | read-committed | snapshot)", C_IGNORE | C_STRING,
     0, 0, 0, NULL, &g.c_isolation},
 
+  /* 0% - By default, turned off until fallout has been debugged. */
+  {"transaction.rollback_to_stable", "configure rollback_to_stable", C_BOOL, 0, 0, 0,
+    &g.c_txn_rollback_to_stable, NULL},
+
   /* 70% */
-  {"transaction.timestamps", "enable transaction timestamp support", C_BOOL, 70, 0, 0,
+  {"transaction.timestamps", "configure transaction timestamps", C_BOOL, 70, 0, 0,
     &g.c_txn_timestamps, NULL},
 
-  {"wiredtiger.config", "configuration string used to wiredtiger_open", C_IGNORE | C_STRING, 0, 0,
-    0, NULL, &g.c_config_open},
+  {"wiredtiger.config", "wiredtiger_open API configuration string", C_IGNORE | C_STRING, 0, 0, 0,
+    NULL, &g.c_config_open},
 
   /* 80% */
-  {"wiredtiger.rwlock", "if wiredtiger read/write mutexes should be used", C_BOOL, 80, 0, 0,
-    &g.c_wt_mutex, NULL},
+  {"wiredtiger.rwlock", "configure wiredtiger read/write mutexes", C_BOOL, 80, 0, 0, &g.c_wt_mutex,
+    NULL},
 
-  {"wiredtiger.leak_memory", "if memory should be leaked on close", C_BOOL, 0, 0, 0,
+  {"wiredtiger.leak_memory", "configure memory leaked on shutdown", C_BOOL, 0, 0, 0,
     &g.c_leak_memory, NULL},
 
   {NULL, NULL, 0x0, 0, 0, 0, NULL, NULL}};
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index af5e16cc60b..5b4e8589b8c 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -74,7 +74,6 @@ typedef struct {
 #define LOCK_INITIALIZED(lock) ((lock)->lock_type != LOCK_NONE)
 
 typedef struct {
-    wt_thread_t tid;  /* thread ID */
     char tidbuf[128]; /* thread ID in printable form */
 
     WT_CONNECTION *wts_conn;
@@ -112,6 +111,8 @@ typedef struct {
 
     WT_RAND_STATE rnd; /* Global RNG state */
 
+    uint32_t rts_no_check; /* track unsuccessful RTS checking */
+
     /*
      * Prepare will return an error if the prepare timestamp is less than any active read timestamp.
      * Lock across allocating prepare and read timestamps.
@@ -121,7 +122,9 @@ typedef struct {
      */
     RWLOCK ts_lock;
 
-    uint64_t timestamp; /* Counter for timestamps */
+    uint64_t timestamp;        /* Counter for timestamps */
+    uint64_t oldest_timestamp; /* Last timestamp used for oldest */
+    uint64_t stable_timestamp; /* Last timestamp used for stable */
 
     uint64_t truncate_cnt; /* Counter for truncation */
 
@@ -222,6 +225,7 @@ typedef struct {
     uint32_t c_timing_stress_split_8;
     uint32_t c_truncate;
     uint32_t c_txn_freq;
+    uint32_t c_txn_rollback_to_stable;
     uint32_t c_txn_timestamps;
     uint32_t c_value_max;
     uint32_t c_value_min;
@@ -305,6 +309,13 @@ typedef struct {
 } SNAP_OPS;
 
 typedef struct {
+    SNAP_OPS *snap_state_current;
+    SNAP_OPS *snap_state_end;
+    SNAP_OPS *snap_state_first;
+    SNAP_OPS *snap_state_list;
+} SNAP_STATE;
+
+typedef struct {
     int id;           /* simple thread ID */
     wt_thread_t tid;  /* thread ID */
     char tidbuf[128]; /* thread ID in printable form */
@@ -340,7 +351,14 @@ typedef struct {
     uint64_t opid;         /* Operation ID */
     uint64_t read_ts;      /* read timestamp */
     uint64_t commit_ts;    /* commit timestamp */
-    SNAP_OPS *snap, *snap_first, snap_list[512];
+    uint64_t stable_ts;    /* stable timestamp */
+    SNAP_STATE snap_states[2];
+    SNAP_STATE *s; /* points to one of the snap_states */
+
+#define snap_current s->snap_state_current
+#define snap_end s->snap_state_end
+#define snap_first s->snap_state_first
+#define snap_list s->snap_state_list
 
     uint64_t insert_list[256]; /* column-store inserted records */
     u_int insert_list_cnt;
@@ -354,6 +372,8 @@ typedef struct {
 } TINFO;
 extern TINFO **tinfo_list;
 
+#define SNAP_LIST_SIZE 512
+
 WT_THREAD_RET alter(void *);
 WT_THREAD_RET backup(void *);
 WT_THREAD_RET checkpoint(void *);
@@ -382,12 +402,16 @@ void operations(u_int, bool);
 void path_setup(const char *);
 void set_alarm(u_int);
 void set_core_off(void);
-void snap_init(TINFO *, uint64_t, bool);
+void snap_init(TINFO *);
+void snap_teardown(TINFO *);
+void snap_op_init(TINFO *, uint64_t, bool);
+void snap_repeat_rollback(WT_CURSOR *, TINFO **, size_t);
 void snap_repeat_single(WT_CURSOR *, TINFO *);
 int snap_repeat_txn(WT_CURSOR *, TINFO *);
 void snap_repeat_update(TINFO *, bool);
 void snap_track(TINFO *, thread_op);
-void timestamp_once(WT_SESSION *);
+void timestamp_once(WT_SESSION *, bool);
+void timestamp_parse(WT_SESSION *, const char *, uint64_t *);
 int trace_config(const char *);
 void trace_init(void);
 void trace_ops_init(TINFO *);
diff --git a/src/third_party/wiredtiger/test/format/hs.c b/src/third_party/wiredtiger/test/format/hs.c
index 55b298f20e7..0cb0cf352aa 100644
--- a/src/third_party/wiredtiger/test/format/hs.c
+++ b/src/third_party/wiredtiger/test/format/hs.c
@@ -83,8 +83,13 @@ hs_cursor(void *arg)
         /* Search to the last-known location. */
         if (!restart) {
             cursor->set_key(cursor, hs_btree_id, &key, hs_start_ts, hs_counter);
+
+            /*
+             * Limit expected errors because this is a diagnostic check (the WiredTiger API allows
+             * prepare-conflict, but that would be unexpected from the history store file).
+             */
             ret = cursor->search_near(cursor, &exact);
-            testutil_assert(ret == 0 || ret == WT_NOTFOUND);
+            testutil_assert(ret == 0 || ret == WT_NOTFOUND || ret == WT_ROLLBACK);
         }
 
         /*
@@ -99,7 +104,7 @@ hs_cursor(void *arg)
                   cursor, &hs_stop_durable_ts, &hs_durable_timestamp, &hs_upd_type, &hs_value));
                 continue;
             }
-            testutil_assert(ret == WT_NOTFOUND);
+            testutil_assert(ret == WT_NOTFOUND || ret == WT_ROLLBACK);
             break;
         }
 
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 0d4692f0472..38c9a5b5690 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -102,6 +102,130 @@ random_failure(void)
 TINFO **tinfo_list;
 
 /*
+ * tinfo_init --
+ *     Initialize the worker thread structures.
+ */
+static void
+tinfo_init(void)
+{
+    TINFO *tinfo;
+    u_int i;
+
+    /* Allocate the thread structures separately to minimize false sharing. */
+    if (tinfo_list == NULL) {
+        tinfo_list = dcalloc((size_t)g.c_threads + 1, sizeof(TINFO *));
+        for (i = 0; i < g.c_threads; ++i) {
+            tinfo_list[i] = dcalloc(1, sizeof(TINFO));
+            tinfo = tinfo_list[i];
+
+            tinfo->id = (int)i + 1;
+
+            /* Set up the default key and value buffers. */
+            tinfo->key = &tinfo->_key;
+            key_gen_init(tinfo->key);
+            tinfo->value = &tinfo->_value;
+            val_gen_init(tinfo->value);
+            tinfo->lastkey = &tinfo->_lastkey;
+            key_gen_init(tinfo->lastkey);
+
+            snap_init(tinfo);
+        }
+    }
+
+    /* Cleanup for each new run. */
+    for (i = 0; i < g.c_threads; ++i) {
+        tinfo = tinfo_list[i];
+
+        tinfo->ops = 0;
+        tinfo->commit = 0;
+        tinfo->insert = 0;
+        tinfo->prepare = 0;
+        tinfo->remove = 0;
+        tinfo->rollback = 0;
+        tinfo->search = 0;
+        tinfo->truncate = 0;
+        tinfo->update = 0;
+
+        tinfo->session = NULL;
+        tinfo->cursor = NULL;
+
+        tinfo->insert_list_cnt = 0;
+
+        tinfo->state = TINFO_RUNNING;
+        tinfo->quit = false;
+    }
+}
+
+/*
+ * tinfo_teardown --
+ *     Tear down the worker thread structures.
+ */
+static void
+tinfo_teardown(void)
+{
+    TINFO *tinfo;
+    u_int i;
+
+    for (i = 0; i < g.c_threads; ++i) {
+        tinfo = tinfo_list[i];
+
+        __wt_buf_free(NULL, &tinfo->vprint);
+
+        /*
+         * Assert records were not removed unless configured to do so, otherwise subsequent runs can
+         * incorrectly report scan errors.
+         */
+        testutil_assert(g.c_delete_pct != 0 || tinfo->remove == 0);
+
+        snap_teardown(tinfo);
+        key_gen_teardown(tinfo->key);
+        val_gen_teardown(tinfo->value);
+        key_gen_teardown(tinfo->lastkey);
+
+        free(tinfo);
+    }
+    free(tinfo_list);
+    tinfo_list = NULL;
+}
+
+/*
+ * Command used before rollback to stable to save the interesting files so we can replay the command
+ * as necessary.
+ *
+ * Redirect the "cd" command to /dev/null so chatty cd implementations don't add the new working
+ * directory to our output.
+ */
+#define ROLLBACK_STABLE_COPY_CMD                      \
+    "cd %s > /dev/null && "                           \
+    "rm -rf ROLLBACK.copy && mkdir ROLLBACK.copy && " \
+    "cp WiredTiger* wt* ROLLBACK.copy/"
+
+/*
+ * tinfo_rollback_to_stable_and_check --
+ *     Do a rollback to stable, then check that changes are correct from what we know in the worker
+ *     thread structures.
+ */
+static void
+tinfo_rollback_to_stable_and_check(WT_SESSION *session)
+{
+    WT_CURSOR *cursor;
+    WT_DECL_RET;
+    char cmd[512];
+
+    testutil_check(__wt_snprintf(cmd, sizeof(cmd), ROLLBACK_STABLE_COPY_CMD, g.home));
+    if ((ret = system(cmd)) != 0)
+        testutil_die(ret, "rollback to stable copy (\"%s\") failed", cmd);
+    trace_msg("%-10s ts=%" PRIu64, "rts", g.stable_timestamp);
+
+    g.wts_conn->rollback_to_stable(g.wts_conn, NULL);
+
+    /* Check the saved snap operations for consistency. */
+    testutil_check(session->open_cursor(session, g.uri, NULL, NULL, &cursor));
+    snap_repeat_rollback(cursor, tinfo_list, g.c_threads);
+    testutil_check(cursor->close(cursor));
+}
+
+/*
  * operations --
  *     Perform a number of operations in a set of threads.
  */
@@ -153,29 +277,23 @@ operations(u_int ops_seconds, bool lastrun)
         quit_fourths = fourths + 15 * 4 * 60;
     }
 
+    /* Get a session. */
     testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+    /* Initialize and start the worker threads. */
+    tinfo_init();
     trace_msg("%s", "=============== thread ops start");
 
     /* Initialize locks to single-thread backups, failures, and timestamp updates. */
     lock_init(session, &g.backup_lock);
     lock_init(session, &g.ts_lock);
 
-    /*
-     * Create the per-thread structures and start the worker threads. Allocate the thread structures
-     * separately to minimize false sharing.
-     */
-    tinfo_list = dcalloc((size_t)g.c_threads + 1, sizeof(TINFO *));
     for (i = 0; i < g.c_threads; ++i) {
-        tinfo_list[i] = tinfo = dcalloc(1, sizeof(TINFO));
-
-        tinfo->id = (int)i + 1;
-        tinfo->state = TINFO_RUNNING;
+        tinfo = tinfo_list[i];
         testutil_check(__wt_thread_create(NULL, &tinfo->tid, ops, tinfo));
     }
 
-    /*
-     * If a multi-threaded run, start optional special-purpose threads.
-     */
+    /* Start optional special-purpose threads. */
     if (g.c_alter)
         testutil_check(__wt_thread_create(NULL, &alter_tid, alter, NULL));
     if (g.c_backups)
@@ -278,21 +396,14 @@ operations(u_int ops_seconds, bool lastrun)
     lock_destroy(session, &g.ts_lock);
 
     trace_msg("%s", "=============== thread ops stop");
-    testutil_check(session->close(session, NULL));
 
-    for (i = 0; i < g.c_threads; ++i) {
-        tinfo = tinfo_list[i];
+    if (g.c_txn_rollback_to_stable)
+        tinfo_rollback_to_stable_and_check(session);
 
-        __wt_buf_free(NULL, &tinfo->vprint);
+    testutil_check(session->close(session, NULL));
 
-        /*
-         * Assert records were not removed unless configured to do so, otherwise subsequent runs can
-         * incorrectly report scan errors.
-         */
-        testutil_assert(g.c_delete_pct != 0 || tinfo->remove == 0);
-        free(tinfo);
-    }
-    free(tinfo_list);
+    if (lastrun)
+        tinfo_teardown();
 }
 
 /*
@@ -335,7 +446,7 @@ begin_transaction_ts(TINFO *tinfo, u_int *iso_configp)
         testutil_check(__wt_snprintf(buf, sizeof(buf), "read_timestamp=%" PRIx64, ts));
         ret = session->timestamp_transaction(session, buf);
         if (ret == 0) {
-            snap_init(tinfo, ts, true);
+            snap_op_init(tinfo, ts, true);
             trace_op(tinfo, "begin snapshot read-ts=%" PRIu64 " (repeatable)", ts);
             return;
         }
@@ -363,7 +474,7 @@ begin_transaction_ts(TINFO *tinfo, u_int *iso_configp)
 
     lock_writeunlock(session, &g.ts_lock);
 
-    snap_init(tinfo, ts, false);
+    snap_op_init(tinfo, ts, false);
     trace_op(tinfo, "begin snapshot read-ts=%" PRIu64 " (not repeatable)", ts);
 }
 
@@ -401,7 +512,7 @@ begin_transaction(TINFO *tinfo, u_int *iso_configp)
 
     wiredtiger_begin_transaction(session, config);
 
-    snap_init(tinfo, WT_TS_NONE, false);
+    snap_op_init(tinfo, WT_TS_NONE, false);
     trace_op(tinfo, "begin %s", config);
 }
 
@@ -605,10 +716,10 @@ ops(void *arg)
     testutil_check(__wt_thread_str(tinfo->tidbuf, sizeof(tinfo->tidbuf)));
 
     /*
-     * Characterize the per-thread random number generator. Normally we want independent behavior
-     * so threads start in different parts of the RNG space, but we've found bugs by having the
-     * threads pound on the same key/value pairs, that is, by making them traverse the same RNG
-     *  space. 75% of the time we run in independent RNG space.
+     * Characterize the per-thread random number generator. Normally we want independent behavior so
+     * threads start in different parts of the RNG space, but we've found bugs by having the threads
+     * pound on the same key/value pairs, that is, by making them traverse the same RNG space. 75%
+     * of the time we run in independent RNG space.
      */
     if (g.c_independent_thread_rng)
         __wt_random_init_seed(NULL, &tinfo->rnd);
@@ -617,17 +728,6 @@ ops(void *arg)
 
     iso_config = ISOLATION_RANDOM; /* -Wconditional-uninitialized */
 
-    /* Tracking of transactional snapshot isolation operations. */
-    tinfo->snap = tinfo->snap_first = tinfo->snap_list;
-
-    /* Set up the default key and value buffers. */
-    tinfo->key = &tinfo->_key;
-    key_gen_init(tinfo->key);
-    tinfo->value = &tinfo->_value;
-    val_gen_init(tinfo->value);
-    tinfo->lastkey = &tinfo->_lastkey;
-    key_gen_init(tinfo->lastkey);
-
     /* Set the first operation where we'll create sessions and cursors. */
     cursor = NULL;
     session = NULL;
@@ -996,16 +1096,11 @@ rollback:
         intxn = false;
     }
 
-    if (session != NULL)
+    if (session != NULL) {
         testutil_check(session->close(session, NULL));
-
-    for (i = 0; i < WT_ELEMENTS(tinfo->snap_list); ++i) {
-        free(tinfo->snap_list[i].kdata);
-        free(tinfo->snap_list[i].vdata);
+        tinfo->cursor = NULL;
+        tinfo->session = NULL;
     }
-    key_gen_teardown(tinfo->key);
-    val_gen_teardown(tinfo->value);
-    key_gen_teardown(tinfo->lastkey);
 
     tinfo->state = TINFO_COMPLETE;
     return (WT_THREAD_RET_VALUE);
diff --git a/src/third_party/wiredtiger/test/format/snap.c b/src/third_party/wiredtiger/test/format/snap.c
index 3f82e5b6499..4fe784e87db 100644
--- a/src/third_party/wiredtiger/test/format/snap.c
+++ b/src/third_party/wiredtiger/test/format/snap.c
@@ -29,15 +29,111 @@
 #include "format.h"
 
 /*
+ * Issue a warning when there enough consecutive unsuccessful checks for rollback to stable.
+ */
+#define WARN_RTS_NO_CHECK 5
+
+/*
  * snap_init --
  *     Initialize the repeatable operation tracking.
  */
 void
-snap_init(TINFO *tinfo, uint64_t read_ts, bool repeatable_reads)
+snap_init(TINFO *tinfo)
+{
+    /*
+     * We maintain two snap lists. The current one is indicated by tinfo->s, and keeps the most
+     * recent operations. The other one is used when we are running with rollback_to_stable. When
+     * each thread notices that the stable timestamp has changed, it stashes the current snap list
+     * and starts fresh with the other snap list. After we've completed a rollback_to_stable, we can
+     * the secondary snap list to see the state of keys/values seen and updated at the time of the
+     * rollback.
+     */
+    if (g.c_txn_rollback_to_stable) {
+        tinfo->s = &tinfo->snap_states[1];
+        tinfo->snap_list = dcalloc(SNAP_LIST_SIZE, sizeof(SNAP_OPS));
+        tinfo->snap_end = &tinfo->snap_list[SNAP_LIST_SIZE];
+    }
+    tinfo->s = &tinfo->snap_states[0];
+    tinfo->snap_list = dcalloc(SNAP_LIST_SIZE, sizeof(SNAP_OPS));
+    tinfo->snap_end = &tinfo->snap_list[SNAP_LIST_SIZE];
+    tinfo->snap_current = tinfo->snap_list;
+}
+
+/*
+ * snap_teardown --
+ *     Tear down the repeatable operation tracking structures.
+ */
+void
+snap_teardown(TINFO *tinfo)
 {
+    SNAP_OPS *snaplist;
+    u_int i, snap_index;
+
+    for (snap_index = 0; snap_index < WT_ELEMENTS(tinfo->snap_states); snap_index++)
+        if ((snaplist = tinfo->snap_states[snap_index].snap_state_list) != NULL) {
+            for (i = 0; i < SNAP_LIST_SIZE; ++i) {
+                free(snaplist[i].kdata);
+                free(snaplist[i].vdata);
+            }
+            free(snaplist);
+        }
+}
+
+/*
+ * snap_clear --
+ *     Clear a single snap entry.
+ */
+static void
+snap_clear_one(SNAP_OPS *snap)
+{
+    snap->repeatable = false;
+}
+
+/*
+ * snap_clear --
+ *     Clear the snap list.
+ */
+static void
+snap_clear(TINFO *tinfo)
+{
+    SNAP_OPS *snap;
+
+    for (snap = tinfo->snap_list; snap < tinfo->snap_end; ++snap)
+        snap_clear_one(snap);
+}
+
+/*
+ * snap_op_init --
+ *     Initialize the repeatable operation tracking for each new operation.
+ */
+void
+snap_op_init(TINFO *tinfo, uint64_t read_ts, bool repeatable_reads)
+{
+    uint64_t stable_ts;
+
     ++tinfo->opid;
 
-    tinfo->snap_first = tinfo->snap;
+    if (g.c_txn_rollback_to_stable) {
+        /*
+         * If the stable timestamp has changed and we've advanced beyond it, preserve the current
+         * snapshot history up to this point, we'll use it verify rollback_to_stable. Switch our
+         * tracking to the other snap list.
+         */
+        stable_ts = __wt_atomic_addv64(&g.stable_timestamp, 0);
+        if (stable_ts != tinfo->stable_ts && read_ts > stable_ts) {
+            tinfo->stable_ts = stable_ts;
+            if (tinfo->s == &tinfo->snap_states[0])
+                tinfo->s = &tinfo->snap_states[1];
+            else
+                tinfo->s = &tinfo->snap_states[0];
+            tinfo->snap_current = tinfo->snap_list;
+
+            /* Clear out older info from the snap list. */
+            snap_clear(tinfo);
+        }
+    }
+
+    tinfo->snap_first = tinfo->snap_current;
 
     tinfo->read_ts = read_ts;
     tinfo->repeatable_reads = repeatable_reads;
@@ -54,7 +150,7 @@ snap_track(TINFO *tinfo, thread_op op)
     WT_ITEM *ip;
     SNAP_OPS *snap;
 
-    snap = tinfo->snap;
+    snap = tinfo->snap_current;
     snap->op = op;
     snap->opid = tinfo->opid;
     snap->keyno = tinfo->keyno;
@@ -82,15 +178,15 @@ snap_track(TINFO *tinfo, thread_op op)
     }
 
     /* Move to the next slot, wrap at the end of the circular buffer. */
-    if (++tinfo->snap >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
-        tinfo->snap = tinfo->snap_list;
+    if (++tinfo->snap_current >= tinfo->snap_end)
+        tinfo->snap_current = tinfo->snap_list;
 
     /*
      * It's possible to pass this transaction's buffer starting point and start replacing our own
      * entries. If that happens, we can't repeat operations because we don't know which ones were
      * previously modified.
      */
-    if (tinfo->snap->opid == tinfo->opid)
+    if (tinfo->snap_current->opid == tinfo->opid)
         tinfo->repeatable_wrap = true;
 }
 
@@ -236,10 +332,9 @@ static void
 snap_ts_clear(TINFO *tinfo, uint64_t ts)
 {
     SNAP_OPS *snap;
-    int count;
 
     /* Check from the first slot to the last. */
-    for (snap = tinfo->snap_list, count = WT_ELEMENTS(tinfo->snap_list); count > 0; --count, ++snap)
+    for (snap = tinfo->snap_list; snap < tinfo->snap_end; ++snap)
         if (snap->repeatable && snap->ts <= ts)
             snap->repeatable = false;
 }
@@ -297,7 +392,7 @@ snap_repeat_ok_commit(TINFO *tinfo, SNAP_OPS *current)
      */
     for (p = current;;) {
         /* Wrap at the end of the circular buffer. */
-        if (++p >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
+        if (++p >= tinfo->snap_end)
             p = tinfo->snap_list;
         if (p->opid != tinfo->opid)
             break;
@@ -311,7 +406,7 @@ snap_repeat_ok_commit(TINFO *tinfo, SNAP_OPS *current)
     for (p = current;;) {
         /* Wrap at the beginning of the circular buffer. */
         if (--p < tinfo->snap_list)
-            p = &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list) - 1];
+            p = &tinfo->snap_list[SNAP_LIST_SIZE - 1];
         if (p->opid != tinfo->opid)
             break;
 
@@ -341,7 +436,7 @@ snap_repeat_ok_rollback(TINFO *tinfo, SNAP_OPS *current)
     for (p = current;;) {
         /* Wrap at the beginning of the circular buffer. */
         if (--p < tinfo->snap_list)
-            p = &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list) - 1];
+            p = &tinfo->snap_list[SNAP_LIST_SIZE - 1];
         if (p->opid != tinfo->opid)
             break;
 
@@ -367,7 +462,7 @@ snap_repeat_txn(WT_CURSOR *cursor, TINFO *tinfo)
     /* Check from the first operation we saved to the last. */
     for (current = tinfo->snap_first;; ++current) {
         /* Wrap at the end of the circular buffer. */
-        if (current >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
+        if (current >= tinfo->snap_end)
             current = tinfo->snap_list;
         if (current->opid != tinfo->opid)
             break;
@@ -401,7 +496,7 @@ snap_repeat_update(TINFO *tinfo, bool committed)
     /* Check from the first operation we saved to the last. */
     for (current = tinfo->snap_first;; ++current) {
         /* Wrap at the end of the circular buffer. */
-        if (current >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
+        if (current >= tinfo->snap_end)
             current = tinfo->snap_list;
         if (current->opid != tinfo->opid)
             break;
@@ -429,45 +524,22 @@ snap_repeat_update(TINFO *tinfo, bool committed)
 }
 
 /*
- * snap_repeat_single --
- *     Repeat an historic operation.
+ * snap_repeat --
+ *     Repeat one operation.
  */
-void
-snap_repeat_single(WT_CURSOR *cursor, TINFO *tinfo)
+static void
+snap_repeat(WT_CURSOR *cursor, TINFO *tinfo, SNAP_OPS *snap, bool rollback_allowed)
 {
-    SNAP_OPS *snap;
     WT_DECL_RET;
     WT_SESSION *session;
-    int count;
-    u_int v;
     char buf[64];
 
     session = cursor->session;
 
     /*
-     * Start at a random spot in the list of operations and look for a read to retry. Stop when
-     * we've walked the entire list or found one.
-     */
-    v = mmrand(&tinfo->rnd, 1, WT_ELEMENTS(tinfo->snap_list)) - 1;
-    for (snap = &tinfo->snap_list[v], count = WT_ELEMENTS(tinfo->snap_list); count > 0;
-         --count, ++snap) {
-        /* Wrap at the end of the circular buffer. */
-        if (snap >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
-            snap = tinfo->snap_list;
-
-        if (snap->repeatable)
-            break;
-    }
-
-    if (count == 0)
-        return;
-
-    /*
      * Start a new transaction. Set the read timestamp. Verify the record. Discard the transaction.
      */
-    while ((ret = session->begin_transaction(session, "isolation=snapshot")) == WT_CACHE_FULL)
-        __wt_yield();
-    testutil_check(ret);
+    wiredtiger_begin_transaction(session, "isolation=snapshot");
 
     /*
      * If the timestamp has aged out of the system, we'll get EINVAL when we try and set it.
@@ -482,7 +554,7 @@ snap_repeat_single(WT_CURSOR *cursor, TINFO *tinfo)
         /* The only expected error is rollback. */
         ret = snap_verify(cursor, tinfo, snap);
 
-        if (ret != 0 && ret != WT_ROLLBACK)
+        if (ret != 0 && (!rollback_allowed || ret != WT_ROLLBACK))
             testutil_check(ret);
     } else if (ret == EINVAL)
         snap_ts_clear(tinfo, snap->ts);
@@ -492,3 +564,91 @@ snap_repeat_single(WT_CURSOR *cursor, TINFO *tinfo)
     /* Discard the transaction. */
     testutil_check(session->rollback_transaction(session, NULL));
 }
+
+/*
+ * snap_repeat_single --
+ *     Repeat an historic operation.
+ */
+void
+snap_repeat_single(WT_CURSOR *cursor, TINFO *tinfo)
+{
+    SNAP_OPS *snap;
+    u_int v;
+    int count;
+
+    /*
+     * Start at a random spot in the list of operations and look for a read to retry. Stop when
+     * we've walked the entire list or found one.
+     */
+    v = mmrand(&tinfo->rnd, 1, SNAP_LIST_SIZE) - 1;
+    for (snap = &tinfo->snap_list[v], count = SNAP_LIST_SIZE; count > 0; --count, ++snap) {
+        /* Wrap at the end of the circular buffer. */
+        if (snap >= tinfo->snap_end)
+            snap = tinfo->snap_list;
+
+        if (snap->repeatable)
+            break;
+    }
+
+    if (count == 0)
+        return;
+
+    snap_repeat(cursor, tinfo, snap, true);
+}
+
+/*
+ * snap_repeat_rollback --
+ *     Repeat all known operations after a rollback.
+ */
+void
+snap_repeat_rollback(WT_CURSOR *cursor, TINFO **tinfo_array, size_t tinfo_count)
+{
+    SNAP_OPS *snap;
+    SNAP_STATE *state;
+    TINFO *tinfo, **tinfop;
+    uint32_t count;
+    size_t i, statenum;
+    char buf[100];
+
+    count = 0;
+
+    track("rollback_to_stable: checking", 0ULL, NULL);
+    for (i = 0, tinfop = tinfo_array; i < tinfo_count; ++i, ++tinfop) {
+        tinfo = *tinfop;
+
+        /*
+         * For this thread, walk through both sets of snaps ("states"), looking for entries that are
+         * repeatable and have relevant timestamps. One set will have the most current operations,
+         * meaning they will likely be newer than the stable timestamp, and thus cannot be checked.
+         * The other set typically has operations that are just before the stable timestamp, so are
+         * candidates for checking.
+         */
+        for (statenum = 0; statenum < WT_ELEMENTS(tinfo->snap_states); statenum++) {
+            state = &tinfo->snap_states[statenum];
+            for (snap = state->snap_state_list; snap < state->snap_state_end; ++snap) {
+                if (snap->repeatable && snap->ts <= g.stable_timestamp &&
+                  snap->ts >= g.oldest_timestamp) {
+                    snap_repeat(cursor, tinfo, snap, false);
+                    ++count;
+                    if (count % 100 == 0) {
+                        testutil_check(__wt_snprintf(
+                          buf, sizeof(buf), "rollback_to_stable: %" PRIu32 " ops repeated", count));
+                        track(buf, 0ULL, NULL);
+                    }
+                }
+                snap_clear_one(snap);
+            }
+        }
+    }
+
+    /* Show the final result and check that we're accomplishing some checking. */
+    testutil_check(
+      __wt_snprintf(buf, sizeof(buf), "rollback_to_stable: %" PRIu32 " ops repeated", count));
+    track(buf, 0ULL, NULL);
+    if (count == 0) {
+        if (++g.rts_no_check >= WARN_RTS_NO_CHECK)
+            fprintf(stderr,
+              "Warning: %" PRIu32 " consecutive runs with no rollback_to_stable checking\n", count);
+    } else
+        g.rts_no_check = 0;
+}
diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c
index 787488c15b7..d2ae281e281 100644
--- a/src/third_party/wiredtiger/test/format/util.c
+++ b/src/third_party/wiredtiger/test/format/util.c
@@ -28,12 +28,46 @@
 
 #include "format.h"
 
+/*
+ * track_ts_diff --
+ *     Return a one character descriptor of relative timestamp values.
+ */
+static const char *
+track_ts_diff(uint64_t left_ts, uint64_t right_ts)
+{
+    if (left_ts < right_ts)
+        return "+";
+    else if (left_ts == right_ts)
+        return "=";
+    else
+        return "-";
+}
+
+/*
+ * track_ts_dots --
+ *     Return an entry in the time stamp progress indicator.
+ */
+static const char *
+track_ts_dots(u_int dot_count)
+{
+    static const char *dots[] = {"   ", ".  ", ".. ", "..."};
+
+    return (dots[dot_count % WT_ELEMENTS(dots)]);
+}
+
+/*
+ * track --
+ *     Show a status line of operations and time stamp progress.
+ */
 void
 track(const char *tag, uint64_t cnt, TINFO *tinfo)
 {
-    static size_t lastlen = 0;
+    static size_t last_len;
+    static uint64_t last_cur, last_old, last_stable;
+    static u_int cur_dot_cnt, old_dot_cnt, stable_dot_cnt;
     size_t len;
-    char msg[128];
+    uint64_t cur_ts, old_ts, stable_ts;
+    char msg[128], ts_msg[64];
 
     if (g.c_quiet || tag == NULL)
         return;
@@ -44,12 +78,49 @@ track(const char *tag, uint64_t cnt, TINFO *tinfo)
     else if (tinfo == NULL)
         testutil_check(__wt_snprintf_len_set(
           msg, sizeof(msg), &len, "%4" PRIu32 ": %s: %" PRIu64, g.run_cnt, tag, cnt));
-    else
+    else {
+        ts_msg[0] = '\0';
+        if (g.c_txn_timestamps) {
+            /*
+             * Don't worry about having a completely consistent set of timestamps.
+             */
+            old_ts = g.oldest_timestamp;
+            stable_ts = g.stable_timestamp;
+            cur_ts = g.timestamp;
+
+            if (old_ts != last_old) {
+                ++old_dot_cnt;
+                last_old = old_ts;
+            }
+            if (stable_ts != last_stable) {
+                ++stable_dot_cnt;
+                last_stable = stable_ts;
+            }
+            if (cur_ts != last_cur) {
+                ++cur_dot_cnt;
+                last_cur = cur_ts;
+            }
+
+            if (g.c_txn_rollback_to_stable)
+                testutil_check(__wt_snprintf(ts_msg, sizeof(ts_msg),
+                  " old%s"
+                  "stb%s%s"
+                  "ts%s%s",
+                  track_ts_dots(old_dot_cnt), track_ts_diff(old_ts, stable_ts),
+                  track_ts_dots(stable_dot_cnt), track_ts_diff(stable_ts, cur_ts),
+                  track_ts_dots(cur_dot_cnt)));
+            else
+                testutil_check(__wt_snprintf(ts_msg, sizeof(ts_msg),
+                  " old%s"
+                  "ts%s%s",
+                  track_ts_dots(old_dot_cnt), track_ts_diff(old_ts, cur_ts),
+                  track_ts_dots(cur_dot_cnt)));
+        }
         testutil_check(__wt_snprintf_len_set(msg, sizeof(msg), &len, "%4" PRIu32 ": %s: "
-                                                                     "search %" PRIu64 "%s, "
-                                                                     "insert %" PRIu64 "%s, "
-                                                                     "update %" PRIu64 "%s, "
-                                                                     "remove %" PRIu64 "%s",
+                                                                     "S %" PRIu64 "%s, "
+                                                                     "I %" PRIu64 "%s, "
+                                                                     "U %" PRIu64 "%s, "
+                                                                     "R %" PRIu64 "%s%s",
           g.run_cnt, tag, tinfo->search > M(9) ? tinfo->search / M(1) : tinfo->search,
           tinfo->search > M(9) ? "M" : "",
           tinfo->insert > M(9) ? tinfo->insert / M(1) : tinfo->insert,
@@ -57,13 +128,13 @@ track(const char *tag, uint64_t cnt, TINFO *tinfo)
           tinfo->update > M(9) ? tinfo->update / M(1) : tinfo->update,
           tinfo->update > M(9) ? "M" : "",
           tinfo->remove > M(9) ? tinfo->remove / M(1) : tinfo->remove,
-          tinfo->remove > M(9) ? "M" : ""));
-
-    if (lastlen > len) {
-        memset(msg + len, ' ', (size_t)(lastlen - len));
-        msg[lastlen] = '\0';
+          tinfo->remove > M(9) ? "M" : "", ts_msg));
+    }
+    if (last_len > len) {
+        memset(msg + len, ' ', (size_t)(last_len - len));
+        msg[last_len] = '\0';
     }
-    lastlen = len;
+    last_len = len;
 
     if (printf("%s\r", msg) < 0)
         testutil_die(EIO, "printf");
@@ -163,16 +234,18 @@ fclose_and_clear(FILE **fpp)
  *     Update the timestamp once.
  */
 void
-timestamp_once(WT_SESSION *session)
+timestamp_once(WT_SESSION *session, bool allow_lag)
 {
     static const char *oldest_timestamp_str = "oldest_timestamp=";
+    static const char *stable_timestamp_str = "stable_timestamp=";
     WT_CONNECTION *conn;
     WT_DECL_RET;
-    char buf[WT_TS_HEX_STRING_SIZE + 64];
+    size_t len;
+    uint64_t all_durable, stable;
+    char buf[WT_TS_HEX_STRING_SIZE * 2 + 64], tsbuf[WT_TS_HEX_STRING_SIZE];
 
     conn = g.wts_conn;
-
-    testutil_check(__wt_snprintf(buf, sizeof(buf), "%s", oldest_timestamp_str));
+    stable = 0ULL;
 
     /*
      * Lock out transaction timestamp operations. The lock acts as a barrier ensuring we've checked
@@ -183,16 +256,57 @@ timestamp_once(WT_SESSION *session)
     if (LOCK_INITIALIZED(&g.ts_lock))
         lock_writelock(session, &g.ts_lock);
 
-    ret = conn->query_timestamp(conn, buf + strlen(oldest_timestamp_str), "get=all_durable");
-    testutil_assert(ret == 0 || ret == WT_NOTFOUND);
-    if (ret == 0)
+    if ((ret = conn->query_timestamp(conn, tsbuf, "get=all_durable")) == 0) {
+        timestamp_parse(session, tsbuf, &all_durable);
+
+        /*
+         * If a lag is permitted, move the oldest timestamp half the way to the current
+         * "all_durable" timestamp.
+         */
+        if (allow_lag)
+            g.oldest_timestamp = (all_durable + g.oldest_timestamp) / 2;
+        else
+            g.oldest_timestamp = all_durable;
+        testutil_check(
+          __wt_snprintf(buf, sizeof(buf), "%s%" PRIx64, oldest_timestamp_str, g.oldest_timestamp));
+
+        /*
+         * When we're doing rollback to stable operations, we'll advance the stable timestamp to the
+         * current timestamp value.
+         */
+        if (g.c_txn_rollback_to_stable) {
+            stable = g.timestamp;
+            len = strlen(buf);
+            WT_ASSERT((WT_SESSION_IMPL *)session, len < sizeof(buf));
+            testutil_check(__wt_snprintf(
+              buf + len, sizeof(buf) - len, ",%s%" PRIx64, stable_timestamp_str, stable));
+        }
         testutil_check(conn->set_timestamp(conn, buf));
+        trace_msg("%-10s oldest=%" PRIu64 ", stable=%" PRIu64, "setts", g.oldest_timestamp, stable);
+        if (g.c_txn_rollback_to_stable)
+            g.stable_timestamp = stable;
+
+    } else
+        testutil_assert(ret == WT_NOTFOUND);
 
     if (LOCK_INITIALIZED(&g.ts_lock))
         lock_writeunlock(session, &g.ts_lock);
 }
 
 /*
+ * timestamp_parse --
+ *     Parse a timestamp to an integral value.
+ */
+void
+timestamp_parse(WT_SESSION *session, const char *str, uint64_t *tsp)
+{
+    char *p;
+
+    *tsp = strtoull(str, &p, 16);
+    WT_ASSERT((WT_SESSION_IMPL *)session, p - str <= 16);
+}
+
+/*
  * timestamp --
  *     Periodically update the oldest timestamp.
  */
@@ -212,16 +326,28 @@ timestamp(void *arg)
     /* Update the oldest timestamp at least once every 15 seconds. */
     done = false;
     do {
+        random_sleep(&g.rnd, 15);
+
         /*
-         * Do a final bump of the oldest timestamp as part of shutting down the worker threads,
-         * otherwise recent operations can prevent verify from running.
+         * If running without rollback_to_stable, do a final bump of the oldest timestamp as part of
+         * shutting down the worker threads, otherwise recent operations can prevent verify from
+         * running.
+         *
+         * With rollback_to_stable configured, don't do a bump at the end of the run. We need the
+         * worker threads to have time to see any changes in the stable timestamp, so they can stash
+         * their stable state - if we bump they will have no time to do that. And when we rollback,
+         * we'd like to see a reasonable amount of data changed. So we don't bump the stable
+         * timestamp, and we can't bump the oldest timestamp as well, as it would get ahead of the
+         * stable timestamp, which is not allowed.
          */
         if (g.workers_finished)
             done = true;
-        else
-            random_sleep(&g.rnd, 15);
 
-        timestamp_once(session);
+        if (!done || !g.c_txn_rollback_to_stable) {
+            timestamp_once(session, true);
+            if (done)
+                timestamp_once(session, true);
+        }
 
     } while (!done);
 
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable11.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable11.py
new file mode 100755
index 00000000000..eed6d8e0c26
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable11.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import fnmatch, os, shutil, time
+from helper import copy_wiredtiger_home
+from test_rollback_to_stable01 import test_rollback_to_stable_base
+from wiredtiger import stat
+from wtdataset import SimpleDataSet
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+    return '%x' % t
+
+# test_rollback_to_stable11.py
+# Test the rollback to stable is retrieving the proper hs update.
+class test_rollback_to_stable11(test_rollback_to_stable_base):
+    session_config = 'isolation=snapshot'
+
+    prepare_values = [
+        ('no_prepare', dict(prepare=False)),
+        ('prepare', dict(prepare=True))
+    ]
+
+    scenarios = make_scenarios(prepare_values)
+
+    def conn_config(self):
+        config = 'cache_size=1MB,statistics=(all),log=(enabled=true)'
+        return config
+
+    def simulate_crash_restart(self, olddir, newdir):
+        ''' Simulate a crash from olddir and restart in newdir. '''
+        # with the connection still open, copy files to new directory
+        shutil.rmtree(newdir, ignore_errors=True)
+        os.mkdir(newdir)
+        for fname in os.listdir(olddir):
+            fullname = os.path.join(olddir, fname)
+            # Skip lock file on Windows since it is locked
+            if os.path.isfile(fullname) and \
+                "WiredTiger.lock" not in fullname and \
+                "Tmplog" not in fullname and \
+                "Preplog" not in fullname:
+                shutil.copy(fullname, newdir)
+        #
+        # close the original connection and open to new directory
+        # NOTE:  This really cannot test the difference between the
+        # write-no-sync (off) version of log_flush and the sync
+        # version since we're not crashing the system itself.
+        #
+        self.close_conn()
+        self.conn = self.setUpConnectionOpen(newdir)
+        self.session = self.setUpSessionOpen(self.conn)
+
+    def test_rollback_to_stable(self):
+        nrows = 1
+
+        # Create a table without logging.
+        uri = "table:rollback_to_stable11"
+        ds = SimpleDataSet(
+            self, uri, 0, key_format="i", value_format="S", config='log=(enabled=false)')
+        ds.populate()
+
+        # Pin oldest and stable to timestamp 10.
+        self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(10) +
+            ',stable_timestamp=' + timestamp_str(10))
+
+        value_a = "aaaaa" * 100
+        value_b = "bbbbb" * 100
+        value_c = "ccccc" * 100
+        value_d = "ddddd" * 100
+
+        # Perform several updates.
+        self.large_updates(uri, value_a, ds, nrows, 20)
+        self.large_updates(uri, value_a, ds, nrows, 20)
+        self.large_updates(uri, value_a, ds, nrows, 20)
+        self.large_updates(uri, value_b, ds, nrows, 20)
+
+        # Verify data is visible and correct.
+        self.check(value_b, uri, nrows, 20)
+
+        # Pin stable to timestamp 30 if prepare otherwise 20.
+        if self.prepare:
+            self.conn.set_timestamp('stable_timestamp=' + timestamp_str(30))
+        else:
+            self.conn.set_timestamp('stable_timestamp=' + timestamp_str(20))
+
+        # Checkpoint to ensure that all the updates are flushed to disk.
+        self.session.checkpoint()
+
+        # Simulate a server crash and restart.
+        self.simulate_crash_restart(".", "RESTART")
+
+        # Check that the correct data is seen at and after the stable timestamp.
+        self.check(value_b, uri, nrows, 20)
+
+        # Perform several updates.
+        self.large_updates(uri, value_c, ds, nrows, 30)
+        self.large_updates(uri, value_c, ds, nrows, 30)
+        self.large_updates(uri, value_c, ds, nrows, 30)
+        self.large_updates(uri, value_d, ds, nrows, 30)
+
+        # Verify data is visible and correct.
+        self.check(value_d, uri, nrows, 30)
+
+        # Checkpoint to ensure that all the updates are flushed to disk.
+        self.session.checkpoint()
+
+        # Simulate a server crash and restart.
+        self.simulate_crash_restart("RESTART", "RESTART2")
+
+        # Check that the correct data is seen at and after the stable timestamp.
+        self.check(value_b, uri, nrows, 20)
+        self.check(value_b, uri, nrows, 40)
+
+        stat_cursor = self.session.open_cursor('statistics:', None, None)
+        calls = stat_cursor[stat.conn.txn_rts][2]
+        hs_removed = stat_cursor[stat.conn.txn_rts_hs_removed][2]
+        hs_sweep = stat_cursor[stat.conn.txn_rts_sweep_hs_keys][2]
+        keys_removed = stat_cursor[stat.conn.txn_rts_keys_removed][2]
+        keys_restored = stat_cursor[stat.conn.txn_rts_keys_restored][2]
+        pages_visited = stat_cursor[stat.conn.txn_rts_pages_visited][2]
+        upd_aborted = stat_cursor[stat.conn.txn_rts_upd_aborted][2]
+        stat_cursor.close()
+
+        self.assertEqual(calls, 0)
+        self.assertEqual(keys_removed, 0)
+        self.assertEqual(keys_restored, 0)
+        self.assertEqual(upd_aborted, 0)
+        self.assertGreater(pages_visited, 0)
+        self.assertEqual(hs_removed, 4)
+        self.assertEqual(hs_sweep, 0)
+
+if __name__ == '__main__':
+    wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable12.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable12.py
new file mode 100755
index 00000000000..b4fa7a9087b
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable12.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import fnmatch, os, shutil, time
+from helper import copy_wiredtiger_home
+from test_rollback_to_stable01 import test_rollback_to_stable_base
+from wiredtiger import stat
+from wtdataset import SimpleDataSet
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+    return '%x' % t
+
+# test_rollback_to_stable12.py
+# Test the rollback to stable operation skipping subtrees in during tree walk.
+class test_rollback_to_stable12(test_rollback_to_stable_base):
+    session_config = 'isolation=snapshot'
+
+    prepare_values = [
+        ('no_prepare', dict(prepare=False)),
+        ('prepare', dict(prepare=True))
+    ]
+
+    scenarios = make_scenarios(prepare_values)
+
+    def conn_config(self):
+        config = 'cache_size=500MB,statistics=(all),log=(enabled=true)'
+        return config
+
+    def simulate_crash_restart(self, olddir, newdir):
+        ''' Simulate a crash from olddir and restart in newdir. '''
+        # with the connection still open, copy files to new directory
+        shutil.rmtree(newdir, ignore_errors=True)
+        os.mkdir(newdir)
+        for fname in os.listdir(olddir):
+            fullname = os.path.join(olddir, fname)
+            # Skip lock file on Windows since it is locked
+            if os.path.isfile(fullname) and \
+                "WiredTiger.lock" not in fullname and \
+                "Tmplog" not in fullname and \
+                "Preplog" not in fullname:
+                shutil.copy(fullname, newdir)
+        #
+        # close the original connection and open to new directory
+        # NOTE:  This really cannot test the difference between the
+        # write-no-sync (off) version of log_flush and the sync
+        # version since we're not crashing the system itself.
+        #
+        self.close_conn()
+        self.conn = self.setUpConnectionOpen(newdir)
+        self.session = self.setUpSessionOpen(self.conn)
+
+    def test_rollback_to_stable(self):
+        nrows = 1000000
+
+        # Create a table without logging.
+        uri = "table:rollback_to_stable12"
+        ds = SimpleDataSet(
+            self, uri, 0, key_format="i", value_format="S", config='split_pct=50,log=(enabled=false)')
+        ds.populate()
+
+        # Pin oldest and stable to timestamp 10.
+        self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(10) +
+            ',stable_timestamp=' + timestamp_str(10))
+
+        value_a = "aaaaa" * 100
+        value_b = "bbbbb" * 100
+
+        # Perform several updates.
+        self.large_updates(uri, value_a, ds, nrows, 20)
+
+        # Verify data is visible and correct.
+        self.check(value_a, uri, nrows, 20)
+
+        # Pin stable to timestamp 30 if prepare otherwise 20.
+        if self.prepare:
+            self.conn.set_timestamp('stable_timestamp=' + timestamp_str(30))
+        else:
+            self.conn.set_timestamp('stable_timestamp=' + timestamp_str(20))
+
+        # Load a single row modification to be removed.
+        commit_ts = 30
+        cursor = self.session.open_cursor(uri)
+        self.session.begin_transaction()
+        cursor[ds.key(1)] = value_b
+        if self.prepare:
+            self.session.prepare_transaction('prepare_timestamp=' + timestamp_str(commit_ts-1))
+            self.session.timestamp_transaction('commit_timestamp=' + timestamp_str(commit_ts))
+            self.session.timestamp_transaction('durable_timestamp=' + timestamp_str(commit_ts+1))
+            self.session.commit_transaction()
+        else:
+            self.session.commit_transaction('commit_timestamp=' + timestamp_str(commit_ts))
+        cursor.close()
+
+        self.session.checkpoint()
+
+        # Simulate a server crash and restart.
+        self.simulate_crash_restart(".", "RESTART")
+
+        # Check that the correct data is seen at and after the stable timestamp.
+        self.check(value_a, uri, nrows, 30)
+
+        stat_cursor = self.session.open_cursor('statistics:', None, None)
+        calls = stat_cursor[stat.conn.txn_rts][2]
+        hs_removed = stat_cursor[stat.conn.txn_rts_hs_removed][2]
+        hs_sweep = stat_cursor[stat.conn.txn_rts_sweep_hs_keys][2]
+        keys_removed = stat_cursor[stat.conn.txn_rts_keys_removed][2]
+        keys_restored = stat_cursor[stat.conn.txn_rts_keys_restored][2]
+        pages_visited = stat_cursor[stat.conn.txn_rts_pages_visited][2]
+        pages_walk_skipped = stat_cursor[stat.conn.txn_rts_skip_interal_pages_walk][2]
+        upd_aborted = stat_cursor[stat.conn.txn_rts_upd_aborted][2]
+        stat_cursor.close()
+
+        self.assertEqual(calls, 0)
+        self.assertEqual(keys_removed, 0)
+        self.assertEqual(keys_restored, 0)
+        self.assertGreaterEqual(upd_aborted, 0)
+        self.assertGreater(pages_visited, 0)
+        self.assertGreaterEqual(hs_removed, 0)
+        self.assertEqual(hs_sweep, 0)
+        self.assertGreater(pages_walk_skipped, 0)
+
+if __name__ == '__main__':
+    wttest.run()