diff options
280 files changed, 4509 insertions, 2766 deletions
diff --git a/.gitignore b/.gitignore index 4c35f527d92..7cb6d71c0d0 100644 --- a/.gitignore +++ b/.gitignore @@ -176,6 +176,9 @@ src/mongo/embedded/java/aar/build/ src/mongo/embedded/java/jar/build/ local.properties +# clangd language server +.clangd/ + compile_commands.json generated_resmoke_config selected_tests_config diff --git a/README.third_party.md b/README.third_party.md index 11ade4b1b24..b91064f9d7a 100644 --- a/README.third_party.md +++ b/README.third_party.md @@ -19,40 +19,40 @@ not authored by MongoDB, and has a license which requires reproduction, a notice will be included in `THIRD-PARTY-NOTICES`. -| Name | License | Upstream Version | Vendored Version | Emits persisted data | Distributed in Release Binaries | -| ---------------------------| ----------------- | ---------------- | ------------------| :------------------: | :-----------------------------: | -| [abseil-cpp] | Apache-2.0 | | 070f6e47b3 | | ✗ | -| Aladdin MD5 | Zlib | | Unknown | ✗ | ✗ | -| [ASIO] | BSL-1.0 | 1.16.1 | b0926b61b0 | | ✗ | -| [benchmark] | Apache-2.0 | 1.5.1 | 1.5.0 | | | -| [Boost] | BSL-1.0 | 1.73.0 | 1.70.0 | | ✗ | -| [fmt] | BSD-2-Clause | 6.2.1 | 6.1.1 | | ✗ | -| [GPerfTools] | BSD-3-Clause | 2.8 | 2.8 | | ✗ | -| [ICU4] | ICU | 67.1 | 57.1 | ✗ | ✗ | -| [Intel Decimal FP Library] | BSD-3-Clause | 2.0 Update 2 | 2.0 Update 1 | | ✗ | -| [JSON-Schema-Test-Suite] | MIT | | 728066f9c5 | | | -| [kms-message] | | | 75e391a037 | | ✗ | -| [libstemmer] | BSD-3-Clause | | Unknown | ✗ | ✗ | -| [linenoise] | BSD-3-Clause | | Unknown + changes | | ✗ | -| [MozJS] | MPL-2.0 | ESR 68.9 | ESR 60.3.0 | | ✗ | -| [MurmurHash3] | Public Domain | | Unknown + changes | ✗ | ✗ | -| [ocspbuilder] | MIT | 0.10.2 | 0.10.2 | | | -| [ocspresponder] | Apache-2.0 | 0.5.0 | 0.5.0 | | | -| [peglib] | MIT | 0.1.12 | 0.1.12 | | ✗ | -| [Pcre] | BSD-3-Clause | 8.44 | 8.42 | | ✗ | -| [S2] | Apache-2.0 | | Unknown | ✗ | ✗ | -| [SafeInt] | MIT | 3.24 | 3.23 | | | -| [scons] | MIT | 3.1.2 | 3.1.2 | | | -| 
[Snappy] | BSD-3-Clause | 1.1.8 | 1.1.7 | ✗ | ✗ | -| [timelib] | MIT | 2018.03 | 2018.01 | | ✗ | -| [TomCrypt] | Public Domain | 1.18.2 | 1.18.2 | ✗ | ✗ | -| [Unicode] | Unicode-DFS-2015 | 13.0.0 | 8.0.0 | ✗ | ✗ | -| [Valgrind] | BSD-3-Clause<sup>\[<a href="#note_vg" id="ref_vg">1</a>]</sup> | 3.16.1 | 3.11.0 | | ✗ | -| [variant] | BSL-1.0 | 1.4.0 | 1.4.0 | | ✗ | -| [wiredtiger] | | | <sup>\[<a href="#note_wt" id="ref_wt">2</a>]</sup> | ✗ | ✗ | -| [yaml-cpp] | MIT | 0.6.3 | 0.6.2 | | ✗ | -| [Zlib] | Zlib | 1.2.11 | 1.2.11 | ✗ | ✗ | -| [Zstandard] | BSD-3-Clause | 1.4.5 | 1.4.4 | ✗ | ✗ | +| Name | License | Vendored Version | Emits persisted data | Distributed in Release Binaries | +| ---------------------------| ----------------- | ------------------| :------------------: | :-----------------------------: | +| [abseil-cpp] | Apache-2.0 | 070f6e47b3 | | ✗ | +| Aladdin MD5 | Zlib | Unknown | ✗ | ✗ | +| [ASIO] | BSL-1.0 | b0926b61b0 | | ✗ | +| [benchmark] | Apache-2.0 | 1.5.0 | | | +| [Boost] | BSL-1.0 | 1.70.0 | | ✗ | +| [fmt] | BSD-2-Clause | 6.1.1 | | ✗ | +| [GPerfTools] | BSD-3-Clause | 2.8 | | ✗ | +| [ICU4] | ICU | 57.1 | ✗ | ✗ | +| [Intel Decimal FP Library] | BSD-3-Clause | 2.0 Update 1 | | ✗ | +| [JSON-Schema-Test-Suite] | MIT | 728066f9c5 | | | +| [kms-message] | | 75e391a037 | | ✗ | +| [libstemmer] | BSD-3-Clause | Unknown | ✗ | ✗ | +| [linenoise] | BSD-3-Clause | Unknown + changes | | ✗ | +| [MozJS] | MPL-2.0 | ESR 60.3.0 | | ✗ | +| [MurmurHash3] | Public Domain | Unknown + changes | ✗ | ✗ | +| [ocspbuilder] | MIT | 0.10.2 | | | +| [ocspresponder] | Apache-2.0 | 0.5.0 | | | +| [peglib] | MIT | 0.1.12 | | ✗ | +| [Pcre] | BSD-3-Clause | 8.42 | | ✗ | +| [S2] | Apache-2.0 | Unknown | ✗ | ✗ | +| [SafeInt] | MIT | 3.23 | | | +| [scons] | MIT | 3.1.2 | | | +| [Snappy] | BSD-3-Clause | 1.1.7 | ✗ | ✗ | +| [timelib] | MIT | 2018.01 | | ✗ | +| [TomCrypt] | Public Domain | 1.18.2 | ✗ | ✗ | +| [Unicode] | Unicode-DFS-2015 | 8.0.0 | ✗ | ✗ | +| [Valgrind] | 
BSD-3-Clause<sup>\[<a href="#note_vg" id="ref_vg">1</a>]</sup> | 3.11.0 | | ✗ | +| [variant] | BSL-1.0 | 1.4.0 | | ✗ | +| [wiredtiger] | | <sup>\[<a href="#note_wt" id="ref_wt">2</a>]</sup> | ✗ | ✗ | +| [yaml-cpp] | MIT | 0.6.2 | | ✗ | +| [Zlib] | Zlib | 1.2.11 | ✗ | ✗ | +| [Zstandard] | BSD-3-Clause | 1.4.4 | ✗ | ✗ | [abseil-cpp]: https://github.com/abseil/abseil-cpp [ASIO]: https://github.com/chriskohlhoff/asio diff --git a/buildscripts/resmokelib/core/process.py b/buildscripts/resmokelib/core/process.py index 2c458e4320f..b3d19bf599a 100644 --- a/buildscripts/resmokelib/core/process.py +++ b/buildscripts/resmokelib/core/process.py @@ -128,7 +128,7 @@ class Process(object): logger=self.logger.name.replace('/', '-'), process=os.path.basename(self.args[0]), pid=self.pid, t=now_str) recorder_args = [ - _config.UNDO_RECORDER_PATH, "--thread-fuzzing", "-p", + _config.UNDO_RECORDER_PATH, "-p", str(self.pid), "-o", recorder_output_file ] self._recorder = subprocess.Popen(recorder_args, bufsize=buffer_size, env=self.env, diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 322f0dad785..ae045bcedf4 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -66,6 +66,16 @@ all: test_file: jstests/replsets/secondaryOk_slaveOk_aliases.js - ticket: SERVER-43902 test_file: jstests/sharding/scaled_collection_stats.js + - ticket: SERVER-50416 + test_file: jstests/replsets/disconnect_on_legacy_write_to_secondary.js + - ticket: SERVER-50417 + test_file: jstests/replsets/no_disconnect_on_stepdown.js + - ticket: SERVER-50417 + test_file: jstests/replsets/not_master_unacknowledged_write.js + - ticket: SERVER-50417 + test_file: jstests/replsets/read_operations_during_step_down.js + - ticket: SERVER-50417 + test_file: jstests/replsets/read_operations_during_step_up.js # Tests that should only be excluded from particular suites should be listed under that 
suite. suites: diff --git a/etc/evergreen.yml b/etc/evergreen.yml index 3935a97758d..de9f17fc7f7 100644 --- a/etc/evergreen.yml +++ b/etc/evergreen.yml @@ -12751,6 +12751,7 @@ buildvariants: num_scons_link_jobs_available: 0.99 record_with: --recordWith /opt/undodb5/bin/live-record exec_timeout_secs: 14400 # 4 hours + test_flags: --excludeWithAnyTags=requires_fast_memory tasks: - name: compile_without_package_TG - name: .jscore .common @@ -12764,6 +12765,24 @@ buildvariants: - name: sharded_jscore_txns_sharded_collections - name: sharding_jscore_passthrough - name: sharding_jscore_op_query_passthrough + - name: aggregation + - name: aggregation_auth + - name: aggregation_disabled_optimization + - name: aggregation_ese + - name: aggregation_ese_gcm + - name: aggregation_facet_unwind_passthrough + - name: aggregation_slot_based_execution + - name: .auth .gle + - name: .jscore .encrypt + - name: noPassthroughWithMongod_gen + - name: parallel_compatibility + - name: serial_run + - name: session_jscore_passthrough + - name: .aggfuzzer + - name: query_fuzzer_standalone_gen + - name: update_fuzzer_gen + - name: jstestfuzz_gen + - name: jstestfuzz_interrupt_gen - <<: *enterprise-rhel-62-64-bit-dynamic-required-template name: rhel-62-64-bit-dynamic-visibility-test diff --git a/jstests/auth/repl.js b/jstests/auth/repl.js index 06aa38a38a7..6f5b7ed0dcb 100644 --- a/jstests/auth/repl.js +++ b/jstests/auth/repl.js @@ -1,4 +1,4 @@ -// Test that authorization information gets propogated correctly to secondaries and slaves. +// Test that authorization information gets propogated correctly to secondaries. 
var baseName = "jstests_auth_repl"; var rsName = baseName + "_rs"; @@ -26,7 +26,7 @@ var AuthReplTest = function(spec) { assert(adminPri.auth("super", "super"), "could not authenticate as superuser"); if (secondaryConn != null) { - secondaryConn.setSlaveOk(true); + secondaryConn.setSecondaryOk(); adminSec = secondaryConn.getDB("admin"); } @@ -38,7 +38,7 @@ var AuthReplTest = function(spec) { /** * Use the rolesInfo command to check that the test - * role is as expected on the secondary/slave + * role is as expected on the secondary */ var confirmRolesInfo = function(actionType) { var role = adminSec.getRole(testRole, {showPrivileges: true}); @@ -48,7 +48,7 @@ var AuthReplTest = function(spec) { /** * Use the usersInfo command to check that the test - * user is as expected on the secondary/slave + * user is as expected on the secondary */ var confirmUsersInfo = function(roleName) { var user = adminSec.getUser(testUser); @@ -58,7 +58,7 @@ var AuthReplTest = function(spec) { /** * Ensure that the test user has the proper privileges - * on the secondary/slave + * on the secondary */ var confirmPrivilegeBeforeUpdate = function() { // can run hostInfo @@ -87,7 +87,7 @@ var AuthReplTest = function(spec) { /** * Ensure that the auth changes have taken effect - * properly on the secondary/slave + * properly on the secondary */ var confirmPrivilegeAfterUpdate = function() { // cannot run hostInfo @@ -117,7 +117,7 @@ var AuthReplTest = function(spec) { */ that.setSecondary = function(secondary) { secondaryConn = secondary; - secondaryConn.setSlaveOk(true); + secondaryConn.setSecondaryOk(); adminSec = secondaryConn.getDB("admin"); }; @@ -149,7 +149,7 @@ var AuthReplTest = function(spec) { /** * Top-level test for updating users and roles and ensuring that the update - * has the correct effect on the secondary/slave + * has the correct effect on the secondary */ that.testAll = function() { authOnSecondary(); diff --git 
a/jstests/concurrency/fsm_workloads/auth_privilege_consistency.js b/jstests/concurrency/fsm_workloads/auth_privilege_consistency.js index 054f0c5ca15..f1c6ad28b9b 100644 --- a/jstests/concurrency/fsm_workloads/auth_privilege_consistency.js +++ b/jstests/concurrency/fsm_workloads/auth_privilege_consistency.js @@ -72,7 +72,7 @@ var $config = (function() { // Create a new connection to any node which isn't "me". const conn = new Mongo(node); assert(conn); - conn.setSlaveOk(); + conn.setSecondaryOk(); RSnodes.push(conn); }); diff --git a/jstests/core/resume_query_from_non_existent_record.js b/jstests/core/resume_query_from_non_existent_record.js new file mode 100644 index 00000000000..954325a5763 --- /dev/null +++ b/jstests/core/resume_query_from_non_existent_record.js @@ -0,0 +1,66 @@ +/** + * Test that an error is raised when we try to resume a query from a record which doesn't exist. + * + * @tags: [ + * assumes_against_mongod_not_mongos, + * requires_find_command, + * multiversion_incompatible, + * ] + */ + +(function() { +"use strict"; + +const collName = "resume_query_from_non_existent_record"; +const coll = db[collName]; + +coll.drop(); + +const testData = [{_id: 0, a: 1}, {_id: 1, a: 2}, {_id: 2, a: 3}]; +assert.commandWorked(coll.insert(testData)); + +// Run the initial query and request to return a resume token. We're interested only in a single +// document, so 'batchSize' is set to 1. +let res = assert.commandWorked( + db.runCommand({find: collName, hint: {$natural: 1}, batchSize: 1, $_requestResumeToken: true})); +assert.eq(1, res.cursor.firstBatch.length); +assert.contains(res.cursor.firstBatch[0], testData); +const savedData = res.cursor.firstBatch; + +// Make sure the query returned a resume token which will be used to resume the query from. +assert.hasFields(res.cursor, ["postBatchResumeToken"]); +const resumeToken = res.cursor.postBatchResumeToken; + +// Kill the cursor before attempting to resume. 
+assert.commandWorked(db.runCommand({killCursors: collName, cursors: [res.cursor.id]})); + +// Try to resume the query from the saved resume token. +res = assert.commandWorked(db.runCommand({ + find: collName, + hint: {$natural: 1}, + batchSize: 1, + $_requestResumeToken: true, + $_resumeAfter: resumeToken +})); +assert.eq(1, res.cursor.firstBatch.length); +assert.contains(res.cursor.firstBatch[0], testData); +assert.neq(savedData[0], res.cursor.firstBatch[0]); + +// Kill the cursor before attempting to resume. +assert.commandWorked(db.runCommand({killCursors: collName, cursors: [res.cursor.id]})); + +// Delete a document which corresponds to the saved resume token, so that we can guarantee it does +// not exist. +assert.commandWorked(coll.remove({_id: savedData[0]._id}, {justOne: true})); + +// Try to resume the query from the same token and check that it fails to position the cursor to +// the record specified in the resume token. +assert.commandFailedWithCode(db.runCommand({ + find: collName, + hint: {$natural: 1}, + batchSize: 1, + $_requestResumeToken: true, + $_resumeAfter: resumeToken +}), + ErrorCodes.KeyNotFound); +})(); diff --git a/jstests/core/shell1.js b/jstests/core/shell1.js index 7ea23f8d3a5..4fc4c3a1c15 100644 --- a/jstests/core/shell1.js +++ b/jstests/core/shell1.js @@ -4,11 +4,15 @@ shellHelper("show", "tables;"); shellHelper("show", "tables"); shellHelper("show", "tables ;"); -// test slaveOk levels -assert(!db.getSlaveOk() && !db.test.getSlaveOk() && !db.getMongo().getSlaveOk(), "slaveOk 1"); -db.getMongo().setSlaveOk(); -assert(db.getSlaveOk() && db.test.getSlaveOk() && db.getMongo().getSlaveOk(), "slaveOk 2"); -db.setSlaveOk(false); -assert(!db.getSlaveOk() && !db.test.getSlaveOk() && db.getMongo().getSlaveOk(), "slaveOk 3"); -db.test.setSlaveOk(true); -assert(!db.getSlaveOk() && db.test.getSlaveOk() && db.getMongo().getSlaveOk(), "slaveOk 4"); +// test secondaryOk levels +assert(!db.getSecondaryOk() && !db.test.getSecondaryOk() && 
!db.getMongo().getSecondaryOk(), + "secondaryOk 1"); +db.getMongo().setSecondaryOk(); +assert(db.getSecondaryOk() && db.test.getSecondaryOk() && db.getMongo().getSecondaryOk(), + "secondaryOk 2"); +db.setSecondaryOk(false); +assert(!db.getSecondaryOk() && !db.test.getSecondaryOk() && db.getMongo().getSecondaryOk(), + "secondaryOk 3"); +db.test.setSecondaryOk(); +assert(!db.getSecondaryOk() && db.test.getSecondaryOk() && db.getMongo().getSecondaryOk(), + "secondaryOk 4"); diff --git a/jstests/core/views/views_all_commands.js b/jstests/core/views/views_all_commands.js index 2f9d0cc1c60..04c4a34eabd 100644 --- a/jstests/core/views/views_all_commands.js +++ b/jstests/core/views/views_all_commands.js @@ -338,6 +338,7 @@ let viewsCommandTests = { hello: {skip: isUnrelated}, hostInfo: {skip: isUnrelated}, httpClientRequest: {skip: isAnInternalCommand}, + importCollection: {skip: isUnrelated}, insert: {command: {insert: "view", documents: [{x: 1}]}, expectFailure: true}, internalRenameIfOptionsAndIndexesMatch: {skip: isAnInternalCommand}, invalidateUserCache: {skip: isUnrelated}, diff --git a/jstests/hooks/validate_collections.js b/jstests/hooks/validate_collections.js index cf7f1be9707..856191ca51c 100644 --- a/jstests/hooks/validate_collections.js +++ b/jstests/hooks/validate_collections.js @@ -88,7 +88,7 @@ function CollectionValidator() { try { print('Running validate() on ' + host); const conn = new Mongo(host); - conn.setSlaveOk(); + conn.setSecondaryOk(); jsTest.authenticate(conn); // Skip validating collections for arbiters. diff --git a/jstests/libs/kill_sessions.js b/jstests/libs/kill_sessions.js index b4643ec8ed5..1dabe9c9cea 100644 --- a/jstests/libs/kill_sessions.js +++ b/jstests/libs/kill_sessions.js @@ -120,7 +120,7 @@ var _kill_sessions_api_module = (function() { // hosts. We identify particular ops by secs sleeping. 
this.visit(function(client) { let admin = client.getDB("admin"); - admin.getMongo().setSlaveOk(); + admin.getMongo().setSecondaryOk(); assert.soon(function() { let inProgressOps = admin.aggregate([{$currentOp: {'allUsers': true}}]); @@ -183,7 +183,7 @@ var _kill_sessions_api_module = (function() { Fixture.prototype.assertNoSessionsInCursors = function() { this.visit(function(client) { var db = client.getDB("admin"); - db.setSlaveOk(); + db.setSecondaryOk(); assert.soon(() => { let cursors = db.aggregate([ {"$currentOp": {"idleCursors": true, "allUsers": true}} @@ -205,7 +205,7 @@ var _kill_sessions_api_module = (function() { }); var db = client.getDB("admin"); - db.setSlaveOk(); + db.setSecondaryOk(); var cursors = db.aggregate([ {"$currentOp": {"idleCursors": true, "allUsers": true}}, {"$match": {type: "idleCursor"}} diff --git a/jstests/libs/override_methods/validate_collections_on_shutdown.js b/jstests/libs/override_methods/validate_collections_on_shutdown.js index a1e56fd1ca8..a378d6e390a 100644 --- a/jstests/libs/override_methods/validate_collections_on_shutdown.js +++ b/jstests/libs/override_methods/validate_collections_on_shutdown.js @@ -29,8 +29,8 @@ MongoRunner.validateCollectionsCallback = function(port) { return; } - // Set slaveOk=true so that we can run commands against any secondaries. - conn.setSlaveOk(); + // Set secondaryOk=true so that we can run commands against any secondaries. 
+ conn.setSecondaryOk(); let dbNames; let result = diff --git a/jstests/noPassthrough/apply_ops_DDL_operation_does_not_take_global_X.js b/jstests/noPassthrough/apply_ops_DDL_operation_does_not_take_global_X.js index 3e855455985..e6191f97449 100644 --- a/jstests/noPassthrough/apply_ops_DDL_operation_does_not_take_global_X.js +++ b/jstests/noPassthrough/apply_ops_DDL_operation_does_not_take_global_X.js @@ -29,7 +29,7 @@ assert.commandWorked(secondary.getDB("admin").runCommand( {configureFailPoint: "waitInFindBeforeMakingBatch", mode: "alwaysOn"})); const findWait = startParallelShell(function() { - db.getMongo().setSlaveOk(); + db.getMongo().setSecondaryOk(); assert.eq( db.getSiblingDB('read').getCollection('readColl').find().comment('read hangs').itcount(), 1); diff --git a/jstests/noPassthrough/change_stream_error_label.js b/jstests/noPassthrough/change_stream_error_label.js index 899207b3bd1..2b326a22cd7 100644 --- a/jstests/noPassthrough/change_stream_error_label.js +++ b/jstests/noPassthrough/change_stream_error_label.js @@ -12,9 +12,9 @@ rst.startSet(); rst.initiate(); rst.awaitSecondaryNodes(); -// Disable "slaveOk" on the connection so that we are not allowed to run on the Secondary. +// Disable "secondaryOk" on the connection so that we are not allowed to run on the Secondary. const testDB = rst.getSecondary().getDB(jsTestName()); -testDB.getMongo().setSlaveOk(false); +testDB.getMongo().setSecondaryOk(false); const coll = testDB.test; // Issue a change stream. We should fail with a NotPrimaryNoSecondaryOk error. @@ -28,8 +28,8 @@ assert.contains("ResumableChangeStreamError", err.errorLabels, err); // Now verify that the 'failGetMoreAfterCursorCheckout' failpoint can effectively exercise the // error label generation logic for change stream getMores. function testFailGetMoreAfterCursorCheckoutFailpoint({errorCode, expectedLabel}) { - // Re-enable "slaveOk" on the test connection. 
- testDB.getMongo().setSlaveOk(true); + // Re-enable "secondaryOk" on the test connection. + testDB.getMongo().setSecondaryOk(); // Activate the failpoint and set the exception that it will throw. assert.commandWorked(testDB.adminCommand({ diff --git a/jstests/noPassthrough/out_merge_on_secondary_killop.js b/jstests/noPassthrough/out_merge_on_secondary_killop.js index 7cdc25d8eae..d5863374f96 100644 --- a/jstests/noPassthrough/out_merge_on_secondary_killop.js +++ b/jstests/noPassthrough/out_merge_on_secondary_killop.js @@ -57,7 +57,7 @@ function testKillOp(pipeline, comment, failpointName) { // Run the aggregate and ensure that it is interrupted. const runAggregate = ` const testDB = db.getSiblingDB("${kDBName}"); - testDB.setSlaveOk(true); + testDB.setSecondaryOk(); const res = testDB.runCommand({ aggregate: "inputColl", pipeline: ${tojson(pipeline)}, diff --git a/jstests/noPassthrough/server_transaction_metrics_secondary.js b/jstests/noPassthrough/server_transaction_metrics_secondary.js index 9282b19bea0..3a337e17fde 100644 --- a/jstests/noPassthrough/server_transaction_metrics_secondary.js +++ b/jstests/noPassthrough/server_transaction_metrics_secondary.js @@ -19,8 +19,8 @@ replTest.initiate(config); const primary = replTest.getPrimary(); const secondary = replTest.getSecondary(); -// Set slaveOk=true so that normal read commands would be allowed on the secondary. -secondary.setSlaveOk(true); +// Set secondaryOk=true so that normal read commands would be allowed on the secondary. +secondary.setSecondaryOk(); // Create a test collection that we can run commands against. 
assert.commandWorked(primary.getDB(dbName)[collName].insert({_id: 0})); diff --git a/jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js b/jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js index 3a43603e935..a00cabc89a9 100644 --- a/jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js +++ b/jstests/noPassthrough/startup_recovery_truncates_oplog_holes_after_primary_crash.js @@ -87,7 +87,7 @@ assert.soonNoExcept(function() { }); // Confirm that the write with the oplog hold behind it is now gone (truncated) as expected. -primary.setSlaveOk(); +primary.setSecondaryOk(); const find = primary.getDB(dbName).getCollection(collName).findOne({_id: "writeAfterHole"}); assert.eq(find, null); diff --git a/jstests/noPassthrough/stepdown_query.js b/jstests/noPassthrough/stepdown_query.js index 4809e471f7a..239497725ff 100644 --- a/jstests/noPassthrough/stepdown_query.js +++ b/jstests/noPassthrough/stepdown_query.js @@ -25,8 +25,8 @@ var collName = jsTest.name(); function runTest(host, rst, waitForPrimary) { // We create a new connection to 'host' here instead of passing in the original connection. - // This to work around the fact that connections created by ReplSetTest already have slaveOk - // set on them, but we need a connection with slaveOk not set for this test. + // This to work around the fact that connections created by ReplSetTest already have secondaryOk + // set on them, but we need a connection with secondaryOk not set for this test. var conn = new Mongo(host); var coll = conn.getDB(dbName).getCollection(collName); assert(!coll.exists()); @@ -51,7 +51,7 @@ function runTest(host, rst, waitForPrimary) { } catch (e) { } - // Even though our connection doesn't have slaveOk set, we should still be able to iterate + // Even though our connection doesn't have secondaryOk set, we should still be able to iterate // our cursor and kill our cursor. 
assert(cursor.hasNext()); assert.doesNotThrow(function() { diff --git a/jstests/noPassthrough/timestamp_index_builds.js b/jstests/noPassthrough/timestamp_index_builds.js index 8e13ff0d21c..f7995108f34 100644 --- a/jstests/noPassthrough/timestamp_index_builds.js +++ b/jstests/noPassthrough/timestamp_index_builds.js @@ -87,7 +87,7 @@ for (let nodeIdx = 0; nodeIdx < 2; ++nodeIdx) { jsTestLog("Starting as a replica set. Both indexes should exist. Node: " + nodeIdentity); let conn = rst.start(nodeIdx, {startClean: false}, true); rst.waitForState(conn, ReplSetTest.State.SECONDARY); - conn.setSlaveOk(); + conn.setSecondaryOk(); IndexBuildTest.assertIndexes(getColl(conn), 2, ['_id_', 'foo_1']); rst.stop(nodeIdx); } diff --git a/jstests/noPassthroughWithMongod/geo_polygon.js b/jstests/noPassthroughWithMongod/geo_polygon.js index ce7f9ebf67c..d0085fa1f92 100644 --- a/jstests/noPassthroughWithMongod/geo_polygon.js +++ b/jstests/noPassthroughWithMongod/geo_polygon.js @@ -16,7 +16,9 @@ for (x = -180; x < 180; x += .5) { assert.commandWorked(bulk.execute()); var numTests = 31; -for (var n = 0; n < numTests; n++) { +// Reduce the amount of repetitions on live-record buildvariant +var start = (TestData.undoRecorderPath ? 20 : 0); +for (var n = start; n < numTests; n++) { t.dropIndexes(); t.ensureIndex({loc: "2d"}, {bits: 2 + n}); diff --git a/jstests/noPassthroughWithMongod/indexbg_interrupts.js b/jstests/noPassthroughWithMongod/indexbg_interrupts.js index a1bf783f032..420fb2b6d96 100644 --- a/jstests/noPassthroughWithMongod/indexbg_interrupts.js +++ b/jstests/noPassthroughWithMongod/indexbg_interrupts.js @@ -32,7 +32,8 @@ var checkOp = function(checkDB) { var dbname = 'bgIndexSec'; var collection = 'jstests_feh'; -var size = 100000; +// Reduce the amount of data on live-record buildvariant +var size = (TestData.undoRecorderPath ? 
10000 : 100000); // Set up replica set var replTest = new ReplSetTest({name: 'bgIndex', nodes: 3}); diff --git a/jstests/noPassthroughWithMongod/no_balance_collection.js b/jstests/noPassthroughWithMongod/no_balance_collection.js index 38182f1c481..2ffaf7aecfc 100644 --- a/jstests/noPassthroughWithMongod/no_balance_collection.js +++ b/jstests/noPassthroughWithMongod/no_balance_collection.js @@ -78,7 +78,9 @@ st.waitForBalancer(true, 60000); var lastMigration = sh._lastMigration(collB); var bulk = collB.initializeUnorderedBulkOp(); -for (var i = 0; i < 1000000; i++) { +// Reduce the amount of data on live-record buildvariant +var n = (TestData.undoRecorderPath ? 100000 : 1000000); +for (var i = 0; i < n; i++) { bulk.insert({_id: i, hello: "world"}); } assert.commandWorked(bulk.execute()); diff --git a/jstests/noPassthroughWithMongod/replReads.js b/jstests/noPassthroughWithMongod/replReads.js index 5c40dbd900c..fde1143911c 100644 --- a/jstests/noPassthroughWithMongod/replReads.js +++ b/jstests/noPassthroughWithMongod/replReads.js @@ -1,4 +1,4 @@ -// Test that doing slaveOk reads from secondaries hits all the secondaries evenly +// Test that doing secondaryOk reads from secondaries hits all the secondaries evenly // @tags: [requires_sharding] function testReadLoadBalancing(numReplicas) { @@ -52,7 +52,7 @@ function testReadLoadBalancing(numReplicas) { for (var i = 0; i < secondaries.length * 10; i++) { conn = new Mongo(s._mongos[0].host); - conn.setSlaveOk(); + conn.setSecondaryOk(); conn.getDB('test').foo.findOne(); connections.push(conn); } @@ -103,7 +103,7 @@ function testReadLoadBalancing(numReplicas) { for (var i = 0; i < secondaries.length * 10; i++) { conn = new Mongo(s._mongos[0].host); - conn.setSlaveOk(); + conn.setSecondaryOk(); conn.getDB('test').foo.findOne(); connections.push(conn); } diff --git a/jstests/replsets/auth1.js b/jstests/replsets/auth1.js index cfc9c405dd1..a91137391a8 100644 --- a/jstests/replsets/auth1.js +++ b/jstests/replsets/auth1.js @@ 
-88,7 +88,7 @@ print("try some legal and illegal reads"); var r = primary.getDB("test").foo.findOne(); assert.eq(r.x, 1); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); function doQueryOn(p) { var error = assert.throws(function() { @@ -200,7 +200,7 @@ wait(function() { print("make sure it has the config, too"); assert.soon(function() { for (var i in rs.nodes) { - rs.nodes[i].setSlaveOk(); + rs.nodes[i].setSecondaryOk(); rs.nodes[i].getDB("admin").auth("foo", "bar"); config = rs.nodes[i].getDB("local").system.replset.findOne(); // We expect the config version to be 3 due to the initial config and then the diff --git a/jstests/replsets/auth_no_pri.js b/jstests/replsets/auth_no_pri.js index 16a94763b04..bc606dc913a 100644 --- a/jstests/replsets/auth_no_pri.js +++ b/jstests/replsets/auth_no_pri.js @@ -23,7 +23,7 @@ rs.waitForState(nodes[2], ReplSetTest.State.SECONDARY); // Make sure you can still authenticate a replset connection with no primary var conn2 = new Mongo(rs.getURL()); -conn2.setSlaveOk(true); +conn2.setSecondaryOk(); assert(conn2.getDB('admin').auth({user: 'admin', pwd: 'pwd', mechanism: "SCRAM-SHA-1"})); assert.eq(1, conn2.getDB('admin').foo.findOne().a); diff --git a/jstests/replsets/awaitable_ismaster_fcv_change.js b/jstests/replsets/awaitable_ismaster_fcv_change.js index 41ed644e5e6..9e22fde507d 100644 --- a/jstests/replsets/awaitable_ismaster_fcv_change.js +++ b/jstests/replsets/awaitable_ismaster_fcv_change.js @@ -21,7 +21,7 @@ const secondaryAdminDB = secondary.getDB("admin"); function runAwaitableIsMasterBeforeFCVChange( topologyVersionField, targetFCV, isPrimary, prevMinWireVersion, serverMaxWireVersion) { - db.getMongo().setSlaveOk(); + db.getMongo().setSecondaryOk(); let response = assert.commandWorked(db.runCommand({ isMaster: 1, topologyVersion: topologyVersionField, diff --git a/jstests/replsets/awaitdata_getmore_new_last_committed_optime.js b/jstests/replsets/awaitdata_getmore_new_last_committed_optime.js index 
f98e2fb4326..4d17389c209 100644 --- a/jstests/replsets/awaitdata_getmore_new_last_committed_optime.js +++ b/jstests/replsets/awaitdata_getmore_new_last_committed_optime.js @@ -56,7 +56,7 @@ let waitForGetMoreToFinish = startParallelShell(() => { load('jstests/replsets/rslib.js'); const secondary = db.getMongo(); - secondary.setSlaveOk(); + secondary.setSecondaryOk(); const dbName = 'test'; const collName = 'coll'; diff --git a/jstests/replsets/buildindexes.js b/jstests/replsets/buildindexes.js index e00a9e94ef7..3be0ba68896 100644 --- a/jstests/replsets/buildindexes.js +++ b/jstests/replsets/buildindexes.js @@ -21,7 +21,7 @@ var primary = replTest.getPrimary().getDB(name); var secondaryConns = replTest.getSecondaries(); var secondaries = []; for (var i in secondaryConns) { - secondaryConns[i].setSlaveOk(); + secondaryConns[i].setSecondaryOk(); secondaries.push(secondaryConns[i].getDB(name)); } replTest.awaitReplication(); diff --git a/jstests/replsets/catchup.js b/jstests/replsets/catchup.js index a8284ad1772..7ab31e4d76c 100644 --- a/jstests/replsets/catchup.js +++ b/jstests/replsets/catchup.js @@ -37,7 +37,7 @@ rst.nodes.forEach(function(node) { }); function checkOpInOplog(node, op, count) { - node.getDB("admin").getMongo().setSlaveOk(); + node.getDB("admin").getMongo().setSecondaryOk(); var oplog = node.getDB("local")['oplog.rs']; var oplogArray = oplog.find().toArray(); assert.eq(oplog.count(op), count, "op: " + tojson(op) + ", oplog: " + tojson(oplogArray)); diff --git a/jstests/replsets/db_reads_while_recovering_all_commands.js b/jstests/replsets/db_reads_while_recovering_all_commands.js index e742e1b43af..a5b3f9d3ab0 100644 --- a/jstests/replsets/db_reads_while_recovering_all_commands.js +++ b/jstests/replsets/db_reads_while_recovering_all_commands.js @@ -192,6 +192,7 @@ const allCommands = { hello: {skip: isNotAUserDataRead}, hostInfo: {skip: isNotAUserDataRead}, httpClientRequest: {skip: isNotAUserDataRead}, + importCollection: {skip: isNotAUserDataRead}, 
insert: {skip: isPrimaryOnly}, internalRenameIfOptionsAndIndexesMatch: {skip: isAnInternalCommand}, invalidateUserCache: {skip: isNotAUserDataRead}, diff --git a/jstests/replsets/disconnect_on_legacy_write_to_secondary.js b/jstests/replsets/disconnect_on_legacy_write_to_secondary.js index edf5950a62b..9678fc1a98e 100644 --- a/jstests/replsets/disconnect_on_legacy_write_to_secondary.js +++ b/jstests/replsets/disconnect_on_legacy_write_to_secondary.js @@ -52,16 +52,16 @@ const primaryDb = primaryDataConn.getDB("test"); const primaryColl = primaryDb[collname]; primaryDataConn.forceWriteMode('legacy'); -function getNotMasterLegacyUnackWritesCounter() { +function getNotPrimaryLegacyUnackWritesCounter() { return assert.commandWorked(primaryAdmin.adminCommand({serverStatus: 1})) - .metrics.repl.network.notMasterLegacyUnacknowledgedWrites; + .metrics.repl.network.notPrimaryLegacyUnacknowledgedWrites; } function runStepDownTest({description, failpoint, operation}) { jsTestLog("Enabling failpoint to block " + description + "s"); let failPoint = configureFailPoint(primaryAdmin, failpoint); - let failedLegacyUnackWritesBefore = getNotMasterLegacyUnackWritesCounter(); + let failedLegacyUnackWritesBefore = getNotPrimaryLegacyUnackWritesCounter(); jsTestLog("Trying legacy " + description + " on stepping-down primary"); operation(); @@ -77,7 +77,7 @@ function runStepDownTest({description, failpoint, operation}) { // Validate the number of legacy unacknowledged writes failed due to step down resulted // in network disconnection. - let failedLegacyUnackWritesAfter = getNotMasterLegacyUnackWritesCounter(); + let failedLegacyUnackWritesAfter = getNotPrimaryLegacyUnackWritesCounter(); assert.eq(failedLegacyUnackWritesAfter, failedLegacyUnackWritesBefore + 1); // Allow the primary to be re-elected, and wait for it. 
diff --git a/jstests/replsets/explain_slaveok.js b/jstests/replsets/explain_slaveok.js index 68eda89bce7..f3215af9ab6 100644 --- a/jstests/replsets/explain_slaveok.js +++ b/jstests/replsets/explain_slaveok.js @@ -1,12 +1,12 @@ // Test the explain command on the primary and on secondaries: // -// 1) Explain of read operations should work on the secondaries iff slaveOk is set. +// 1) Explain of read operations should work on the secondaries iff secondaryOk is set. // // 2) Explain of write operations should -// --fail on secondaries, even if slaveOk is set, +// --fail on secondaries, even if secondaryOk is set, // --succeed on primary without applying any writes. -var name = "explain_slaveok"; +var name = "explain_secondaryok"; print("Start replica set with two nodes"); var replTest = new ReplSetTest({name: name, nodes: 2}); @@ -16,22 +16,22 @@ var primary = replTest.getPrimary(); // Insert a document and let it sync to the secondary. print("Initial sync"); -primary.getDB("test").explain_slaveok.insert({a: 1}); +primary.getDB("test").explain_secondaryok.insert({a: 1}); replTest.awaitReplication(); // Check that the document is present on the primary. -assert.eq(1, primary.getDB("test").explain_slaveok.findOne({a: 1})["a"]); +assert.eq(1, primary.getDB("test").explain_secondaryok.findOne({a: 1})["a"]); -// We shouldn't be able to read from the secondary with slaveOk off. +// We shouldn't be able to read from the secondary with secondaryOk off. var secondary = replTest.getSecondary(); -secondary.getDB("test").getMongo().setSlaveOk(false); +secondary.getDB("test").getMongo().setSecondaryOk(false); assert.throws(function() { - secondary.getDB("test").explain_slaveok.findOne({a: 1}); + secondary.getDB("test").explain_secondaryok.findOne({a: 1}); }); -// With slaveOk on, we should be able to read from the secondary. 
-secondary.getDB("test").getMongo().setSlaveOk(true); -assert.eq(1, secondary.getDB("test").explain_slaveok.findOne({a: 1})["a"]); +// With secondaryOk on, we should be able to read from the secondary. +secondary.getDB("test").getMongo().setSecondaryOk(); +assert.eq(1, secondary.getDB("test").explain_secondaryok.findOne({a: 1})["a"]); // // Test explains on primary. @@ -39,12 +39,12 @@ assert.eq(1, secondary.getDB("test").explain_slaveok.findOne({a: 1})["a"]); // Explain a count on the primary. var explainOut = primary.getDB("test").runCommand( - {explain: {count: "explain_slaveok", query: {a: 1}}, verbosity: "executionStats"}); + {explain: {count: "explain_secondaryok", query: {a: 1}}, verbosity: "executionStats"}); assert.commandWorked(explainOut, "explain read op on primary"); // Explain an update on the primary. explainOut = primary.getDB("test").runCommand({ - explain: {update: "explain_slaveok", updates: [{q: {a: 1}, u: {$set: {a: 5}}}]}, + explain: {update: "explain_secondaryok", updates: [{q: {a: 1}, u: {$set: {a: 5}}}]}, verbosity: "executionStats" }); assert.commandWorked(explainOut, "explain write op on primary"); @@ -57,52 +57,52 @@ assert.eq(1, stages.nWouldModify); // Confirm that the document did not actually get modified on the primary // or on the secondary. -assert.eq(1, primary.getDB("test").explain_slaveok.findOne({a: 1})["a"]); -secondary.getDB("test").getMongo().setSlaveOk(true); -assert.eq(1, secondary.getDB("test").explain_slaveok.findOne({a: 1})["a"]); +assert.eq(1, primary.getDB("test").explain_secondaryok.findOne({a: 1})["a"]); +secondary.getDB("test").getMongo().setSecondaryOk(); +assert.eq(1, secondary.getDB("test").explain_secondaryok.findOne({a: 1})["a"]); // // Test explains on secondary. // -// Explain a count on the secondary with slaveOk off. Should fail because -// slaveOk is required for explains on a secondary. -secondary.getDB("test").getMongo().setSlaveOk(false); +// Explain a count on the secondary with secondaryOk off. 
Should fail because +// secondaryOk is required for explains on a secondary. +secondary.getDB("test").getMongo().setSecondaryOk(false); explainOut = secondary.getDB("test").runCommand( - {explain: {count: "explain_slaveok", query: {a: 1}}, verbosity: "executionStats"}); -assert.commandFailed(explainOut, "explain read op on secondary, slaveOk false"); + {explain: {count: "explain_secondaryok", query: {a: 1}}, verbosity: "executionStats"}); +assert.commandFailed(explainOut, "explain read op on secondary, secondaryOk false"); -// Explain of count should succeed once slaveOk is true. -secondary.getDB("test").getMongo().setSlaveOk(true); +// Explain of count should succeed once secondaryOk is true. +secondary.getDB("test").getMongo().setSecondaryOk(); explainOut = secondary.getDB("test").runCommand( - {explain: {count: "explain_slaveok", query: {a: 1}}, verbosity: "executionStats"}); -assert.commandWorked(explainOut, "explain read op on secondary, slaveOk true"); + {explain: {count: "explain_secondaryok", query: {a: 1}}, verbosity: "executionStats"}); +assert.commandWorked(explainOut, "explain read op on secondary, secondaryOk true"); -// Explain .find() on a secondary, setting slaveOk directly on the query. -secondary.getDB("test").getMongo().setSlaveOk(false); +// Explain .find() on a secondary, setting secondaryOk directly on the query. 
+secondary.getDB("test").getMongo().setSecondaryOk(false); assert.throws(function() { - secondary.getDB("test").explain_slaveok.explain("executionStats").find({a: 1}).finish(); + secondary.getDB("test").explain_secondaryok.explain("executionStats").find({a: 1}).finish(); }); -secondary.getDB("test").getMongo().setSlaveOk(false); +secondary.getDB("test").getMongo().setSecondaryOk(false); explainOut = secondary.getDB("test") - .explain_slaveok.explain("executionStats") + .explain_secondaryok.explain("executionStats") .find({a: 1}) .addOption(DBQuery.Option.slaveOk) .finish(); -assert.commandWorked(explainOut, "explain read op on secondary, slaveOk set to true on query"); +assert.commandWorked(explainOut, "explain read op on secondary, slaveOk bit set to true on query"); -secondary.getDB("test").getMongo().setSlaveOk(true); +secondary.getDB("test").getMongo().setSecondaryOk(); explainOut = - secondary.getDB("test").explain_slaveok.explain("executionStats").find({a: 1}).finish(); -assert.commandWorked(explainOut, "explain .find() on secondary, slaveOk set to true"); + secondary.getDB("test").explain_secondaryok.explain("executionStats").find({a: 1}).finish(); +assert.commandWorked(explainOut, "explain .find() on secondary, secondaryOk set to true"); -// Explain .find() on a secondary, setting slaveOk to false with various read preferences. +// Explain .find() on a secondary, setting secondaryOk to false with various read preferences. 
var readPrefModes = ["secondary", "secondaryPreferred", "primaryPreferred", "nearest"]; readPrefModes.forEach(function(prefString) { - secondary.getDB("test").getMongo().setSlaveOk(false); + secondary.getDB("test").getMongo().setSecondaryOk(false); explainOut = secondary.getDB("test") - .explain_slaveok.explain("executionStats") + .explain_secondaryok.explain("executionStats") .find({a: 1}) .readPref(prefString) .finish(); @@ -112,7 +112,7 @@ readPrefModes.forEach(function(prefString) { // Similarly should succeed if a read preference is set on the connection. secondary.setReadPref(prefString); explainOut = - secondary.getDB("test").explain_slaveok.explain("executionStats").find({a: 1}).finish(); + secondary.getDB("test").explain_secondaryok.explain("executionStats").find({a: 1}).finish(); assert.commandWorked( explainOut, "explain .find() on secondary, '" + prefString + "' read preference on connection"); @@ -120,35 +120,36 @@ readPrefModes.forEach(function(prefString) { secondary.setReadPref(); }); -// Fail explain find() on a secondary, setting slaveOk to false with read preference set to primary. +// Fail explain find() on a secondary, setting secondaryOk to false with read preference set to +// primary. var prefStringPrimary = "primary"; -secondary.getDB("test").getMongo().setSlaveOk(false); +secondary.getDB("test").getMongo().setSecondaryOk(false); explainOut = secondary.getDB("test").runCommand( - {explain: {find: "explain_slaveok", query: {a: 1}}, verbosity: "executionStats"}); -assert.commandFailed(explainOut, "not master and slaveOk=false"); + {explain: {find: "explain_secondaryok", query: {a: 1}}, verbosity: "executionStats"}); +assert.commandFailed(explainOut, "not primary and secondaryOk=false"); // Similarly should fail if a read preference is set on the connection. 
secondary.setReadPref(prefStringPrimary); explainOut = secondary.getDB("test").runCommand( - {explain: {find: "explain_slaveok", query: {a: 1}}, verbosity: "executionStats"}); -assert.commandFailed(explainOut, "not master and slaveOk=false"); + {explain: {find: "explain_secondaryok", query: {a: 1}}, verbosity: "executionStats"}); +assert.commandFailed(explainOut, "not primary and secondaryOk=false"); // Unset read pref on the connection. secondary.setReadPref(); -// Explain an update on the secondary with slaveOk off. Should fail because -// slaveOk is required for explains on a secondary. -secondary.getDB("test").getMongo().setSlaveOk(false); +// Explain an update on the secondary with secondaryOk off. Should fail because +// secondaryOk is required for explains on a secondary. +secondary.getDB("test").getMongo().setSecondaryOk(false); explainOut = secondary.getDB("test").runCommand({ - explain: {update: "explain_slaveok", updates: [{q: {a: 1}, u: {$set: {a: 5}}}]}, + explain: {update: "explain_secondaryok", updates: [{q: {a: 1}, u: {$set: {a: 5}}}]}, verbosity: "executionStats" }); -assert.commandFailed(explainOut, "explain write op on secondary, slaveOk false"); +assert.commandFailed(explainOut, "explain write op on secondary, secondaryOk false"); -// Explain of the update should also fail with slaveOk on. -secondary.getDB("test").getMongo().setSlaveOk(true); +// Explain of the update should also fail with secondaryOk on. 
+secondary.getDB("test").getMongo().setSecondaryOk(); explainOut = secondary.getDB("test").runCommand({ - explain: {update: "explain_slaveok", updates: [{q: {a: 1}, u: {$set: {a: 5}}}]}, + explain: {update: "explain_secondaryok", updates: [{q: {a: 1}, u: {$set: {a: 5}}}]}, verbosity: "executionStats" }); -assert.commandFailed(explainOut, "explain write op on secondary, slaveOk true"); +assert.commandFailed(explainOut, "explain write op on secondary, secondaryOk true"); replTest.stopSet(); diff --git a/jstests/replsets/fsync_lock_read_secondaries.js b/jstests/replsets/fsync_lock_read_secondaries.js index e73ceab58ba..daed9de7ad6 100644 --- a/jstests/replsets/fsync_lock_read_secondaries.js +++ b/jstests/replsets/fsync_lock_read_secondaries.js @@ -50,7 +50,7 @@ replTest.awaitReplication(); // Calling getPrimary also populates '_secondaries'. var secondaries = replTest.getSecondaries(); -secondaries[0].setSlaveOk(); +secondaries[0].setSecondaryOk(); assert.commandWorked(secondaries[0].getDB("admin").runCommand({fsync: 1, lock: 1})); var docNum = 1000; diff --git a/jstests/replsets/groupAndMapReduce.js b/jstests/replsets/groupAndMapReduce.js index 270436bf62c..2723f800a07 100644 --- a/jstests/replsets/groupAndMapReduce.js +++ b/jstests/replsets/groupAndMapReduce.js @@ -36,7 +36,7 @@ doTest = function(signal) { assert(secondaries.length == 2, "Expected 2 secondaries but length was " + secondaries.length); secondaries.forEach(function(secondary) { // try to read from secondary - secondary.slaveOk = true; + secondary.setSecondaryOk(); var count = secondary.getDB("foo").foo.find().itcount(); printjson(count); assert.eq(len, count, "secondary count wrong: " + secondary); @@ -46,7 +46,7 @@ doTest = function(signal) { printjson(one); print("Calling inline mr() with slaveOk=true, must succeed"); - secondary.slaveOk = true; + secondary.setSecondaryOk(); map = function() { emit(this.a, 1); }; diff --git a/jstests/replsets/initial_sync4.js b/jstests/replsets/initial_sync4.js index 
80103839bfb..35dbd632715 100644 --- a/jstests/replsets/initial_sync4.js +++ b/jstests/replsets/initial_sync4.js @@ -45,7 +45,7 @@ jsTestLog("5. Wait for new node to start cloning"); - s.setSlaveOk(); + s.setSecondaryOk(); var sc = s.getDB("d")["c"]; wait(function() { diff --git a/jstests/replsets/initial_sync_ambiguous_index.js b/jstests/replsets/initial_sync_ambiguous_index.js index 7e415fade43..c50324db935 100644 --- a/jstests/replsets/initial_sync_ambiguous_index.js +++ b/jstests/replsets/initial_sync_ambiguous_index.js @@ -44,7 +44,7 @@ const secondary = rst.add({ rsConfig: {votes: 0, priority: 0}, setParameter: {"numInitialSyncAttempts": 1, 'collectionClonerBatchSize': clonerBatchSize} }); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); const secondaryColl = secondary.getDB(dbName).getCollection(collectionName); // We set the collectionClonerBatchSize low above, so we will definitely hit diff --git a/jstests/replsets/initial_sync_applier_error.js b/jstests/replsets/initial_sync_applier_error.js index 7ef7058aea4..e880c739ef1 100644 --- a/jstests/replsets/initial_sync_applier_error.js +++ b/jstests/replsets/initial_sync_applier_error.js @@ -31,7 +31,7 @@ assert.commandWorked(coll.insert({_id: 0, content: "hi"})); // but before copying databases. 
var secondary = replSet.add({setParameter: "numInitialSyncAttempts=2", rsConfig: {votes: 0, priority: 0}}); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); let failPoint = configureFailPoint(secondary, 'initialSyncHangBeforeCopyingDatabases'); replSet.reInitiate(); diff --git a/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp.js b/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp.js index 023184c5dfb..292b0318ecd 100644 --- a/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp.js +++ b/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp.js @@ -139,7 +139,7 @@ replTest.awaitReplication(); jsTestLog("Initial sync completed"); // Make sure the secondary fetched enough transaction oplog entries. -secondary.setSlaveOk(); +secondary.setSecondaryOk(); const secondaryOplog = secondary.getDB("local").getCollection("oplog.rs"); assert.eq(secondaryOplog.find({"ts": beginFetchingTs}).itcount(), 1); diff --git a/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp_no_oplog_application.js b/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp_no_oplog_application.js index a4420ff9940..9b1839b4c43 100644 --- a/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp_no_oplog_application.js +++ b/jstests/replsets/initial_sync_fetch_from_oldest_active_transaction_timestamp_no_oplog_application.js @@ -98,7 +98,7 @@ replTest.waitForState(secondary, ReplSetTest.State.SECONDARY); jsTestLog("Initial sync completed"); // Make sure the secondary fetched enough transaction oplog entries. 
-secondary.setSlaveOk(); +secondary.setSecondaryOk(); const secondaryOplog = secondary.getDB("local").getCollection("oplog.rs"); assert.eq(secondaryOplog.find({"ts": beginFetchingTs}).itcount(), 1); diff --git a/jstests/replsets/initial_sync_invalid_views.js b/jstests/replsets/initial_sync_invalid_views.js index fb5a1975323..9faf5207608 100644 --- a/jstests/replsets/initial_sync_invalid_views.js +++ b/jstests/replsets/initial_sync_invalid_views.js @@ -18,7 +18,7 @@ assert.commandWorked(coll.insert({a: 1})); // Add a secondary node but make it hang before copying databases. let secondary = replSet.add({rsConfig: {votes: 0, priority: 0}}); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); assert.commandWorked(secondary.getDB('admin').runCommand( {configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'alwaysOn'})); diff --git a/jstests/replsets/initial_sync_move_forward.js b/jstests/replsets/initial_sync_move_forward.js index d5142d06b98..c99b529a700 100644 --- a/jstests/replsets/initial_sync_move_forward.js +++ b/jstests/replsets/initial_sync_move_forward.js @@ -41,7 +41,7 @@ assert.commandWorked(masterColl.ensureIndex({x: 1}, {unique: true})); // Add a secondary. var secondary = rst.add({setParameter: "numInitialSyncAttempts=1", rsConfig: {votes: 0, priority: 0}}); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); var secondaryColl = secondary.getDB("test").coll; // Pause initial sync when the secondary has copied {_id: 0, x: 0} and {_id: 1, x: 1}. diff --git a/jstests/replsets/initial_sync_oplog_rollover.js b/jstests/replsets/initial_sync_oplog_rollover.js index 268ec261d39..b9c1eda8f2f 100644 --- a/jstests/replsets/initial_sync_oplog_rollover.js +++ b/jstests/replsets/initial_sync_oplog_rollover.js @@ -38,7 +38,7 @@ var firstOplogEntry = getFirstOplogEntry(primary); // Add a secondary node but make it hang before copying databases. 
var secondary = replSet.add(); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); var failPoint = configureFailPoint(secondary, 'initialSyncHangBeforeCopyingDatabases'); replSet.reInitiate(); diff --git a/jstests/replsets/initial_sync_replSetGetStatus.js b/jstests/replsets/initial_sync_replSetGetStatus.js index d315421577a..3d999cc9553 100644 --- a/jstests/replsets/initial_sync_replSetGetStatus.js +++ b/jstests/replsets/initial_sync_replSetGetStatus.js @@ -24,7 +24,7 @@ assert.commandWorked(coll.insert({a: 2})); // Add a secondary node but make it hang before copying databases. var secondary = replSet.add({rsConfig: {votes: 0, priority: 0}}); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); var failPointBeforeCopying = configureFailPoint(secondary, 'initialSyncHangBeforeCopyingDatabases'); var failPointBeforeFinish = configureFailPoint(secondary, 'initialSyncHangBeforeFinish'); diff --git a/jstests/replsets/initial_sync_replicates_prepare_received_during_another_initial_sync.js b/jstests/replsets/initial_sync_replicates_prepare_received_during_another_initial_sync.js index 80012cab607..9ea82c52bb3 100644 --- a/jstests/replsets/initial_sync_replicates_prepare_received_during_another_initial_sync.js +++ b/jstests/replsets/initial_sync_replicates_prepare_received_during_another_initial_sync.js @@ -47,7 +47,7 @@ function restartSecondaryAndForceSyncSource(replSet, secondary, syncSource, dbNa // Wait for the secondary to complete initial sync. waitForState(secondary, ReplSetTest.State.SECONDARY); // Allow for secondary reads. - secondary.setSlaveOk(); + secondary.setSecondaryOk(); const secondaryDB = secondary.getDB(dbName); // Confirm that we have a prepared transaction in progress on the secondary. 
diff --git a/jstests/replsets/initial_sync_test_fixture_test.js b/jstests/replsets/initial_sync_test_fixture_test.js index 625620584a4..755df7a2109 100644 --- a/jstests/replsets/initial_sync_test_fixture_test.js +++ b/jstests/replsets/initial_sync_test_fixture_test.js @@ -108,7 +108,7 @@ let prepareTimestamp = PrepareHelpers.prepareTransaction(session); assert(!initialSyncTest.step()); secondary = initialSyncTest.getSecondary(); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); // Make sure that we cannot read from this node yet. assert.commandFailedWithCode(secondary.getDB("test").runCommand({count: "foo"}), diff --git a/jstests/replsets/initial_sync_uuid_not_found.js b/jstests/replsets/initial_sync_uuid_not_found.js index 90e0024b76c..2e2911ee6dd 100644 --- a/jstests/replsets/initial_sync_uuid_not_found.js +++ b/jstests/replsets/initial_sync_uuid_not_found.js @@ -39,7 +39,7 @@ function ResyncWithFailpoint(failpointName, failpointData) { assert.eq(primary, rst.getPrimary(), 'Primary changed after reconfig'); jsTestLog('Wait for new node to start cloning'); - secondary.setSlaveOk(); + secondary.setSecondaryOk(); const secondaryDB = secondary.getDB(primaryDB.getName()); const secondaryColl = secondaryDB[primaryColl.getName()]; diff --git a/jstests/replsets/initial_sync_with_write_load.js b/jstests/replsets/initial_sync_with_write_load.js index fc1164c6c43..0474c1f9c10 100644 --- a/jstests/replsets/initial_sync_with_write_load.js +++ b/jstests/replsets/initial_sync_with_write_load.js @@ -24,8 +24,8 @@ replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY); var master = replTest.getPrimary(); var a_conn = conns[0]; var b_conn = conns[1]; -a_conn.setSlaveOk(); -b_conn.setSlaveOk(); +a_conn.setSecondaryOk(); +b_conn.setSecondaryOk(); var A = a_conn.getDB("test"); var B = b_conn.getDB("test"); var AID = replTest.getNodeId(a_conn); diff --git a/jstests/replsets/kill_reads_with_prepare_conflicts_during_step_up.js 
b/jstests/replsets/kill_reads_with_prepare_conflicts_during_step_up.js index 1c71eb94b66..da8c8fd544d 100644 --- a/jstests/replsets/kill_reads_with_prepare_conflicts_during_step_up.js +++ b/jstests/replsets/kill_reads_with_prepare_conflicts_during_step_up.js @@ -74,7 +74,7 @@ TestData.clusterTime = clusterTimeAfterPrepare; const waitForSecondaryReadBlockedOnPrepareConflictThread = startParallelShell(() => { // Allow for secondary reads. - db.getMongo().setSlaveOk(); + db.getMongo().setSecondaryOk(); const parallelTestDB = db.getSiblingDB(TestData.dbName); const parallelTestCollName = TestData.collName; diff --git a/jstests/replsets/libs/initial_sync_update_missing_doc.js b/jstests/replsets/libs/initial_sync_update_missing_doc.js index f45d9a4e107..68bb14ac668 100644 --- a/jstests/replsets/libs/initial_sync_update_missing_doc.js +++ b/jstests/replsets/libs/initial_sync_update_missing_doc.js @@ -17,7 +17,7 @@ load("jstests/libs/fail_point_util.js"); // must be called after reInitiateSetWithSecondary. var reInitiateSetWithSecondary = function(replSet, secondaryConfig) { const secondary = replSet.add(secondaryConfig); - secondary.setSlaveOk(); + secondary.setSecondaryOk(); // Make the secondary hang after retrieving the last op on the sync source but before // copying databases. diff --git a/jstests/replsets/libs/rollback_resumable_index_build.js b/jstests/replsets/libs/rollback_resumable_index_build.js index 0ab2148e783..e5c63d91ada 100644 --- a/jstests/replsets/libs/rollback_resumable_index_build.js +++ b/jstests/replsets/libs/rollback_resumable_index_build.js @@ -7,8 +7,10 @@ const RollbackResumableIndexBuildTest = class { * rollback starts is specified by rollbackStartFailPointName. The phase that the index build * will resume from after rollback completes is specified by rollbackEndFailPointName. If * either of these points is in the drain writes phase, documents to insert into the side - * writes table must be specified by sideWrites. 
Documents specified by insertsToBeRolledBack - * are inserted after transitioning to rollback operations and will be rolled back. + * writes table must be specified by sideWrites. locksYieldedFailPointName specifies a point + * during the index build between rollbackEndFailPointName and rollbackStartFailPointName at + * which its locks are yielded. Documents specified by insertsToBeRolledBack are inserted after + * transitioning to rollback operations and will be rolled back. */ static run(rollbackTest, dbName, @@ -18,6 +20,7 @@ const RollbackResumableIndexBuildTest = class { rollbackStartFailPointData, rollbackEndFailPointName, rollbackEndFailPointData, + locksYieldedFailPointName, insertsToBeRolledBack, sideWrites = []) { const originalPrimary = rollbackTest.getPrimary(); @@ -29,6 +32,14 @@ const RollbackResumableIndexBuildTest = class { rollbackTest.awaitLastOpCommitted(); + // Set internalQueryExecYieldIterations to 0 and maxIndexBuildDrainBatchSize to 1 so that + // the index build is guaranteed to yield its locks between the rollback end and start + // failpoints. + assert.commandWorked( + originalPrimary.adminCommand({setParameter: 1, internalQueryExecYieldIterations: 0})); + assert.commandWorked( + originalPrimary.adminCommand({setParameter: 1, maxIndexBuildDrainBatchSize: 1})); + const coll = originalPrimary.getDB(dbName).getCollection(collName); const indexName = "rollback_resumable_index_build"; @@ -57,32 +68,23 @@ const RollbackResumableIndexBuildTest = class { assert.commandWorked(coll.insert(insertsToBeRolledBack)); - // Disable the failpoint in a parallel shell so that the primary can step down when the - // rollback test is transitioning to sync source operations before rollback. - const awaitDisableFailPointAfterContinuingInBackground = startParallelShell( - funWithArgs(function(failPointName, buildUUID) { - // Wait for the index build to be continue in the background. 
- checkLog.containsJson(db.getMongo(), 4760400, { - buildUUID: function(uuid) { - return uuid["uuid"]["$uuid"] === buildUUID; - } - }); - - // Disable the failpoint so that stepdown can proceed. - assert.commandWorked( - db.adminCommand({configureFailPoint: failPointName, mode: "off"})); - }, rollbackEndFp.failPointName, buildUUID), originalPrimary.port); + // Move the index build forward to a point at which its locks are yielded. This allows the + // primary to step down during the call to transitionToSyncSourceOperationsBeforeRollback() + // below. + const locksYieldedFp = configureFailPoint( + originalPrimary, locksYieldedFailPointName, {namespace: coll.getFullName()}); + rollbackEndFp.off(); + locksYieldedFp.wait(); rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); - awaitDisableFailPointAfterContinuingInBackground(); - // The index creation will report as having failed due to InterruptedDueToReplStateChange, // but it is still building in the background. awaitCreateIndex(); // Wait until the index build reaches the desired starting point so that we can start the // rollback. 
+ locksYieldedFp.off(); rollbackStartFp.wait(); // We ignore the return value here because the node will go into rollback immediately upon diff --git a/jstests/replsets/libs/secondary_reads_test.js b/jstests/replsets/libs/secondary_reads_test.js index 1d712fce05a..4840708dba2 100644 --- a/jstests/replsets/libs/secondary_reads_test.js +++ b/jstests/replsets/libs/secondary_reads_test.js @@ -14,7 +14,7 @@ function SecondaryReadsTest(name = "secondary_reads_test") { let primaryDB = primary.getDB(dbName); let secondary = rst.getSecondary(); let secondaryDB = secondary.getDB(dbName); - secondaryDB.getMongo().setSlaveOk(); + secondaryDB.getMongo().setSecondaryOk(); let readers = []; let signalColl = "signalColl"; @@ -37,7 +37,7 @@ function SecondaryReadsTest(name = "secondary_reads_test") { this.startSecondaryReaders = function(nReaders, readFn) { let read = function() { - db.getMongo().setSlaveOk(); + db.getMongo().setSecondaryOk(); db = db.getSiblingDB(TestData.dbName); while (true) { readFn(); diff --git a/jstests/replsets/maintenance2.js b/jstests/replsets/maintenance2.js index 2b904346945..c62d6bf17b6 100644 --- a/jstests/replsets/maintenance2.js +++ b/jstests/replsets/maintenance2.js @@ -40,7 +40,7 @@ secondaries.forEach(function(secondary) { assert.eq(stats.myState, 3, "Secondary should be in recovering state."); print("count should fail in recovering state..."); - secondary.slaveOk = true; + secondary.setSecondaryOk(); assert.commandFailed(secondary.getDB("foo").runCommand({count: "foo"})); // unset maintenance mode when done diff --git a/jstests/replsets/no_disconnect_on_stepdown.js b/jstests/replsets/no_disconnect_on_stepdown.js index 68877c6fc64..77a5526c50b 100644 --- a/jstests/replsets/no_disconnect_on_stepdown.js +++ b/jstests/replsets/no_disconnect_on_stepdown.js @@ -73,7 +73,7 @@ function runStepDownTest({description, failpoint, operation, errorCode}) { assert.commandWorked(primaryAdmin.adminCommand({serverStatus: 1})).metrics.repl; 
assert.eq(replMetrics.stateTransition.lastStateTransition, "stepDown"); assert.eq(replMetrics.stateTransition.userOperationsKilled, 1); - assert.eq(replMetrics.network.notMasterUnacknowledgedWrites, 0); + assert.eq(replMetrics.network.notPrimaryUnacknowledgedWrites, 0); // Allow the primary to be re-elected, and wait for it. assert.commandWorked(primaryAdmin.adminCommand({replSetFreeze: 0})); diff --git a/jstests/replsets/not_master_unacknowledged_write.js b/jstests/replsets/not_master_unacknowledged_write.js index a1570de931a..1fc65ddb7ba 100644 --- a/jstests/replsets/not_master_unacknowledged_write.js +++ b/jstests/replsets/not_master_unacknowledged_write.js @@ -5,12 +5,12 @@ (function() { "use strict"; -function getNotMasterUnackWritesCounter() { +function getNotPrimaryUnackWritesCounter() { return assert.commandWorked(primaryDB.adminCommand({serverStatus: 1})) - .metrics.repl.network.notMasterUnacknowledgedWrites; + .metrics.repl.network.notPrimaryUnacknowledgedWrites; } -const collName = "not_master_unacknowledged_write"; +const collName = "not_primary_unacknowledged_write"; var rst = new ReplSetTest({nodes: [{}, {rsConfig: {priority: 0}}]}); rst.startSet(); @@ -22,8 +22,8 @@ var secondaryDB = secondary.getDB("test"); var primaryColl = primaryDB[collName]; var secondaryColl = secondaryDB[collName]; -// Verify that reading from secondaries does not impact `notMasterUnacknowledgedWrites`. -const preReadingCounter = getNotMasterUnackWritesCounter(); +// Verify that reading from secondaries does not impact `notPrimaryUnacknowledgedWrites`. 
+const preReadingCounter = getNotPrimaryUnackWritesCounter(); jsTestLog("Reading from secondary ..."); [{name: "findOne", fn: () => secondaryColl.findOne()}, {name: "distinct", fn: () => secondaryColl.distinct("item")}, @@ -32,7 +32,7 @@ jsTestLog("Reading from secondary ..."); assert.doesNotThrow(fn); assert.eq(assert.commandWorked(secondary.getDB("admin").isMaster()).ismaster, false); }); -const postReadingCounter = getNotMasterUnackWritesCounter(); +const postReadingCounter = getNotPrimaryUnackWritesCounter(); assert.eq(preReadingCounter, postReadingCounter); jsTestLog("Primary on port " + primary.port + " hangs up on unacknowledged writes"); @@ -71,7 +71,7 @@ var command = var awaitShell = startParallelShell(command, primary.port); -let failedUnackWritesBefore = getNotMasterUnackWritesCounter(); +let failedUnackWritesBefore = getNotPrimaryUnackWritesCounter(); jsTestLog("Beginning unacknowledged insert"); primaryColl.insertOne({}, {writeConcern: {w: 0}}); @@ -87,7 +87,7 @@ assert.includes(result.toString(), "network error while attempting to run comman // Validate the number of unacknowledged writes failed due to step down resulted in network // disconnection. -let failedUnackWritesAfter = getNotMasterUnackWritesCounter(); +let failedUnackWritesAfter = getNotPrimaryUnackWritesCounter(); assert.eq(failedUnackWritesAfter, failedUnackWritesBefore + 1); rst.stopSet(); diff --git a/jstests/replsets/plan_cache_slaveok.js b/jstests/replsets/plan_cache_slaveok.js index 4ef60d93795..c20decf9eb1 100644 --- a/jstests/replsets/plan_cache_slaveok.js +++ b/jstests/replsets/plan_cache_slaveok.js @@ -1,7 +1,7 @@ // Verify that the plan cache and index filter commands can be run on secondaries, but only -// if slave ok is explicitly set. +// if secondaryOk is explicitly set. 
-var name = "plan_cache_slaveok"; +var name = "plan_cache_secondaryok"; function assertPlanCacheCommandsSucceed(db) { assert.commandWorked(db.runCommand({planCacheClear: name, query: {a: 1}})); @@ -50,13 +50,13 @@ assert.eq(1, primary.getDB("test")[name].findOne({a: 1})["a"]); // Make sure the plan cache commands succeed on the primary. assertPlanCacheCommandsSucceed(primary.getDB("test")); -// With slave ok false, the commands should fail on the secondary. +// With secondaryOk false, the commands should fail on the secondary. var secondary = replTest.getSecondary(); -secondary.getDB("test").getMongo().setSlaveOk(false); +secondary.getDB("test").getMongo().setSecondaryOk(false); assertPlanCacheCommandsFail(secondary.getDB("test")); -// With slave ok true, the commands should succeed on the secondary. -secondary.getDB("test").getMongo().setSlaveOk(true); +// With secondaryOk true, the commands should succeed on the secondary. +secondary.getDB("test").getMongo().setSecondaryOk(); assertPlanCacheCommandsSucceed(secondary.getDB("test")); replTest.stopSet(); diff --git a/jstests/replsets/prepare_transaction_read_at_cluster_time.js b/jstests/replsets/prepare_transaction_read_at_cluster_time.js index 24894823b1a..1e6ae30b5d6 100644 --- a/jstests/replsets/prepare_transaction_read_at_cluster_time.js +++ b/jstests/replsets/prepare_transaction_read_at_cluster_time.js @@ -16,7 +16,7 @@ const runDBHashFn = (host, dbName, clusterTime, useSnapshot) => { const conn = new Mongo(host); const db = conn.getDB(dbName); - conn.setSlaveOk(); + conn.setSecondaryOk(); let cmd; if (useSnapshot) { cmd = {dbHash: 1, readConcern: {level: "snapshot", atClusterTime: eval(clusterTime)}}; diff --git a/jstests/replsets/print_secondary_replication_info_unreachable_secondary.js b/jstests/replsets/print_secondary_replication_info_unreachable_secondary.js new file mode 100644 index 00000000000..4948ac85801 --- /dev/null +++ b/jstests/replsets/print_secondary_replication_info_unreachable_secondary.js @@ 
-0,0 +1,25 @@ +// Tests the output of db.printSecondaryReplicationInfo() for unreachable secondaries. + +(function() { +"use strict"; +const name = "printSecondaryReplicationInfo"; +const replSet = new ReplSetTest({name: name, nodes: 2}); +replSet.startSet(); +replSet.initiateWithHighElectionTimeout(); + +const primary = replSet.getPrimary(); +primary.getDB('test').foo.insert({a: 1}); +replSet.awaitReplication(); + +const secondary = replSet.getSecondary(); +replSet.stop(replSet.getNodeId(secondary)); +replSet.waitForState(secondary, ReplSetTest.State.DOWN); + +const joinShell = + startParallelShell("db.getSiblingDB('admin').printSecondaryReplicationInfo();", primary.port); +joinShell(); +assert( + rawMongoProgramOutput().match("no replication info, yet. State: \\(not reachable/healthy\\)")); + +replSet.stopSet(); +})(); diff --git a/jstests/replsets/quiesce_mode.js b/jstests/replsets/quiesce_mode.js index ae47952cbf8..52c39f3edbd 100644 --- a/jstests/replsets/quiesce_mode.js +++ b/jstests/replsets/quiesce_mode.js @@ -48,7 +48,7 @@ function runAwaitableIsMaster(topologyVersionField) { } function runFind() { - db.getMongo().setSlaveOk(); + db.getMongo().setSecondaryOk(); assert.eq(4, db.getSiblingDB("test").coll.find().itcount()); } diff --git a/jstests/replsets/read_committed_after_rollback.js b/jstests/replsets/read_committed_after_rollback.js index 41bd1d29268..a7e46e15e86 100644 --- a/jstests/replsets/read_committed_after_rollback.js +++ b/jstests/replsets/read_committed_after_rollback.js @@ -75,7 +75,7 @@ assert.eq(doDirtyRead(oldPrimaryColl), 'INVALID'); assert.eq(doCommittedRead(oldPrimaryColl), 'old'); // Change the partitioning so that oldPrimary is isolated, and newPrimary can be elected. 
-oldPrimary.setSlaveOk(); +oldPrimary.setSecondaryOk(); oldPrimary.disconnect(arbiters); newPrimary.reconnect(arbiters); assert.soon(() => newPrimary.adminCommand('isMaster').ismaster, '', 60 * 1000); diff --git a/jstests/replsets/read_committed_no_snapshots.js b/jstests/replsets/read_committed_no_snapshots.js index a0fe52cd565..280b0de7d49 100644 --- a/jstests/replsets/read_committed_no_snapshots.js +++ b/jstests/replsets/read_committed_no_snapshots.js @@ -38,9 +38,9 @@ replTest.initiateWithAnyNodeAsPrimary( var primary = replTest.getPrimary(); var secondaries = replTest.getSecondaries(); var healthySecondary = secondaries[0]; -healthySecondary.setSlaveOk(); +healthySecondary.setSecondaryOk(); var noSnapshotSecondary = secondaries[1]; -noSnapshotSecondary.setSlaveOk(); +noSnapshotSecondary.setSecondaryOk(); // Do a write, wait for it to replicate, and ensure it is visible. var res = primary.getDB(name).runCommandWithMetadata( // diff --git a/jstests/replsets/read_operations_during_rollback.js b/jstests/replsets/read_operations_during_rollback.js index d743c7b8303..f91ba15d31b 100644 --- a/jstests/replsets/read_operations_during_rollback.js +++ b/jstests/replsets/read_operations_during_rollback.js @@ -24,7 +24,7 @@ setFailPoint(rollbackNode, "rollbackHangAfterTransitionToRollback"); setFailPoint(rollbackNode, "GetMoreHangBeforeReadLock"); const joinGetMoreThread = startParallelShell(() => { - db.getMongo().setSlaveOk(); + db.getMongo().setSecondaryOk(); const cursorID = assert.commandWorked(db.runCommand({"find": "coll", batchSize: 0})).cursor.id; // Make sure an outstanding read operation gets killed during rollback even though the read // was started before rollback. 
Outstanding read operations are killed during rollback and diff --git a/jstests/replsets/read_operations_during_step_down.js b/jstests/replsets/read_operations_during_step_down.js index 96fa2651237..4909c7a9177 100644 --- a/jstests/replsets/read_operations_during_step_down.js +++ b/jstests/replsets/read_operations_during_step_down.js @@ -113,7 +113,7 @@ assert.eq(replMetrics.stateTransition.lastStateTransition, "stepDown"); assert.eq(replMetrics.stateTransition.userOperationsKilled, 0); // Should account for find and getmore commands issued before step down. assert.gte(replMetrics.stateTransition.userOperationsRunning, 2); -assert.eq(replMetrics.network.notMasterUnacknowledgedWrites, 0); +assert.eq(replMetrics.network.notPrimaryUnacknowledgedWrites, 0); rst.stopSet(); })(); diff --git a/jstests/replsets/read_operations_during_step_up.js b/jstests/replsets/read_operations_during_step_up.js index 91d202659a4..86ea4a3c0d6 100644 --- a/jstests/replsets/read_operations_during_step_up.js +++ b/jstests/replsets/read_operations_during_step_up.js @@ -36,10 +36,10 @@ assert.commandWorked(        primaryColl.insert({_id: 0}, {"writeConcern": {"w": "majority"}})); rst.awaitReplication(); -// It's possible for notMasterUnacknowledgedWrites to be non-zero because of mirrored reads during +// It's possible for notPrimaryUnacknowledgedWrites to be non-zero because of mirrored reads during // initial sync. let replMetrics = assert.commandWorked(secondaryAdmin.adminCommand({serverStatus: 1})).metrics.repl; -const startingNumNotMasterErrors = replMetrics.network.notMasterUnacknowledgedWrites; +const startingNumNotMasterErrors = replMetrics.network.notPrimaryUnacknowledgedWrites; // Open a cursor on secondary. const cursorIdToBeReadAfterStepUp = @@ -49,7 +49,7 @@ jsTestLog("2. Start blocking getMore cmd before step up"); const joinGetMoreThread = startParallelShell(() => { // Open another cursor on secondary before step up. 
secondaryDB = db.getSiblingDB(TestData.dbName); - secondaryDB.getMongo().setSlaveOk(true); + secondaryDB.getMongo().setSecondaryOk(); const cursorIdToBeReadDuringStepUp = assert.commandWorked(secondaryDB.runCommand({"find": TestData.collName, batchSize: 0})) @@ -71,7 +71,7 @@ waitForCurOpByFailPoint( jsTestLog("2. Start blocking find cmd before step up"); const joinFindThread = startParallelShell(() => { secondaryDB = db.getSiblingDB(TestData.dbName); - secondaryDB.getMongo().setSlaveOk(true); + secondaryDB.getMongo().setSecondaryOk(); // Enable the fail point for find cmd. assert.commandWorked( @@ -127,7 +127,7 @@ assert.eq(replMetrics.stateTransition.lastStateTransition, "stepUp"); assert.eq(replMetrics.stateTransition.userOperationsKilled, 0); // Should account for find and getmore commands issued before step up. assert.gte(replMetrics.stateTransition.userOperationsRunning, 2); -assert.eq(replMetrics.network.notMasterUnacknowledgedWrites, startingNumNotMasterErrors); +assert.eq(replMetrics.network.notPrimaryUnacknowledgedWrites, startingNumNotMasterErrors); rst.stopSet(); })(); diff --git a/jstests/replsets/reconstruct_prepared_transactions_initial_sync.js b/jstests/replsets/reconstruct_prepared_transactions_initial_sync.js index 6257f066eab..b72dd806295 100644 --- a/jstests/replsets/reconstruct_prepared_transactions_initial_sync.js +++ b/jstests/replsets/reconstruct_prepared_transactions_initial_sync.js @@ -129,7 +129,7 @@ replTest.awaitSecondaryNodes(); jsTestLog("Initial sync completed"); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); const secondaryColl = secondary.getDB(dbName).getCollection(collName); // Make sure that while reading from the node that went through initial sync, we can't read diff --git a/jstests/replsets/reconstruct_prepared_transactions_initial_sync_index_build.js b/jstests/replsets/reconstruct_prepared_transactions_initial_sync_index_build.js index 3574010f636..38f7f431ca2 100644 --- 
a/jstests/replsets/reconstruct_prepared_transactions_initial_sync_index_build.js +++ b/jstests/replsets/reconstruct_prepared_transactions_initial_sync_index_build.js @@ -106,7 +106,7 @@ replTest.awaitSecondaryNodes(); jsTestLog("Initial sync completed"); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); const secondaryColl = secondary.getDB(dbName).getCollection(collName); // Make sure that while reading from the node that went through initial sync, we can't read diff --git a/jstests/replsets/reconstruct_prepared_transactions_initial_sync_no_oplog_application.js b/jstests/replsets/reconstruct_prepared_transactions_initial_sync_no_oplog_application.js index dbc2c05dfff..d5b0eb39898 100644 --- a/jstests/replsets/reconstruct_prepared_transactions_initial_sync_no_oplog_application.js +++ b/jstests/replsets/reconstruct_prepared_transactions_initial_sync_no_oplog_application.js @@ -80,7 +80,7 @@ replTest.awaitSecondaryNodes(); jsTestLog("Initial sync completed"); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); const secondaryColl = secondary.getDB(dbName).getCollection(collName); // Make sure that while reading from the node that went through initial sync, we can't read diff --git a/jstests/replsets/reconstruct_prepared_transactions_initial_sync_on_oplog_seed.js b/jstests/replsets/reconstruct_prepared_transactions_initial_sync_on_oplog_seed.js index 85e4b4e9874..f5100c39e35 100644 --- a/jstests/replsets/reconstruct_prepared_transactions_initial_sync_on_oplog_seed.js +++ b/jstests/replsets/reconstruct_prepared_transactions_initial_sync_on_oplog_seed.js @@ -107,7 +107,7 @@ PrepareHelpers.awaitMajorityCommitted(replTest, prepareTimestamp); jsTestLog("Initial sync completed"); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); const secondaryColl = secondary.getDB(dbName).getCollection(collName); jsTestLog("Checking that the transaction is properly prepared"); diff --git a/jstests/replsets/recover_prepared_transactions_startup_secondary_application.js 
b/jstests/replsets/recover_prepared_transactions_startup_secondary_application.js index 31845da9629..56f40a8793f 100644 --- a/jstests/replsets/recover_prepared_transactions_startup_secondary_application.js +++ b/jstests/replsets/recover_prepared_transactions_startup_secondary_application.js @@ -82,7 +82,7 @@ PrepareHelpers.awaitMajorityCommitted(replTest, prepareTimestamp2); // Wait for the node to complete recovery before trying to read from it. replTest.awaitSecondaryNodes(); -secondary.setSlaveOk(); +secondary.setSecondaryOk(); jsTestLog("Checking that the first transaction is properly prepared"); diff --git a/jstests/replsets/rename_collection_temp.js b/jstests/replsets/rename_collection_temp.js index dc6ffd6f84a..1bf716784d6 100644 --- a/jstests/replsets/rename_collection_temp.js +++ b/jstests/replsets/rename_collection_temp.js @@ -54,7 +54,7 @@ replTest.awaitReplication(); var secondary = replTest.getSecondary(); var secondaryFoo = secondary.getDB("foo"); -secondaryFoo.permanentColl.setSlaveOk(true); +secondaryFoo.permanentColl.setSecondaryOk(); // Get the information on the secondary to ensure it was replicated correctly. 
checkCollectionTemp(secondaryFoo, "permanentColl", false); diff --git a/jstests/replsets/replset1.js b/jstests/replsets/replset1.js index 8225422338d..35ceb70121f 100644 --- a/jstests/replsets/replset1.js +++ b/jstests/replsets/replset1.js @@ -104,7 +104,7 @@ var doTest = function(signal) { var secondaries = replTest.getSecondaries(); assert(secondaries.length == 2, "Expected 2 secondaries but length was " + secondaries.length); secondaries.forEach(function(secondary) { - secondary.setSlaveOk(); + secondary.setSecondaryOk(); var count = secondary.getDB("bar").runCommand({count: "bar"}); printjson(count); assert.eq(1000, count.n, "secondary count wrong: " + secondary); @@ -118,7 +118,7 @@ var doTest = function(signal) { var t = db.foo; var ts = secondaries.map(function(z) { - z.setSlaveOk(); + z.setSecondaryOk(); return z.getDB("foo").foo; }); diff --git a/jstests/replsets/replset2.js b/jstests/replsets/replset2.js index 3c9b9613eed..38134794dfb 100644 --- a/jstests/replsets/replset2.js +++ b/jstests/replsets/replset2.js @@ -29,7 +29,7 @@ doTest = function(signal) { var secondaries = replTest.getSecondaries(); secondaries.forEach(function(secondary) { - secondary.setSlaveOk(); + secondary.setSecondaryOk(); }); // Test write concern with multiple inserts. 
diff --git a/jstests/replsets/replset5.js b/jstests/replsets/replset5.js index 5488d8a9cd0..9ea1424a426 100644 --- a/jstests/replsets/replset5.js +++ b/jstests/replsets/replset5.js @@ -55,8 +55,8 @@ if (wcError != null) { } var secondaries = replTest.getSecondaries(); -secondaries[0].setSlaveOk(); -secondaries[1].setSlaveOk(); +secondaries[0].setSecondaryOk(); +secondaries[1].setSecondaryOk(); var secondary0Count = secondaries[0].getDB(testDB).foo.find().itcount(); assert(secondary0Count == docNum, diff --git a/jstests/replsets/replset6.js b/jstests/replsets/replset6.js index 40998d7f4f2..fd33175d823 100644 --- a/jstests/replsets/replset6.js +++ b/jstests/replsets/replset6.js @@ -10,7 +10,7 @@ var p = rt.getPrimary(); rt.awaitSecondaryNodes(); var secondaries = rt.getSecondaries(); s = secondaries[0]; -s.setSlaveOk(); +s.setSecondaryOk(); admin = p.getDB("admin"); debug = function(foo) {}; // print( foo ); } diff --git a/jstests/replsets/resync_majority_member.js b/jstests/replsets/resync_majority_member.js index 4ab48f043ea..df0d233b4e3 100644 --- a/jstests/replsets/resync_majority_member.js +++ b/jstests/replsets/resync_majority_member.js @@ -99,8 +99,8 @@ assert.soon(() => { }); // Observe that the old write does not exist anywhere in the set. 
-syncSource.setSlaveOk(); -resyncNode.setSlaveOk(); +syncSource.setSecondaryOk(); +resyncNode.setSecondaryOk(); assert.eq(0, syncSource.getDB(dbName)[collName].find(disappearingDoc).itcount()); assert.eq(0, resyncNode.getDB(dbName)[collName].find(disappearingDoc).itcount()); diff --git a/jstests/replsets/rollback_auth.js b/jstests/replsets/rollback_auth.js index 0aa7995bdc2..372f84ce645 100644 --- a/jstests/replsets/rollback_auth.js +++ b/jstests/replsets/rollback_auth.js @@ -47,8 +47,8 @@ replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY); var master = replTest.getPrimary(); var a_conn = conns[0]; var b_conn = conns[1]; -a_conn.setSlaveOk(); -b_conn.setSlaveOk(); +a_conn.setSecondaryOk(); +b_conn.setSecondaryOk(); var A = a_conn.getDB("admin"); var B = b_conn.getDB("admin"); var a = a_conn.getDB("test"); diff --git a/jstests/replsets/rollback_creates_rollback_directory.js b/jstests/replsets/rollback_creates_rollback_directory.js index 3cb47eb65a2..db795769bdf 100644 --- a/jstests/replsets/rollback_creates_rollback_directory.js +++ b/jstests/replsets/rollback_creates_rollback_directory.js @@ -31,8 +31,8 @@ function runRollbackDirectoryTest(shouldCreateRollbackFiles) { var master = replTest.getPrimary(); var a_conn = conns[0]; var b_conn = conns[1]; - a_conn.setSlaveOk(); - b_conn.setSlaveOk(); + a_conn.setSecondaryOk(); + b_conn.setSecondaryOk(); var A = a_conn.getDB("test"); var B = b_conn.getDB("test"); var Apath = replTest.getDbPath(a_conn) + '/'; diff --git a/jstests/replsets/rollback_crud_op_sequences.js b/jstests/replsets/rollback_crud_op_sequences.js index a2e89332141..cd42c303a96 100644 --- a/jstests/replsets/rollback_crud_op_sequences.js +++ b/jstests/replsets/rollback_crud_op_sequences.js @@ -45,10 +45,10 @@ replTest.initiate({ replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY); var master = replTest.getPrimary(); var a_conn = conns[0]; -a_conn.setSlaveOk(); +a_conn.setSecondaryOk(); var A = a_conn.getDB("admin"); var 
b_conn = conns[1]; -b_conn.setSlaveOk(); +b_conn.setSecondaryOk(); var B = b_conn.getDB("admin"); assert.eq(master, conns[0], "conns[0] assumed to be master"); assert.eq(a_conn, master); diff --git a/jstests/replsets/rollback_ddl_op_sequences.js b/jstests/replsets/rollback_ddl_op_sequences.js index aff9cedaa39..62b2fb9cae2 100644 --- a/jstests/replsets/rollback_ddl_op_sequences.js +++ b/jstests/replsets/rollback_ddl_op_sequences.js @@ -54,10 +54,10 @@ replTest.initiate({ replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY); var master = replTest.getPrimary(); var a_conn = conns[0]; -a_conn.setSlaveOk(); +a_conn.setSecondaryOk(); var A = a_conn.getDB("admin"); var b_conn = conns[1]; -b_conn.setSlaveOk(); +b_conn.setSecondaryOk(); var B = b_conn.getDB("admin"); assert.eq(master, conns[0], "conns[0] assumed to be master"); assert.eq(a_conn, master); diff --git a/jstests/replsets/rollback_resumable_index_build_bulk_load_phase.js b/jstests/replsets/rollback_resumable_index_build_bulk_load_phase.js index 81631163f06..5b75f1b7b2c 100644 --- a/jstests/replsets/rollback_resumable_index_build_bulk_load_phase.js +++ b/jstests/replsets/rollback_resumable_index_build_bulk_load_phase.js @@ -13,12 +13,6 @@ load('jstests/replsets/libs/rollback_resumable_index_build.js'); -// TODO(SERVER-50775): Re-enable when stepdown issues are fixed in resumable index rollback tests. -if (true) { - jsTestLog('Skipping test.'); - return; -} - const dbName = "test"; const rollbackStartFailPointName = "hangIndexBuildDuringBulkLoadPhase"; const insertsToBeRolledBack = [{a: 4}, {a: 5}]; @@ -35,8 +29,9 @@ RollbackResumableIndexBuildTest.run(rollbackTest, {a: 1}, rollbackStartFailPointName, {iteration: 1}, - "hangAfterSettingUpIndexBuildUnlocked", + "hangAfterSettingUpIndexBuild", {}, + "setYieldAllLocksHang", insertsToBeRolledBack); // Rollback to the collection scan phase. 
@@ -47,7 +42,8 @@ RollbackResumableIndexBuildTest.run(rollbackTest, rollbackStartFailPointName, {iteration: 1}, "hangIndexBuildDuringCollectionScanPhaseBeforeInsertion", - {fieldsToMatch: {a: 2}}, + {iteration: 1}, + "setYieldAllLocksHang", insertsToBeRolledBack); rollbackTest.stop(); diff --git a/jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js b/jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js index 23807d85383..33abffdc2e3 100644 --- a/jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js +++ b/jstests/replsets/rollback_resumable_index_build_collection_scan_phase.js @@ -13,12 +13,6 @@ load('jstests/replsets/libs/rollback_resumable_index_build.js'); -// TODO(SERVER-50775): Re-enable when stepdown issues are fixed in resumable index rollback tests. -if (true) { - jsTestLog('Skipping test.'); - return; -} - const dbName = "test"; const rollbackStartFailPointName = "hangIndexBuildDuringCollectionScanPhaseBeforeInsertion"; const insertsToBeRolledBack = [{a: 6}, {a: 7}]; @@ -34,9 +28,10 @@ RollbackResumableIndexBuildTest.run(rollbackTest, coll.getName(), {a: 1}, rollbackStartFailPointName, - {fieldsToMatch: {a: 2}}, - "hangAfterSettingUpIndexBuildUnlocked", + {iteration: 3}, + "hangAfterSettingUpIndexBuild", {}, + "setYieldAllLocksHang", insertsToBeRolledBack); // Rollback to earlier in the collection scan phase. 
@@ -45,9 +40,10 @@ RollbackResumableIndexBuildTest.run(rollbackTest, coll.getName(), {a: 1}, rollbackStartFailPointName, - {iteration: 4}, + {iteration: 3}, "hangIndexBuildDuringCollectionScanPhaseAfterInsertion", - {iteration: 2}, + {iteration: 1}, + "setYieldAllLocksHang", insertsToBeRolledBack); rollbackTest.stop(); diff --git a/jstests/replsets/rollback_resumable_index_build_complete.js b/jstests/replsets/rollback_resumable_index_build_complete.js index 51f5988f4c9..cb74fc96345 100644 --- a/jstests/replsets/rollback_resumable_index_build_complete.js +++ b/jstests/replsets/rollback_resumable_index_build_complete.js @@ -13,12 +13,6 @@ load('jstests/replsets/libs/rollback_resumable_index_build.js'); -// TODO(SERVER-50775): Re-enable when stepdown issues are fixed in resumable index rollback tests. -if (true) { - jsTestLog('Skipping test.'); - return; -} - const dbName = "test"; const insertsToBeRolledBack = [{a: 7}, {a: 8}]; @@ -32,7 +26,7 @@ RollbackResumableIndexBuildTest.runIndexBuildComplete(rollbackTest, dbName, coll.getName(), {a: 1}, - "hangAfterSettingUpIndexBuildUnlocked", + "hangAfterSettingUpIndexBuild", {}, insertsToBeRolledBack); diff --git a/jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js b/jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js index 4e025596884..922ee451e01 100644 --- a/jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js +++ b/jstests/replsets/rollback_resumable_index_build_drain_writes_phase.js @@ -13,15 +13,9 @@ load('jstests/replsets/libs/rollback_resumable_index_build.js'); -// TODO(SERVER-50775): Re-enable when stepdown issues are fixed in resumable index rollback tests. 
-if (true) { - jsTestLog('Skipping test.'); - return; -} - const dbName = "test"; const rollbackStartFailPointName = "hangIndexBuildDuringDrainWritesPhase"; -const insertsToBeRolledBack = [{a: 13}, {a: 14}]; +const insertsToBeRolledBack = [{a: 18}, {a: 19}]; const rollbackTest = new RollbackTest(jsTestName()); const coll = rollbackTest.getPrimary().getDB(dbName).getCollection(jsTestName()); @@ -34,11 +28,12 @@ RollbackResumableIndexBuildTest.run(rollbackTest, coll.getName(), {a: 1}, rollbackStartFailPointName, - {iteration: 0}, - "hangAfterSettingUpIndexBuildUnlocked", + {iteration: 1}, + "hangAfterSettingUpIndexBuild", {}, + "hangDuringIndexBuildDrainYield", insertsToBeRolledBack, - [{a: 4}, {a: 5}]); + [{a: 4}, {a: 5}, {a: 6}]); // Rollback to the collection scan phase. RollbackResumableIndexBuildTest.run(rollbackTest, @@ -46,11 +41,12 @@ RollbackResumableIndexBuildTest.run(rollbackTest, coll.getName(), {a: 1}, rollbackStartFailPointName, - {iteration: 0}, + {iteration: 1}, "hangIndexBuildDuringCollectionScanPhaseBeforeInsertion", - {fieldsToMatch: {a: 2}}, + {iteration: 1}, + "hangDuringIndexBuildDrainYield", insertsToBeRolledBack, - [{a: 6}, {a: 7}]); + [{a: 7}, {a: 8}, {a: 9}]); // Rollback to the bulk load phase. RollbackResumableIndexBuildTest.run(rollbackTest, @@ -58,26 +54,25 @@ RollbackResumableIndexBuildTest.run(rollbackTest, coll.getName(), {a: 1}, rollbackStartFailPointName, - {iteration: 0}, + {iteration: 1}, "hangIndexBuildDuringBulkLoadPhase", {iteration: 1}, + "hangDuringIndexBuildDrainYield", insertsToBeRolledBack, - [{a: 8}, {a: 9}]); + [{a: 10}, {a: 11}, {a: 12}]); -// Rollback to earlier in the drain writes phase. We set maxIndexBuildDrainBatchSize to 1 so that -// the primary can step down between iterations. -assert.commandWorked( - rollbackTest.getPrimary().adminCommand({setParameter: 1, maxIndexBuildDrainBatchSize: 1})); +// Rollback to earlier in the drain writes phase. 
RollbackResumableIndexBuildTest.run(rollbackTest, dbName, coll.getName(), {a: 1}, rollbackStartFailPointName, - {iteration: 2}, + {iteration: 3}, "hangIndexBuildDuringDrainWritesPhaseSecond", - {iteration: 0}, + {iteration: 1}, + "hangDuringIndexBuildDrainYield", insertsToBeRolledBack, - [{a: 10}, {a: 11}, {a: 12}]); + [{a: 13}, {a: 14}, {a: 15}, {a: 16}, {a: 17}]); rollbackTest.stop(); })(); diff --git a/jstests/replsets/rslib.js b/jstests/replsets/rslib.js index 65567450c96..b53a5030d42 100644 --- a/jstests/replsets/rslib.js +++ b/jstests/replsets/rslib.js @@ -147,7 +147,7 @@ reconnect = function(conn) { }; getLatestOp = function(server) { - server.getDB("admin").getMongo().setSlaveOk(); + server.getDB("admin").getMongo().setSecondaryOk(); var log = server.getDB("local")['oplog.rs']; var cursor = log.find({}).sort({'$natural': -1}).limit(1); if (cursor.hasNext()) { @@ -157,7 +157,7 @@ getLatestOp = function(server) { }; getLeastRecentOp = function({server, readConcern}) { - server.getDB("admin").getMongo().setSlaveOk(); + server.getDB("admin").getMongo().setSecondaryOk(); const oplog = server.getDB("local").oplog.rs; const cursor = oplog.find().sort({$natural: 1}).limit(1).readConcern(readConcern); if (cursor.hasNext()) { diff --git a/jstests/replsets/server8070.js b/jstests/replsets/server8070.js index 876a768fd7a..5bc4fd8f60d 100644 --- a/jstests/replsets/server8070.js +++ b/jstests/replsets/server8070.js @@ -36,8 +36,8 @@ replSet.initiate({ // set up common points of access var master = replSet.getPrimary(); var primary = master.getDB("foo"); -replSet.nodes[1].setSlaveOk(); -replSet.nodes[2].setSlaveOk(); +replSet.nodes[1].setSecondaryOk(); +replSet.nodes[2].setSecondaryOk(); var member2 = replSet.nodes[1].getDB("admin"); var member3 = replSet.nodes[2].getDB("admin"); diff --git a/jstests/replsets/slavedelay3.js b/jstests/replsets/slavedelay3.js index 9d09fa4486c..1d12d22912b 100644 --- a/jstests/replsets/slavedelay3.js +++ b/jstests/replsets/slavedelay3.js @@ 
-17,7 +17,7 @@ var secondaryConns = replTest.getSecondaries(); var secondaries = []; for (var i in secondaryConns) { var d = secondaryConns[i].getDB(name); - d.getMongo().setSlaveOk(); + d.getMongo().setSecondaryOk(); secondaries.push(d); } diff --git a/jstests/replsets/slaveok_read_pref.js b/jstests/replsets/slaveok_read_pref.js index 9fc11600e8d..e35f36e9c97 100644 --- a/jstests/replsets/slaveok_read_pref.js +++ b/jstests/replsets/slaveok_read_pref.js @@ -1,5 +1,5 @@ -// Test that slaveOk is implicitly allowed for queries on a secondary with a read preference other -// than 'primary', and that queries which do have 'primary' read preference fail. +// Test that secondaryOk is implicitly allowed for queries on a secondary with a read preference +// other than 'primary', and that queries which do have 'primary' read preference fail. (function() { "use strict"; @@ -28,18 +28,18 @@ const secDB = rst.getSecondary().getDB(jsTestName()); for (let readMode of ["commands", "legacy"]) { for (let readPref of readPrefs) { - for (let slaveOk of [true, false]) { - const testType = {readMode: readMode, readPref: readPref, slaveOk: slaveOk}; + for (let secondaryOk of [true, false]) { + const testType = {readMode: readMode, readPref: readPref, secondaryOk: secondaryOk}; secDB.getMongo().forceReadMode(readMode); - secDB.getMongo().setSlaveOk(slaveOk); + secDB.getMongo().setSecondaryOk(secondaryOk); const cursor = (readPref ? secDB.test.find().readPref(readPref) : secDB.test.find()); - if (readPref === "primary" || (!readPref && !slaveOk)) { + if (readPref === "primary" || (!readPref && !secondaryOk)) { // Attempting to run the query throws an error of type NotPrimaryNoSecondaryOk. 
- const slaveOkErr = assert.throws(() => cursor.itcount(), [], tojson(testType)); - assert.commandFailedWithCode(slaveOkErr, ErrorCodes.NotPrimaryNoSecondaryOk); + const secondaryOkErr = assert.throws(() => cursor.itcount(), [], tojson(testType)); + assert.commandFailedWithCode(secondaryOkErr, ErrorCodes.NotPrimaryNoSecondaryOk); } else { // Succeeds for all non-primary readPrefs, and for no readPref iff slaveOk. const docCount = assert.doesNotThrow(() => cursor.itcount(), [], tojson(testType)); @@ -51,7 +51,7 @@ for (let readMode of ["commands", "legacy"]) { function assertNotPrimaryNoSecondaryOk(func) { secDB.getMongo().forceReadMode("commands"); - secDB.getMongo().setSlaveOk(false); + secDB.getMongo().setSecondaryOk(false); secDB.getMongo().setReadPref("primary"); const res = assert.throws(func); assert.commandFailedWithCode(res, ErrorCodes.NotPrimaryNoSecondaryOk); @@ -59,7 +59,7 @@ function assertNotPrimaryNoSecondaryOk(func) { // Test that agg with $out/$merge and non-inline mapReduce fail with 'NotPrimaryNoSecondaryOk' when // directed at a secondary with "primary" read preference. -const secondaryColl = secDB.slaveok_read_pref; +const secondaryColl = secDB.secondaryok_read_pref; assertNotPrimaryNoSecondaryOk(() => secondaryColl.aggregate([{$out: "target"}]).itcount()); assertNotPrimaryNoSecondaryOk( () => diff --git a/jstests/replsets/startup_without_fcv_document_succeeds_if_initial_sync_flag_set.js b/jstests/replsets/startup_without_fcv_document_succeeds_if_initial_sync_flag_set.js index 690151796b3..ec537f873fd 100644 --- a/jstests/replsets/startup_without_fcv_document_succeeds_if_initial_sync_flag_set.js +++ b/jstests/replsets/startup_without_fcv_document_succeeds_if_initial_sync_flag_set.js @@ -35,7 +35,7 @@ rst.awaitSecondaryNodes(); // Get the new secondary connection. 
secondary = rst.getSecondary(); -secondary.setSlaveOk(true); +secondary.setSecondaryOk(); const secondaryAdminDb = secondary.getDB("admin"); // Assert that the FCV document was cloned through initial sync on the secondary. diff --git a/jstests/replsets/step_down_on_secondary.js b/jstests/replsets/step_down_on_secondary.js index 64fcf73c3a2..5e8933d9017 100644 --- a/jstests/replsets/step_down_on_secondary.js +++ b/jstests/replsets/step_down_on_secondary.js @@ -93,7 +93,7 @@ jsTestLog("Do a read that hits a prepare conflict on the old primary"); const wTPrintPrepareConflictLogFailPoint = configureFailPoint(primary, "WTPrintPrepareConflictLog"); const joinReadThread = startParallelShell(() => { - db.getMongo().setSlaveOk(true); + db.getMongo().setSecondaryOk(); oldPrimaryDB = db.getSiblingDB(TestData.dbName); assert.commandFailedWithCode(oldPrimaryDB.runCommand({ diff --git a/jstests/replsets/tenant_migration_donor_state_machine.js b/jstests/replsets/tenant_migration_donor_state_machine.js index 7f21efef1c6..f626ce5d5b9 100644 --- a/jstests/replsets/tenant_migration_donor_state_machine.js +++ b/jstests/replsets/tenant_migration_donor_state_machine.js @@ -154,9 +154,6 @@ configDonorsColl.createIndex({expireAt: 1}, {expireAfterSeconds: 0}); jsTest.log("Test the case where the migration aborts"); const migrationId = UUID(); - let configDonorsColl = donorPrimary.getCollection(kConfigDonorsNS); - configDonorsColl.createIndex({expireAt: 1}, {expireAfterSeconds: 0}); - let abortFp = configureFailPoint(donorPrimary, "abortTenantMigrationAfterBlockingStarts"); assert.commandFailedWithCode(donorPrimary.adminCommand({ donorStartMigration: 1, @@ -187,6 +184,29 @@ configDonorsColl.createIndex({expireAt: 1}, {expireAfterSeconds: 0}); testDonorForgetMigration(donorRst, recipientRst, migrationId, kDBPrefix); })(); +// Drop the TTL index to make sure that the migration state is still available when the +// donorForgetMigration command is retried. 
+configDonorsColl.dropIndex({expireAt: 1}); + +(() => { + jsTest.log("Test that donorForgetMigration can be run multiple times"); + const migrationId = UUID(); + + assert.commandWorked(donorPrimary.adminCommand({ + donorStartMigration: 1, + migrationId: migrationId, + recipientConnectionString: kRecipientConnString, + databasePrefix: kDBPrefix, + readPreference: {mode: "primary"} + })); + + assert.commandWorked( + donorPrimary.adminCommand({donorForgetMigration: 1, migrationId: migrationId})); + + assert.commandWorked( + donorPrimary.adminCommand({donorForgetMigration: 1, migrationId: migrationId})); +})(); + donorRst.stopSet(); recipientRst.stopSet(); })(); diff --git a/jstests/replsets/transactions_only_allowed_on_primaries.js b/jstests/replsets/transactions_only_allowed_on_primaries.js index 7b71cf3eb67..e987eda0205 100644 --- a/jstests/replsets/transactions_only_allowed_on_primaries.js +++ b/jstests/replsets/transactions_only_allowed_on_primaries.js @@ -27,8 +27,8 @@ replTest.initiate(config); const primary = replTest.getPrimary(); const secondary = replTest.getSecondary(); -// Set slaveOk=true so that normal read commands would be allowed on the secondary. -secondary.setSlaveOk(true); +// Set secondaryOk=true so that normal read commands would be allowed on the secondary. +secondary.setSecondaryOk(); // Create a test collection that we can run commands against. const primaryDB = primary.getDB(dbName); diff --git a/jstests/sharding/agg_mongos_slaveok.js b/jstests/sharding/agg_mongos_slaveok.js index 287902092bc..01fb4286429 100644 --- a/jstests/sharding/agg_mongos_slaveok.js +++ b/jstests/sharding/agg_mongos_slaveok.js @@ -1,5 +1,5 @@ /** - * Tests aggregate command against mongos with slaveOk. For more tests on read preference, + * Tests aggregate command against mongos with secondaryOk. For more tests on read preference, * please refer to jstests/sharding/read_pref_cmd.js. 
* @tags: [ * requires_replication, @@ -21,12 +21,12 @@ var doTest = function(st, doSharded) { } testDB.user.insert({x: 10}, {writeConcern: {w: NODES}}); - testDB.setSlaveOk(true); + testDB.setSecondaryOk(); var secNode = st.rs0.getSecondary(); secNode.getDB('test').setProfilingLevel(2); - // wait for mongos to recognize that the slave is up + // wait for mongos to recognize that the secondary is up awaitRSClientHosts(st.s, secNode, {ok: true}); var res = testDB.runCommand({aggregate: 'user', pipeline: [{$project: {x: 1}}], cursor: {}}); diff --git a/jstests/sharding/all_shard_and_config_hosts_brought_down_one_by_one.js b/jstests/sharding/all_shard_and_config_hosts_brought_down_one_by_one.js index b0bd0f59e8c..a11f8dbc694 100644 --- a/jstests/sharding/all_shard_and_config_hosts_brought_down_one_by_one.js +++ b/jstests/sharding/all_shard_and_config_hosts_brought_down_one_by_one.js @@ -39,7 +39,7 @@ jsTest.log('Config nodes up: 1 of 3, shard nodes up: 1 of 2: ' + 'Only queries will work (no shard primary)'); st.rs0.stop(0); st.restartMongos(0); -st.s0.setSlaveOk(true); +st.s0.setSecondaryOk(); assert.eq([{_id: 0, count: 3}], st.s0.getDB('TestDB').TestColl.find().toArray()); jsTest.log('Config nodes up: 1 of 3, shard nodes up: 0 of 2: ' + diff --git a/jstests/sharding/auth_repl.js b/jstests/sharding/auth_repl.js index cd89c91f136..b806090fc3a 100644 --- a/jstests/sharding/auth_repl.js +++ b/jstests/sharding/auth_repl.js @@ -19,7 +19,7 @@ var testColl = testDB.user; // before setting up authentication assert.commandWorked(adminDB.runCommand({replSetGetStatus: 1})); -conn.setSlaveOk(); +conn.setSecondaryOk(); assert.commandWorked(adminDB.runCommand({replSetGetStatus: 1})); // Add admin user using direct connection to primary to simulate connection from remote host @@ -38,19 +38,19 @@ assert.eq(1, testDB.auth('a', 'a')); jsTest.log('Sending an authorized query that should be ok'); assert.commandWorked(testColl.insert({x: 1}, {writeConcern: {w: nodeCount}})); 
-conn.setSlaveOk(true); +conn.setSecondaryOk(); doc = testColl.findOne(); assert(doc != null); doc = testColl.find().readPref('secondary').next(); assert(doc != null); -conn.setSlaveOk(false); +conn.setSecondaryOk(false); doc = testColl.findOne(); assert(doc != null); var queryToPriShouldFail = function() { - conn.setSlaveOk(false); + conn.setSecondaryOk(false); assert.throws(function() { testColl.findOne(); @@ -63,7 +63,7 @@ var queryToPriShouldFail = function() { }; var queryToSecShouldFail = function() { - conn.setSlaveOk(true); + conn.setSecondaryOk(); assert.throws(function() { testColl.findOne(); @@ -104,7 +104,7 @@ queryToPriShouldFail(); assert.eq(1, testDB.auth('a', 'a')); // Find out the current cached secondary in the repl connection -conn.setSlaveOk(true); +conn.setSecondaryOk(); var serverInfo = testColl.find().readPref('secondary').explain().serverInfo; var secNodeIdx = -1; var secPortStr = serverInfo.port.toString(); diff --git a/jstests/sharding/auth_slaveok_routing.js b/jstests/sharding/auth_slaveok_routing.js index 8eff7833c9b..1e573fc7c9e 100644 --- a/jstests/sharding/auth_slaveok_routing.js +++ b/jstests/sharding/auth_slaveok_routing.js @@ -1,5 +1,5 @@ /** - * This tests whether slaveOk reads are properly routed through mongos in + * This tests whether secondaryOk reads are properly routed through mongos in * an authenticated environment. This test also includes restarting the * entire set, then querying afterwards. * @@ -59,11 +59,11 @@ priAdminDB.createUser({user: 'user', pwd: 'password', roles: jsTest.adminUserRol {w: 3, wtimeout: 30000}); coll.drop(); -coll.setSlaveOk(true); +coll.setSecondaryOk(); /* Secondaries should be up here, but they can still be in RECOVERY * state, which will make the ReplicaSetMonitor mark them as - * ok = false and not eligible for slaveOk queries. + * ok = false and not eligible for secondaryOk queries. 
*/ awaitRSClientHosts(mongos, replTest.getSecondaries(), {ok: true, secondary: true}); @@ -90,7 +90,7 @@ for (var n = 0; n < nodeCount; n++) { replTest.awaitSecondaryNodes(); -coll.setSlaveOk(true); +coll.setSecondaryOk(); /* replSetMonitor does not refresh the nodes information when getting secondaries. * A node that is previously labeled as secondary can now be a primary, so we diff --git a/jstests/sharding/autodiscover_config_rs_from_secondary.js b/jstests/sharding/autodiscover_config_rs_from_secondary.js index 9d9bd4adbd5..cc6ca3c11ae 100644 --- a/jstests/sharding/autodiscover_config_rs_from_secondary.js +++ b/jstests/sharding/autodiscover_config_rs_from_secondary.js @@ -53,7 +53,7 @@ var mongos = MongoRunner.runMongos({configdb: seedList}); rst.stop(1); var admin = mongos.getDB('admin'); -mongos.setSlaveOk(true); +mongos.setSecondaryOk(); assert.eq(1, admin.foo.findOne().a); MongoRunner.stopMongos(mongos); rst.stopSet(); diff --git a/jstests/sharding/balance_repl.js b/jstests/sharding/balance_repl.js index 83c92ff37b1..fb501c979cb 100644 --- a/jstests/sharding/balance_repl.js +++ b/jstests/sharding/balance_repl.js @@ -44,7 +44,7 @@ var collPrimary = (new Mongo(s.s0.host)).getDB('TestDB').TestColl; assert.eq(2100, collPrimary.find().itcount()); var collSlaveOk = (new Mongo(s.s0.host)).getDB('TestDB').TestColl; -collSlaveOk.setSlaveOk(); +collSlaveOk.setSecondaryOk(); assert.eq(2100, collSlaveOk.find().itcount()); assert.commandWorked(s.s0.adminCommand({ diff --git a/jstests/sharding/chunk_history_window.js b/jstests/sharding/chunk_history_window.js index 1be21395483..adc2ca7247a 100644 --- a/jstests/sharding/chunk_history_window.js +++ b/jstests/sharding/chunk_history_window.js @@ -21,17 +21,29 @@ load("jstests/sharding/libs/sharded_transactions_helpers.js"); -const configHistoryWindowSecs = 10; +// The snapshot window is the max of minSnapshotHistoryWindowInSeconds and +// transactionLifetimeLimitSeconds. 
+const transactionLifetimeLimitSecs = 15; +const minSnapshotHistoryWindowSecs = transactionLifetimeLimitSecs; +const snapshotHistoryWindowSecs = + Math.max(minSnapshotHistoryWindowSecs, transactionLifetimeLimitSecs); + const st = new ShardingTest({ shards: {rs0: {nodes: 2}, rs1: {nodes: 2}}, other: { configOptions: { setParameter: { - minSnapshotHistoryWindowInSeconds: configHistoryWindowSecs, + minSnapshotHistoryWindowInSeconds: minSnapshotHistoryWindowSecs, + transactionLifetimeLimitSeconds: transactionLifetimeLimitSecs, logComponentVerbosity: tojson({sharding: {verbosity: 2}}) } }, - rsOptions: {setParameter: {minSnapshotHistoryWindowInSeconds: 600}} + rsOptions: { + setParameter: { + minSnapshotHistoryWindowInSeconds: minSnapshotHistoryWindowSecs, + transactionLifetimeLimitSeconds: transactionLifetimeLimitSecs, + } + } } }); @@ -40,14 +52,14 @@ assert.eq(assert .commandWorked( primaryAdmin.runCommand({getParameter: 1, minSnapshotHistoryWindowInSeconds: 1})) .minSnapshotHistoryWindowInSeconds, - 600); + minSnapshotHistoryWindowSecs); const configAdmin = st.configRS.getPrimary().getDB("admin"); assert.eq(assert .commandWorked( configAdmin.runCommand({getParameter: 1, minSnapshotHistoryWindowInSeconds: 1})) .minSnapshotHistoryWindowInSeconds, - 10); + minSnapshotHistoryWindowSecs); const mongosDB = st.s.getDB(jsTestName()); const mongosColl = mongosDB.test; @@ -81,9 +93,9 @@ assert.eq(2, chunk.history.length, tojson(chunk)); // Test history window with 1s margin. const testMarginMS = 1000; -// Test that reading from a snapshot at insertTS is valid for up to configHistoryWindowSecs +// Test that reading from a snapshot at insertTS is valid for up to snapshotHistoryWindowSecs // minus the testMarginMS (as a buffer). 
-const testWindowMS = configHistoryWindowSecs * 1000 - testMarginMS; +const testWindowMS = snapshotHistoryWindowSecs * 1000 - testMarginMS; while (Date.now() - 1000 * insertTS.getTime() < testWindowMS) { // Test that reading from a snapshot at insertTS is still valid. assert.commandWorked(mongosDB.runCommand( @@ -95,7 +107,7 @@ while (Date.now() - 1000 * insertTS.getTime() < testWindowMS) { } // Sleep until our most recent chunk move is before the oldest history in our window. -const chunkExpirationTime = postMoveChunkTime + configHistoryWindowSecs * 1000; +const chunkExpirationTime = postMoveChunkTime + snapshotHistoryWindowSecs * 1000; sleep(chunkExpirationTime + testMarginMS - Date.now()); jsTestLog("Move chunk back to shard 0 to trigger history cleanup"); diff --git a/jstests/sharding/chunk_operations_invalidate_single_shard.js b/jstests/sharding/chunk_operations_invalidate_single_shard.js index e660cec2305..30a736fcdea 100644 --- a/jstests/sharding/chunk_operations_invalidate_single_shard.js +++ b/jstests/sharding/chunk_operations_invalidate_single_shard.js @@ -52,6 +52,7 @@ let testSplit = () => { const mongosCollectionVersion = getMongosCollVersion(ns); assert.commandWorked(st.s.adminCommand({split: ns, middle: {x: -500}})); + assert.eq(mongosCollectionVersion, getMongosCollVersion(ns)); testColl.findOne({x: 0}); testColl.findOne({x: 1000}); diff --git a/jstests/sharding/cluster_create_indexes_always_routes_through_primary.js b/jstests/sharding/cluster_create_indexes_always_routes_through_primary.js index 6c661e0abac..6b61bd12a68 100644 --- a/jstests/sharding/cluster_create_indexes_always_routes_through_primary.js +++ b/jstests/sharding/cluster_create_indexes_always_routes_through_primary.js @@ -1,5 +1,5 @@ // Ensure that a call to createIndexes in a sharded cluster will route to the primary, even when -// setSlaveOk() is set to true. +// setSecondaryOk() is set to true. 
(function() { 'use strict'; @@ -12,7 +12,7 @@ assert.commandWorked(testDB.adminCommand({enableSharding: testDBName})); assert.commandWorked( testDB.adminCommand({shardCollection: testDB[collName].getFullName(), key: {x: 1}})); -st.s.setSlaveOk(true); +st.s.setSecondaryOk(); assert.commandWorked( testDB.runCommand({createIndexes: collName, indexes: [{key: {a: 1}, name: "index"}]})); diff --git a/jstests/sharding/config_rs_no_primary.js b/jstests/sharding/config_rs_no_primary.js index 91ce74de45d..8bcf7e54cd4 100644 --- a/jstests/sharding/config_rs_no_primary.js +++ b/jstests/sharding/config_rs_no_primary.js @@ -43,9 +43,9 @@ var testOps = function(mongos) { assert.throws(function() { mongos.getDB('config').shards.findOne(); }); - mongos.setSlaveOk(true); + mongos.setSecondaryOk(); var shardDoc = mongos.getDB('config').shards.findOne(); - mongos.setSlaveOk(false); + mongos.setSecondaryOk(false); assert.neq(null, shardDoc); jsTestLog("Doing ops that require metadata writes and thus should fail against: " + mongos); diff --git a/jstests/sharding/count_config_servers.js b/jstests/sharding/count_config_servers.js index ded75607cd0..0904a873e52 100644 --- a/jstests/sharding/count_config_servers.js +++ b/jstests/sharding/count_config_servers.js @@ -13,7 +13,7 @@ TestData.skipCheckOrphans = true; "use strict"; var st = new ShardingTest({name: 'sync_conn_cmd', shards: 0}); -st.s.setSlaveOk(true); +st.s.setSecondaryOk(); var configDB = st.config; var coll = configDB.test; diff --git a/jstests/sharding/count_slaveok.js b/jstests/sharding/count_slaveok.js index e527128a7cd..23612d96220 100644 --- a/jstests/sharding/count_slaveok.js +++ b/jstests/sharding/count_slaveok.js @@ -1,5 +1,5 @@ /** - * Tests count and distinct using slaveOk. Also tests a scenario querying a set where only one + * Tests count and distinct using secondaryOk. Also tests a scenario querying a set where only one * secondary is up. 
*/ @@ -20,7 +20,7 @@ var rst = st.rs0; // Insert data into replica set var conn = new Mongo(st.s.host); -var coll = conn.getCollection('test.countSlaveOk'); +var coll = conn.getCollection('test.countSecondaryOk'); coll.drop(); var bulk = coll.initializeUnorderedBulkOp(); @@ -51,9 +51,9 @@ awaitRSClientHosts(conn, sec, {ok: true, secondary: true}); // Make sure that mongos realizes that primary is already down awaitRSClientHosts(conn, primary, {ok: false}); -// Need to check slaveOk=true first, since slaveOk=false will destroy conn in pool when +// Need to check secondaryOk=true first, since secondaryOk=false will destroy conn in pool when // master is down -conn.setSlaveOk(); +conn.setSecondaryOk(); // count using the command path assert.eq(30, coll.find({i: 0}).count()); @@ -62,14 +62,14 @@ assert.eq(30, coll.find({i: 0}).itcount()); assert.eq(10, coll.distinct("i").length); try { - conn.setSlaveOk(false); - // Should throw exception, since not slaveOk'd + conn.setSecondaryOk(false); + // Should throw exception, since not secondaryOk'd coll.find({i: 0}).count(); print("Should not reach here!"); assert(false); } catch (e) { - print("Non-slaveOk'd connection failed."); + print("Non-secondaryOk'd connection failed."); } st.stop(); diff --git a/jstests/sharding/error_propagation.js b/jstests/sharding/error_propagation.js index 6f47075f753..6fa9b7da74c 100644 --- a/jstests/sharding/error_propagation.js +++ b/jstests/sharding/error_propagation.js @@ -8,7 +8,7 @@ var st = new ShardingTest({mongos: 1, shards: 1, rs: {nodes: 3}}); var db = st.getDB('test'); -db.setSlaveOk(true); +db.setSecondaryOk(); assert.commandWorked(db.foo.insert({a: 1}, {writeConcern: {w: 3}})); assert.commandWorked(db.runCommand( diff --git a/jstests/sharding/mongos_forwards_api_parameters_to_shards.js b/jstests/sharding/mongos_forwards_api_parameters_to_shards.js new file mode 100644 index 00000000000..e611f716992 --- /dev/null +++ b/jstests/sharding/mongos_forwards_api_parameters_to_shards.js @@ 
-0,0 +1,213 @@ +/** + * When a client calls a mongos command with API parameters, mongos must forward them to shards. + * + * @tags: [multiversion_incompatible] + */ + +(function() { +'use strict'; + +load('jstests/sharding/libs/sharded_transactions_helpers.js'); + +let st = new ShardingTest({ + mongos: 1, + shards: 2, + rs: {nodes: 1, setParameter: {logComponentVerbosity: tojson({command: {verbosity: 2}})}} +}); + +class APIParameterTest { + constructor( + command, + {dbName = "db", inAPIVersion1 = true, permittedInTxn = true, shardCommandName} = {}) { + this.command = command; + this.dbName = dbName; + this.inAPIVersion1 = inAPIVersion1; + this.permittedInTxn = permittedInTxn; + if (shardCommandName === undefined) { + this.commandName = Object.keys(command)[0]; + } else { + // mongos executes a different command on the shards, e.g. mapReduce becomes aggregate. + this.commandName = shardCommandName; + } + } +} + +const tests = [ + // Write commands. Note, these rely on _id 1 residing on shard 0. + new APIParameterTest({insert: "collection", documents: [{_id: 1}]}), + new APIParameterTest({update: "collection", updates: [{q: {_id: 1}, u: {$set: {x: 1}}}]}), + new APIParameterTest({delete: "collection", deletes: [{q: {_id: 1}, limit: 1}]}), + + // Read commands. 
+ new APIParameterTest({aggregate: "collection", pipeline: [], cursor: {}}), + new APIParameterTest({aggregate: "collection", pipeline: [], cursor: {}, explain: true}, + {shardCommandName: "explain", permittedInTxn: false}), + new APIParameterTest({find: "collection"}), + new APIParameterTest({count: "collection"}, {permittedInTxn: false}), + new APIParameterTest({count: "collection", query: {_id: {$lt: 0}}}, + {inAPIVersion1: false, permittedInTxn: false}), + new APIParameterTest({distinct: "collection", key: "_id"}, + {inAPIVersion1: false, permittedInTxn: false}), + new APIParameterTest( + { + mapReduce: "collection", + map: function() { + emit(1, 1); + }, + reduce: function(key, values) { + return {count: values.length}; + }, + out: {inline: 1} + }, + {inAPIVersion1: false, permittedInTxn: false, shardCommandName: "aggregate"}), + + // FindAndModify. + new APIParameterTest({findAndModify: "collection", query: {_id: 1}, remove: true}), + + // DDL. Order matters: we must create, modify, then drop an index on collection2. + new APIParameterTest({createIndexes: "collection2", indexes: [{key: {x: 1}, name: "x_1"}]}), + new APIParameterTest({collMod: "collection2", index: {keyPattern: {x: 1}, hidden: true}}, + {permittedInTxn: false}), + new APIParameterTest({dropIndexes: "collection2", index: "x_1"}, {permittedInTxn: false}), + // We can create indexes on a non-existent collection in a sharded transaction. 
+ new APIParameterTest({create: "newCollection"}), + new APIParameterTest({renameCollection: "db.newCollection", to: "db.newerCollection"}, + {inAPIVersion1: false, permittedInTxn: false, dbName: "admin"}), + new APIParameterTest({drop: "collection"}, {permittedInTxn: false}), + new APIParameterTest({dropDatabase: 1}, {permittedInTxn: false}), +]; + +function checkPrimaryLog(conn, commandName, apiVersion, apiStrict, apiDeprecationErrors, message) { + const logs = checkLog.getGlobalLog(conn); + let lastCommandInvocation; + + for (let logMsg of logs) { + const obj = JSON.parse(logMsg); + // Search for "About to run the command" logs. + if (obj.id !== 21965) { + continue; + } + + const args = obj.attr.commandArgs; + if (commandName !== Object.keys(args)[0]) { + continue; + } + + lastCommandInvocation = args; + if (args.apiVersion !== apiVersion || args.apiStrict !== apiStrict || + args.apiDeprecationErrors !== apiDeprecationErrors) { + continue; + } + + // Found a match. + return; + } + + if (lastCommandInvocation === undefined) { + doassert(`Primary didn't log ${commandName}`); + return; + } + + doassert(`Primary didn't log ${message}, last invocation of ${commandName} was` + + ` ${tojson(lastCommandInvocation)}`); +} + +for (const sharded of [false, true]) { + for (const [apiVersion, apiStrict, apiDeprecationErrors] of [[undefined, undefined, undefined], + ["1", undefined, undefined], + ["1", undefined, false], + ["1", undefined, true], + ["1", false, undefined], + ["1", false, false], + ["1", false, true], + ["1", true, undefined], + ["1", true, false], + ["1", true, true], + ]) { + for (let inTransaction of [false, true]) { + if (sharded) { + jsTestLog("Sharded setup"); + assert.commandWorked(st.s.getDB("db")["collection"].insert( + {_id: 0}, {writeConcern: {w: "majority"}})); + assert.commandWorked(st.s.getDB("db")["collection"].insert( + {_id: 20}, {writeConcern: {w: "majority"}})); + + assert.commandWorked(st.s.adminCommand({enableSharding: "db"})); + 
st.ensurePrimaryShard("db", st.shard0.shardName); + assert.commandWorked( + st.s.adminCommand({shardCollection: "db.collection", key: {_id: 1}})); + + // The chunk with _id 1 is on shard 0. + assert.commandWorked( + st.s.adminCommand({split: "db.collection", middle: {_id: 10}})); + assert.commandWorked(st.s.adminCommand( + {moveChunk: "db.collection", find: {_id: 20}, to: st.shard1.shardName})); + } else { + jsTestLog("Unsharded setup"); + assert.commandWorked(st.s.getDB("db")["collection"].insert( + {_id: 0}, {writeConcern: {w: "majority"}})); + st.ensurePrimaryShard("db", st.shard0.shardName); + } + + // Shard 0's primary. + const primary = st.rs0.getPrimary(); + + for (const test of tests) { + if (inTransaction && !test.permittedInTxn) { + continue; + } + + if (apiStrict && !test.inAPIVersion1) { + continue; + } + + // Make a copy of the test's command body, and set its API parameters. + const commandWithAPIParams = Object.assign({}, test.command); + if (apiVersion !== undefined) { + commandWithAPIParams.apiVersion = apiVersion; + } + + if (apiStrict !== undefined) { + commandWithAPIParams.apiStrict = apiStrict; + } + + if (apiDeprecationErrors !== undefined) { + commandWithAPIParams.apiDeprecationErrors = apiDeprecationErrors; + } + + assert.commandWorked(primary.adminCommand({clearLog: "global"})); + const message = `command ${tojson(commandWithAPIParams)}` + + ` ${sharded ? "sharded" : "unsharded"},` + + ` ${inTransaction ? 
"in" : "outside"} transaction`; + + flushRoutersAndRefreshShardMetadata(st, {ns: "db.collection"}); + + jsTestLog(`Running ${message}`); + + if (inTransaction) { + const session = st.s0.startSession(); + const sessionDb = session.getDatabase(test.dbName); + session.startTransaction(); + assert.commandWorked(sessionDb.runCommand(commandWithAPIParams)); + assert.commandWorked(session.commitTransaction_forTesting()); + } else { + const db = st.s0.getDB(test.dbName); + assert.commandWorked(db.runCommand(commandWithAPIParams)); + } + + checkPrimaryLog(primary, + test.commandName, + apiVersion, + apiStrict, + apiDeprecationErrors, + message); + } + + jsTestLog("JS test cleanup: Drop database 'db'"); + st.s0.getDB("db").runCommand({dropDatabase: 1}); + } + } +} + +st.stop(); +})(); diff --git a/jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js b/jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js index 5cb277197b1..466c4314d45 100644 --- a/jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js +++ b/jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js @@ -160,9 +160,9 @@ gc(); // Clean up new connections jsTest.log("Stopping primary of second shard..."); -mongosConnActive.setSlaveOk(); +mongosConnActive.setSecondaryOk(); mongosConnIdle = authDBUsers(new Mongo(mongos.host)); -mongosConnIdle.setSlaveOk(); +mongosConnIdle.setSecondaryOk(); // Need to save this node for later var rs1Secondary = st.rs1.getSecondary(); @@ -192,13 +192,13 @@ assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne( jsTest.log("Testing new connections with second primary down..."); mongosConnNew = authDBUsers(new Mongo(mongos.host)); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1})); mongosConnNew = authDBUsers(new Mongo(mongos.host)); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, 
mongosConnNew.getCollection(collSharded.toString()).findOne({_id: 1})); mongosConnNew = authDBUsers(new Mongo(mongos.host)); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1})); mongosConnNew = authDBUsers(new Mongo(mongos.host)); @@ -212,9 +212,9 @@ gc(); // Clean up new connections jsTest.log("Stopping primary of first shard..."); -mongosConnActive.setSlaveOk(); +mongosConnActive.setSecondaryOk(); mongosConnIdle = authDBUsers(new Mongo(mongos.host)); -mongosConnIdle.setSlaveOk(); +mongosConnIdle.setSecondaryOk(); st.rs0.stop(st.rs0.getPrimary()); @@ -241,13 +241,13 @@ assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne( jsTest.log("Testing new connections with first primary down..."); mongosConnNew = authDBUsers(new Mongo(mongos.host)); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1})); mongosConnNew = authDBUsers(new Mongo(mongos.host)); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: 1})); mongosConnNew = authDBUsers(new Mongo(mongos.host)); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1})); mongosConnNew = authDBUsers(new Mongo(mongos.host)); @@ -261,9 +261,9 @@ gc(); // Clean up new connections jsTest.log("Stopping second shard..."); -mongosConnActive.setSlaveOk(); +mongosConnActive.setSecondaryOk(); mongosConnIdle = authDBUsers(new Mongo(mongos.host)); -mongosConnIdle.setSlaveOk(); +mongosConnIdle.setSecondaryOk(); st.rs1.stop(rs1Secondary); @@ -288,10 +288,10 @@ assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne( jsTest.log("Testing new connections with second shard down..."); mongosConnNew = authDBUsers(new 
Mongo(mongos.host)); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1})); mongosConnNew = authDBUsers(new Mongo(mongos.host)); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1})); mongosConnNew = authDBUsers(new Mongo(mongos.host)); diff --git a/jstests/sharding/mongos_rs_shard_failure_tolerance.js b/jstests/sharding/mongos_rs_shard_failure_tolerance.js index 34d68c45f6e..89dc4c07986 100644 --- a/jstests/sharding/mongos_rs_shard_failure_tolerance.js +++ b/jstests/sharding/mongos_rs_shard_failure_tolerance.js @@ -131,11 +131,11 @@ st.rs1.stop(st.rs1.getPrimary()); jsTest.log("Testing active connection with second primary down..."); // Reads with read prefs -mongosConnActive.setSlaveOk(); +mongosConnActive.setSecondaryOk(); assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: -1})); assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: 1})); assert.neq(null, mongosConnActive.getCollection(collUnsharded.toString()).findOne({_id: 1})); -mongosConnActive.setSlaveOk(false); +mongosConnActive.setSecondaryOk(false); mongosConnActive.setReadPref("primary"); assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: -1})); @@ -145,14 +145,14 @@ assert.throws(function() { assert.neq(null, mongosConnActive.getCollection(collUnsharded.toString()).findOne({_id: 1})); // Ensure read prefs override slaveOK -mongosConnActive.setSlaveOk(); +mongosConnActive.setSecondaryOk(); mongosConnActive.setReadPref("primary"); assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: -1})); assert.throws(function() { mongosConnActive.getCollection(collSharded.toString()).findOne({_id: 1}); }); assert.neq(null, mongosConnActive.getCollection(collUnsharded.toString()).findOne({_id: 
1})); -mongosConnActive.setSlaveOk(false); +mongosConnActive.setSecondaryOk(false); mongosConnActive.setReadPref("secondary"); assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: -1})); @@ -187,11 +187,11 @@ assert.writeError(mongosConnIdle.getCollection(collSharded.toString()).insert({_ assert.commandWorked(mongosConnIdle.getCollection(collUnsharded.toString()).insert({_id: 6}, wc)); // Reads with read prefs -mongosConnIdle.setSlaveOk(); +mongosConnIdle.setSecondaryOk(); assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: -1})); assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: 1})); assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne({_id: 1})); -mongosConnIdle.setSlaveOk(false); +mongosConnIdle.setSecondaryOk(false); mongosConnIdle.setReadPref("primary"); assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: -1})); @@ -201,14 +201,14 @@ assert.throws(function() { assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne({_id: 1})); // Ensure read prefs override slaveOK -mongosConnIdle.setSlaveOk(); +mongosConnIdle.setSecondaryOk(); mongosConnIdle.setReadPref("primary"); assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: -1})); assert.throws(function() { mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: 1}); }); assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne({_id: 1})); -mongosConnIdle.setSlaveOk(false); +mongosConnIdle.setSecondaryOk(false); mongosConnIdle.setReadPref("secondary"); assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: -1})); @@ -234,13 +234,13 @@ jsTest.log("Testing new connections with second primary down..."); // Reads with read prefs mongosConnNew = new Mongo(mongos.host); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, 
mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1})); mongosConnNew = new Mongo(mongos.host); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: 1})); mongosConnNew = new Mongo(mongos.host); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1})); gc(); // Clean up new connections incrementally to compensate for slow win32 machine. @@ -261,17 +261,17 @@ gc(); // Clean up new connections incrementally to compensate for slow win32 ma // Ensure read prefs override slaveok mongosConnNew = new Mongo(mongos.host); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); mongosConnNew.setReadPref("primary"); assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1})); mongosConnNew = new Mongo(mongos.host); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); mongosConnNew.setReadPref("primary"); assert.throws(function() { mongosConnNew.getCollection(collSharded.toString()).findOne({_id: 1}); }); mongosConnNew = new Mongo(mongos.host); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); mongosConnNew.setReadPref("primary"); assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1})); @@ -343,7 +343,7 @@ st.rs0.stop(st.rs0.getPrimary()); jsTest.log("Testing active connection with first primary down..."); -mongosConnActive.setSlaveOk(); +mongosConnActive.setSecondaryOk(); assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: -1})); assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: 1})); assert.neq(null, mongosConnActive.getCollection(collUnsharded.toString()).findOne({_id: 1})); @@ -358,7 +358,7 @@ assert.writeError(mongosConnIdle.getCollection(collSharded.toString()).insert({_ 
assert.writeError(mongosConnIdle.getCollection(collSharded.toString()).insert({_id: 9})); assert.writeError(mongosConnIdle.getCollection(collUnsharded.toString()).insert({_id: 9})); -mongosConnIdle.setSlaveOk(); +mongosConnIdle.setSecondaryOk(); assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: -1})); assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: 1})); assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne({_id: 1})); @@ -366,13 +366,13 @@ assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne( jsTest.log("Testing new connections with first primary down..."); mongosConnNew = new Mongo(mongos.host); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1})); mongosConnNew = new Mongo(mongos.host); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: 1})); mongosConnNew = new Mongo(mongos.host); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1})); mongosConnNew = new Mongo(mongos.host); @@ -392,7 +392,7 @@ st.rs1.stop(rs1Secondary); jsTest.log("Testing active connection with second shard down..."); -mongosConnActive.setSlaveOk(); +mongosConnActive.setSecondaryOk(); assert.neq(null, mongosConnActive.getCollection(collSharded.toString()).findOne({_id: -1})); assert.neq(null, mongosConnActive.getCollection(collUnsharded.toString()).findOne({_id: 1})); @@ -406,17 +406,17 @@ assert.writeError(mongosConnIdle.getCollection(collSharded.toString()).insert({_ assert.writeError(mongosConnIdle.getCollection(collSharded.toString()).insert({_id: 12})); assert.writeError(mongosConnIdle.getCollection(collUnsharded.toString()).insert({_id: 12})); -mongosConnIdle.setSlaveOk(); 
+mongosConnIdle.setSecondaryOk(); assert.neq(null, mongosConnIdle.getCollection(collSharded.toString()).findOne({_id: -1})); assert.neq(null, mongosConnIdle.getCollection(collUnsharded.toString()).findOne({_id: 1})); jsTest.log("Testing new connections with second shard down..."); mongosConnNew = new Mongo(mongos.host); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collSharded.toString()).findOne({_id: -1})); mongosConnNew = new Mongo(mongos.host); -mongosConnNew.setSlaveOk(); +mongosConnNew.setSecondaryOk(); assert.neq(null, mongosConnNew.getCollection(collUnsharded.toString()).findOne({_id: 1})); mongosConnNew = new Mongo(mongos.host); diff --git a/jstests/sharding/query/explain_read_pref.js b/jstests/sharding/query/explain_read_pref.js index ce5e2cf47af..c3c51d85756 100644 --- a/jstests/sharding/query/explain_read_pref.js +++ b/jstests/sharding/query/explain_read_pref.js @@ -58,7 +58,7 @@ var testAllModes = function(conn, isMongos) { var mode = args[0], tagSets = args[1], secExpected = args[2]; var testDB = conn.getDB('TestDB'); - conn.setSlaveOk(false); // purely rely on readPref + conn.setSecondaryOk(false); // purely rely on readPref jsTest.log('Testing mode: ' + mode + ', tag sets: ' + tojson(tagSets)); // .explain().find() diff --git a/jstests/sharding/read_pref.js b/jstests/sharding/read_pref.js index 95c0e9697c3..9267cb18430 100644 --- a/jstests/sharding/read_pref.js +++ b/jstests/sharding/read_pref.js @@ -134,7 +134,7 @@ var doTest = function(useDollarQuerySyntax) { var explainServer = getExplainServer(explain); assert.neq(primaryNode.name, explainServer); - conn.setSlaveOk(); + conn.setSecondaryOk(); // It should also work with slaveOk explain = getExplain("secondary"); diff --git a/jstests/sharding/read_pref_cmd.js b/jstests/sharding/read_pref_cmd.js index 2c2a7f3332b..f94dd924f45 100644 --- a/jstests/sharding/read_pref_cmd.js +++ b/jstests/sharding/read_pref_cmd.js @@ -165,7 +165,7 @@ let 
testConnReadPreference = function(conn, isMongos, rsNodes, {readPref, expect let testDB = conn.getDB(kDbName); let shardedColl = conn.getCollection(kShardedNs); - conn.setSlaveOk(false); // purely rely on readPref + conn.setSecondaryOk(false); // purely rely on readPref conn.setReadPref(readPref.mode, readPref.tagSets, readPref.hedge); /** @@ -387,7 +387,7 @@ let testCursorReadPreference = function(conn, isMongos, rsNodes, {readPref, expe tojson(readPref.tagSets)}, hedge ${tojson(readPref.hedge)}`); let testColl = conn.getCollection(kShardedNs); - conn.setSlaveOk(false); // purely rely on readPref + conn.setSecondaryOk(false); // purely rely on readPref let bulk = testColl.initializeUnorderedBulkOp(); for (let i = 0; i < kNumDocs; ++i) { diff --git a/jstests/sharding/read_write_concern_defaults_application.js b/jstests/sharding/read_write_concern_defaults_application.js index 5db16a6e27f..1fd7146d32f 100644 --- a/jstests/sharding/read_write_concern_defaults_application.js +++ b/jstests/sharding/read_write_concern_defaults_application.js @@ -459,6 +459,7 @@ let testCases = { hello: {skip: "does not accept read or write concern"}, hostInfo: {skip: "does not accept read or write concern"}, httpClientRequest: {skip: "does not accept read or write concern"}, + importCollection: {skip: "internal command"}, insert: { setUp: function(conn) { assert.commandWorked(conn.getDB(db).runCommand({create: coll, writeConcern: {w: 1}})); diff --git a/jstests/sharding/recovering_slaveok.js b/jstests/sharding/recovering_slaveok.js index 512719b08b6..d9bcd44da87 100644 --- a/jstests/sharding/recovering_slaveok.js +++ b/jstests/sharding/recovering_slaveok.js @@ -1,6 +1,6 @@ /** - * This tests that slaveOk'd queries in sharded setups get correctly routed when a slave goes into - * RECOVERING state, and don't break + * This tests that secondaryOk'd queries in sharded setups get correctly routed when a slave goes + * into RECOVERING state, and don't break */ // Shard secondaries are 
restarted, which may cause that shard's primary to stepdown while it does @@ -12,11 +12,11 @@ TestData.skipCheckingUUIDsConsistentAcrossCluster = true; load("jstests/replsets/rslib.js"); var shardTest = - new ShardingTest({name: "recovering_slaveok", shards: 2, mongos: 2, other: {rs: true}}); + new ShardingTest({name: "recovering_secondaryok", shards: 2, mongos: 2, other: {rs: true}}); var mongos = shardTest.s0; var mongosSOK = shardTest.s1; -mongosSOK.setSlaveOk(); +mongosSOK.setSecondaryOk(); var admin = mongos.getDB("admin"); var config = mongos.getDB("config"); @@ -50,7 +50,7 @@ shardTest.shardColl(coll, /* dbname */ null, /* waitForDelete */ true); -print("3: test normal and slaveOk queries"); +print("3: test normal and secondaryOk queries"); // Make shardA and rsA the same var shardA = shardTest.getShard(coll, {_id: -1}); @@ -87,7 +87,7 @@ print("6: stop non-RECOVERING secondary"); rsA.stop(goodSec); -print("7: check our regular and slaveOk query"); +print("7: check our regular and secondaryOk query"); assert.eq(2, coll.find().itcount()); assert.eq(2, collSOk.find().itcount()); @@ -100,7 +100,7 @@ print("9: wait for recovery"); rsA.waitForState(rsA.getSecondaries(), ReplSetTest.State.SECONDARY, 5 * 60 * 1000); -print("10: check our regular and slaveOk query"); +print("10: check our regular and secondaryOk query"); // We need to make sure our nodes are considered accessible from mongos - otherwise we fail // See SERVER-7274 @@ -112,7 +112,7 @@ awaitRSClientHosts(coll.getMongo(), rsB.nodes, {ok: true}); awaitRSClientHosts(collSOk.getMongo(), [rsA.getSecondaries()[0]], {secondary: true, ok: true}); awaitRSClientHosts(collSOk.getMongo(), [rsB.getSecondaries()[0]], {secondary: true, ok: true}); -print("SlaveOK Query..."); +print("SecondaryOk Query..."); var sOKCount = collSOk.find().itcount(); var collCount = null; diff --git a/jstests/sharding/session_info_in_oplog.js b/jstests/sharding/session_info_in_oplog.js index 617d5759207..a7644fca599 100644 --- 
a/jstests/sharding/session_info_in_oplog.js +++ b/jstests/sharding/session_info_in_oplog.js @@ -329,7 +329,7 @@ replTest.initiate(); var priConn = replTest.getPrimary(); var secConn = replTest.getSecondary(); -secConn.setSlaveOk(true); +secConn.setSecondaryOk(); runTests(priConn, priConn, secConn); @@ -338,7 +338,7 @@ replTest.stopSet(); var st = new ShardingTest({shards: {rs0: {nodes: kNodes}}}); secConn = st.rs0.getSecondary(); -secConn.setSlaveOk(true); +secConn.setSecondaryOk(); runTests(st.s, st.rs0.getPrimary(), secConn); st.stop(); diff --git a/jstests/sharding/shard_aware_init_secondaries.js b/jstests/sharding/shard_aware_init_secondaries.js index 59a8542f44b..f852c6e58a1 100644 --- a/jstests/sharding/shard_aware_init_secondaries.js +++ b/jstests/sharding/shard_aware_init_secondaries.js @@ -41,7 +41,7 @@ assert.commandWorked(priConn.getDB('admin').system.version.update( shardIdentityQuery, shardIdentityUpdate, {upsert: true, writeConcern: {w: 2}})); var secConn = replTest.getSecondary(); -secConn.setSlaveOk(true); +secConn.setSecondaryOk(); var res = secConn.getDB('admin').runCommand({shardingState: 1}); @@ -55,7 +55,7 @@ replTest.waitForPrimary(); replTest.awaitSecondaryNodes(); secConn = replTest.getSecondary(); -secConn.setSlaveOk(true); +secConn.setSecondaryOk(); res = secConn.getDB('admin').runCommand({shardingState: 1}); diff --git a/jstests/sharding/shard_identity_config_update.js b/jstests/sharding/shard_identity_config_update.js index 3e668c5903c..43c10bbbd22 100644 --- a/jstests/sharding/shard_identity_config_update.js +++ b/jstests/sharding/shard_identity_config_update.js @@ -63,7 +63,7 @@ assert.soon(function() { }); var secConn = st.rs0.getSecondary(); -secConn.setSlaveOk(true); +secConn.setSecondaryOk(); assert.soon(function() { return checkConfigStrUpdated(secConn, expectedConfigStr); }); @@ -96,7 +96,7 @@ assert.soon(function() { }); secConn = st.rs0.getSecondary(); -secConn.setSlaveOk(true); +secConn.setSecondaryOk(); assert.soon(function() 
{ return checkConfigStrUpdated(secConn, origConfigConnStr); }); diff --git a/jstests/sharding/shard_identity_rollback.js b/jstests/sharding/shard_identity_rollback.js index d6e47fa3137..25dbc2e19e4 100644 --- a/jstests/sharding/shard_identity_rollback.js +++ b/jstests/sharding/shard_identity_rollback.js @@ -52,7 +52,7 @@ assert.eq(shardIdentityDoc.clusterId, res.clusterId); // Ensure sharding state on the secondaries was *not* initialized secondaries.forEach(function(secondary) { - secondary.setSlaveOk(true); + secondary.setSecondaryOk(); res = secondary.getDB('admin').runCommand({shardingState: 1}); assert(!res.enabled, tojson(res)); }); @@ -105,7 +105,7 @@ try { // specified. We do want to wait to be able to connect to the node here however, so we need to pass // {waitForConnect: true}. priConn = replTest.start(priConn.nodeId, {shardsvr: '', waitForConnect: true}, true); -priConn.setSlaveOk(); +priConn.setSecondaryOk(); // Wait for the old primary to replicate the document that was written to the new primary while // it was shut down. 
diff --git a/jstests/sharding/shard_insert_getlasterror_w2.js b/jstests/sharding/shard_insert_getlasterror_w2.js index 7bde30b2dc5..a4a0f5c540f 100644 --- a/jstests/sharding/shard_insert_getlasterror_w2.js +++ b/jstests/sharding/shard_insert_getlasterror_w2.js @@ -70,7 +70,7 @@ replSet1.stop(secondary2); replSet1.waitForState(primary, ReplSetTest.State.SECONDARY); testDB.getMongo().adminCommand({setParameter: 1, logLevel: 1}); -testDB.getMongo().setSlaveOk(); +testDB.getMongo().setSecondaryOk(); print("trying some queries"); assert.soon(function() { try { diff --git a/jstests/slow1/replsets_priority1.js b/jstests/slow1/replsets_priority1.js index 3ff6c058cc7..4dea828c793 100644 --- a/jstests/slow1/replsets_priority1.js +++ b/jstests/slow1/replsets_priority1.js @@ -146,9 +146,9 @@ for (var i = 0; i < n; i++) { assert.soon(function() { var versions = [0, 0]; var secondaries = rs.getSecondaries(); - secondaries[0].setSlaveOk(); + secondaries[0].setSecondaryOk(); versions[0] = secondaries[0].getDB("local").system.replset.findOne().version; - secondaries[1].setSlaveOk(); + secondaries[1].setSecondaryOk(); versions[1] = secondaries[1].getDB("local").system.replset.findOne().version; return versions[0] == config.version && versions[1] == config.version; }); diff --git a/jstests/ssl/mongo_uri_secondaries.js b/jstests/ssl/mongo_uri_secondaries.js index a4ed1eae93c..73cca540c80 100644 --- a/jstests/ssl/mongo_uri_secondaries.js +++ b/jstests/ssl/mongo_uri_secondaries.js @@ -39,7 +39,7 @@ const subShellCommand = function(hosts) { for (var i = 0; i < 10; i++) { var db = Ms[i].getDB("test"); - db.setSlaveOk(true); + db.setSecondaryOk(); db.col.find().readPref("secondary").toArray(); } }; diff --git a/src/mongo/client/dbclient_base.cpp b/src/mongo/client/dbclient_base.cpp index 04748a525af..9a7c87eed66 100644 --- a/src/mongo/client/dbclient_base.cpp +++ b/src/mongo/client/dbclient_base.cpp @@ -49,8 +49,8 @@ #include "mongo/client/constants.h" #include 
"mongo/client/dbclient_cursor.h" #include "mongo/config.h" +#include "mongo/db/api_parameters_gen.h" #include "mongo/db/commands.h" -#include "mongo/db/initialize_api_parameters_gen.h" #include "mongo/db/json.h" #include "mongo/db/namespace_string.h" #include "mongo/db/query/kill_cursors_gen.h" diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 2d90253f008..a0c8de84714 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -869,6 +869,7 @@ env.Library( '$BUILD_DIR/mongo/db/storage/storage_engine_lock_file', '$BUILD_DIR/mongo/db/storage/storage_engine_metadata', 'commands/server_status_core', + 'initialize_api_parameters', 'introspect', 'lasterror', 'query_exec', @@ -1460,17 +1461,39 @@ env.Library( env.Library( target='shared_request_handling', source=[ - 'initialize_api_parameters.cpp', 'transaction_validation.cpp', - env.Idlc('initialize_api_parameters.idl')[0], ], LIBDEPS=[ + 'api_parameters', 'error_labels', 'logical_session_cache_impl', ], ) env.Library( + target='api_parameters', + source=[ + 'api_parameters.cpp', + env.Idlc('api_parameters.idl')[0], + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/idl/idl_parser', + '$BUILD_DIR/mongo/idl/server_parameter', + ], +) + +env.Library( + target='initialize_api_parameters', + source=[ + 'initialize_api_parameters.cpp', + ], + LIBDEPS_PRIVATE=[ + 'api_parameters', + 'commands', + ], +) + +env.Library( target='logical_time', source=[ 'logical_time.cpp', diff --git a/src/mongo/db/api_parameters.cpp b/src/mongo/db/api_parameters.cpp new file mode 100644 index 00000000000..05ffe9c49cb --- /dev/null +++ b/src/mongo/db/api_parameters.cpp @@ -0,0 +1,79 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kCommand + +#include "mongo/platform/basic.h" + +#include "mongo/db/api_parameters.h" + +namespace mongo { + +const OperationContext::Decoration<APIParameters> APIParameters::get = + OperationContext::declareDecoration<APIParameters>(); + +APIParameters APIParameters::fromClient(const APIParametersFromClient& apiParamsFromClient) { + APIParameters apiParameters = APIParameters(); + auto apiVersion = apiParamsFromClient.getApiVersion(); + auto apiStrict = apiParamsFromClient.getApiStrict(); + auto apiDeprecationErrors = apiParamsFromClient.getApiDeprecationErrors(); + + if (apiVersion) { + apiParameters.setAPIVersion(apiVersion.value()); + } + + if (apiStrict) { + apiParameters.setAPIStrict(apiStrict.value()); + } + + if (apiDeprecationErrors) { + apiParameters.setAPIDeprecationErrors(apiDeprecationErrors.value()); + } + + return apiParameters; +} + +APIParameters APIParameters::fromBSON(const BSONObj& cmdObj) { + return APIParameters::fromClient( + APIParametersFromClient::parse("APIParametersFromClient"_sd, cmdObj)); +} + +void APIParameters::appendInfo(BSONObjBuilder* builder) const { + if (_apiVersion) { + builder->append(kAPIVersionFieldName, *_apiVersion); + } + if (_apiStrict) { + builder->append(kAPIStrictFieldName, *_apiStrict); + } + if (_apiDeprecationErrors) { + builder->append(kAPIDeprecationErrorsFieldName, *_apiDeprecationErrors); + } +} + +} // namespace mongo diff --git a/src/mongo/db/api_parameters.h b/src/mongo/db/api_parameters.h new file mode 100644 index 00000000000..7539dcb345e --- /dev/null +++ b/src/mongo/db/api_parameters.h @@ -0,0 +1,122 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/api_parameters_gen.h" +#include "mongo/db/operation_context.h" + +namespace mongo { + +/** + * Decorates operation context with methods to retrieve apiVersion, apiStrict, and + * apiDeprecationErrors. 
+ */ +class APIParameters { + +public: + static constexpr StringData kAPIVersionFieldName = "apiVersion"_sd; + static constexpr StringData kAPIStrictFieldName = "apiStrict"_sd; + static constexpr StringData kAPIDeprecationErrorsFieldName = "apiDeprecationErrors"_sd; + + static const OperationContext::Decoration<APIParameters> get; + static APIParameters fromClient(const APIParametersFromClient& apiParamsFromClient); + static APIParameters fromBSON(const BSONObj& cmdObj); + + void appendInfo(BSONObjBuilder* builder) const; + + const boost::optional<std::string>& getAPIVersion() const { + return _apiVersion; + } + + void setAPIVersion(StringData apiVersion) { + _apiVersion = apiVersion.toString(); + } + + const boost::optional<bool>& getAPIStrict() const { + return _apiStrict; + } + + void setAPIStrict(bool apiStrict) { + _apiStrict = apiStrict; + } + + const boost::optional<bool>& getAPIDeprecationErrors() const { + return _apiDeprecationErrors; + } + + void setAPIDeprecationErrors(bool apiDeprecationErrors) { + _apiDeprecationErrors = apiDeprecationErrors; + } + + const bool getParamsPassed() const { + return _apiVersion || _apiStrict || _apiDeprecationErrors; + } + +private: + boost::optional<std::string> _apiVersion; + boost::optional<bool> _apiStrict; + boost::optional<bool> _apiDeprecationErrors; +}; + + +/** + * Temporarily remove the user's API parameters from an OperationContext. 
+ */ +class IgnoreAPIParametersBlock { +public: + IgnoreAPIParametersBlock() = delete; + IgnoreAPIParametersBlock(const IgnoreAPIParametersBlock&) = delete; + IgnoreAPIParametersBlock& operator=(const IgnoreAPIParametersBlock&) = delete; + + explicit IgnoreAPIParametersBlock(OperationContext* opCtx) : _opCtx(opCtx) { + _apiParams = APIParameters::get(_opCtx); + APIParameters::get(_opCtx) = APIParameters(); + } + + void release() { + if (_released) { + return; + } + + APIParameters::get(_opCtx) = _apiParams; + _released = true; + } + + ~IgnoreAPIParametersBlock() { + release(); + } + +private: + OperationContext* _opCtx; + APIParameters _apiParams; + bool _released = false; +}; + +} // namespace mongo diff --git a/src/mongo/db/initialize_api_parameters.idl b/src/mongo/db/api_parameters.idl index cc3a3d13e6c..cc3a3d13e6c 100644 --- a/src/mongo/db/initialize_api_parameters.idl +++ b/src/mongo/db/api_parameters.idl diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp index 617145e3abc..63a304a1f30 100644 --- a/src/mongo/db/catalog/multi_index_block.cpp +++ b/src/mongo/db/catalog/multi_index_block.cpp @@ -872,7 +872,9 @@ boost::optional<ResumeIndexInfo> MultiIndexBlock::_abortWithoutCleanup(Operation void MultiIndexBlock::_writeStateToDisk(OperationContext* opCtx) const { auto obj = _constructStateObject(); - auto rs = opCtx->getServiceContext()->getStorageEngine()->makeTemporaryRecordStore(opCtx); + auto rs = opCtx->getServiceContext() + ->getStorageEngine() + ->makeTemporaryRecordStoreForResumableIndexBuild(opCtx); WriteUnitOfWork wuow(opCtx); diff --git a/src/mongo/db/catalog_raii.h b/src/mongo/db/catalog_raii.h index 47444538dd5..367b87e933b 100644 --- a/src/mongo/db/catalog_raii.h +++ b/src/mongo/db/catalog_raii.h @@ -291,7 +291,7 @@ private: class ReadSourceScope { public: ReadSourceScope(OperationContext* opCtx, - RecoveryUnit::ReadSource readSource = RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource 
readSource, boost::optional<Timestamp> provided = boost::none); ~ReadSourceScope(); diff --git a/src/mongo/db/catalog_raii_test.cpp b/src/mongo/db/catalog_raii_test.cpp index cc222301ca0..e767d1f30ca 100644 --- a/src/mongo/db/catalog_raii_test.cpp +++ b/src/mongo/db/catalog_raii_test.cpp @@ -230,7 +230,7 @@ public: } private: - ReadSource _source = ReadSource::kUnset; + ReadSource _source = ReadSource::kNoTimestamp; boost::optional<Timestamp> _timestamp; }; @@ -257,8 +257,8 @@ TEST_F(ReadSourceScopeTest, RestoreReadSource) { ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kProvided); ASSERT_EQ(opCtx()->recoveryUnit()->getPointInTimeReadTimestamp(), Timestamp(1, 2)); { - ReadSourceScope scope(opCtx()); - ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kUnset); + ReadSourceScope scope(opCtx(), ReadSource::kNoTimestamp); + ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kNoTimestamp); opCtx()->recoveryUnit()->setTimestampReadSource(ReadSource::kNoOverlap); ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kNoOverlap); diff --git a/src/mongo/db/clientcursor.h b/src/mongo/db/clientcursor.h index ee2040764b6..f4d7960a759 100644 --- a/src/mongo/db/clientcursor.h +++ b/src/mongo/db/clientcursor.h @@ -32,10 +32,10 @@ #include <boost/optional.hpp> #include <functional> +#include "mongo/db/api_parameters.h" #include "mongo/db/auth/privilege.h" #include "mongo/db/auth/user_name.h" #include "mongo/db/cursor_id.h" -#include "mongo/db/initialize_api_parameters.h" #include "mongo/db/jsobj.h" #include "mongo/db/logical_session_id.h" #include "mongo/db/query/plan_executor.h" diff --git a/src/mongo/db/command_generic_argument.cpp b/src/mongo/db/command_generic_argument.cpp index 8434b65a3c3..e15c2498a97 100644 --- a/src/mongo/db/command_generic_argument.cpp +++ b/src/mongo/db/command_generic_argument.cpp @@ -56,9 +56,9 @@ static constexpr std::array<SpecialArgRecord, 34> specials{{ // 
/-isGeneric // | /-stripFromRequest // | | /-stripFromReply - {"apiVersion"_sd, 1, 0, 0}, - {"apiStrict"_sd, 1, 0, 0}, - {"apiDeprecationErrors"_sd, 1, 0, 0}, + {"apiVersion"_sd, 1, 1, 0}, + {"apiStrict"_sd, 1, 1, 0}, + {"apiDeprecationErrors"_sd, 1, 1, 0}, {"$audit"_sd, 1, 1, 0}, {"$client"_sd, 1, 1, 0}, {"$configServerState"_sd, 1, 1, 1}, diff --git a/src/mongo/db/commands.cpp b/src/mongo/db/commands.cpp index f9ef6f72574..8d9a8de296b 100644 --- a/src/mongo/db/commands.cpp +++ b/src/mongo/db/commands.cpp @@ -867,6 +867,14 @@ Command::Command(StringData name, std::vector<StringData> aliases) globalCommandRegistry()->registerCommand(this, _name, _aliases); } +const std::set<std::string>& Command::apiVersions() const { + return kNoApiVersions; +} + +const std::set<std::string>& Command::deprecatedApiVersions() const { + return kNoApiVersions; +} + bool Command::hasAlias(const StringData& alias) const { return globalCommandRegistry()->findCommand(alias) == this; } diff --git a/src/mongo/db/commands.h b/src/mongo/db/commands.h index 1877556f356..06803dbad84 100644 --- a/src/mongo/db/commands.h +++ b/src/mongo/db/commands.h @@ -358,15 +358,18 @@ public: /* * Returns the list of API versions that include this command. */ - virtual const std::set<std::string>& apiVersions() const { - return kNoApiVersions; - } + virtual const std::set<std::string>& apiVersions() const; /* * Returns the list of API versions in which this command is deprecated. */ - virtual const std::set<std::string>& deprecatedApiVersions() const { - return kNoApiVersions; + virtual const std::set<std::string>& deprecatedApiVersions() const; + + /* + * Some commands permit any values for apiVersion, apiStrict, and apiDeprecationErrors. 
+ */ + virtual bool acceptsAnyApiVersionParameters() const { + return false; } /** diff --git a/src/mongo/db/commands/test_api_version_2_commands.cpp b/src/mongo/db/commands/test_api_version_2_commands.cpp index b2c79a7ef70..738e13b1366 100644 --- a/src/mongo/db/commands/test_api_version_2_commands.cpp +++ b/src/mongo/db/commands/test_api_version_2_commands.cpp @@ -27,8 +27,8 @@ * it in the license file. */ +#include "mongo/db/api_parameters.h" #include "mongo/db/commands.h" -#include "mongo/db/initialize_api_parameters.h" namespace mongo { diff --git a/src/mongo/db/commands/test_deprecation_command.cpp b/src/mongo/db/commands/test_deprecation_command.cpp index 44e61edb4a0..74d93942ddd 100644 --- a/src/mongo/db/commands/test_deprecation_command.cpp +++ b/src/mongo/db/commands/test_deprecation_command.cpp @@ -27,8 +27,8 @@ * it in the license file. */ +#include "mongo/db/api_parameters.h" #include "mongo/db/commands.h" -#include "mongo/db/initialize_api_parameters.h" namespace mongo { diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp index a8329f4641d..22a9181f157 100644 --- a/src/mongo/db/db_raii.cpp +++ b/src/mongo/db/db_raii.cpp @@ -90,6 +90,10 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, const NamespaceStringOrUUID& nsOrUUID, AutoGetCollectionViewMode viewMode, Date_t deadline) { + // The caller was expecting to conflict with batch application before entering this function. + // i.e. the caller does not currently have a ShouldNotConflict... block in scope. + bool callerWasConflicting = opCtx->lockState()->shouldConflictWithSecondaryBatchApplication(); + // Don't take the ParallelBatchWriterMode lock when the server parameter is set and our // storage engine supports snapshot reads. 
if (gAllowSecondaryReadsDuringBatchApplication.load() && @@ -100,11 +104,6 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, const auto collectionLockMode = getLockModeForQuery(opCtx, nsOrUUID.nss()); _autoColl.emplace(opCtx, nsOrUUID, collectionLockMode, viewMode, deadline); - // If the read source is explicitly set to kNoTimestamp, we read the most up to date data and do - // not consider changing our ReadSource (e.g. FTDC needs that). - if (opCtx->recoveryUnit()->getTimestampReadSource() == RecoveryUnit::ReadSource::kNoTimestamp) - return; - repl::ReplicationCoordinator* const replCoord = repl::ReplicationCoordinator::get(opCtx); const auto readConcernLevel = repl::ReadConcernArgs::get(opCtx).getLevel(); @@ -154,6 +153,32 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, << afterClusterTime->asTimestamp().toString()); } + // This assertion protects operations from reading inconsistent data on secondaries when + // using the default ReadSource of kNoTimestamp. + + // Reading at lastApplied on secondaries is the safest behavior and is enabled for all user + // and DBDirectClient reads using 'local' and 'available' readConcerns. If an internal + // operation wishes to read without a timestamp during a batch, a ShouldNotConflict can + // suppress this fatal assertion with the following considerations: + // * The operation is not reading replicated data in a replication state where batch + // application is active OR + // * Reading inconsistent, out-of-order data is either inconsequential or required by + // the operation. + + // If the caller entered this function expecting to conflict with batch application + // (i.e. no ShouldNotConflict block in scope), but they are reading without a timestamp and + // not holding the PBWM lock, then there is a possibility that this reader may + // unintentionally see inconsistent data during a batch. 
Certain namespaces are applied + // serially in oplog application, and therefore can be safely read without taking the PBWM + // lock or reading at a timestamp. + if (readSource == RecoveryUnit::ReadSource::kNoTimestamp && callerWasConflicting && + !nss.mustBeAppliedInOwnOplogBatch() && + SnapshotHelper::shouldReadAtLastApplied(opCtx, nss)) { + LOGV2_FATAL(4728700, + "Reading from replicated collection without read timestamp or PBWM lock", + "collection"_attr = nss); + } + auto minSnapshot = coll->getMinimumVisibleSnapshot(); if (!SnapshotHelper::collectionChangesConflictWithRead(minSnapshot, readTimestamp)) { return; diff --git a/src/mongo/db/db_raii_test.cpp b/src/mongo/db/db_raii_test.cpp index b101ce91961..eba322c5581 100644 --- a/src/mongo/db/db_raii_test.cpp +++ b/src/mongo/db/db_raii_test.cpp @@ -42,6 +42,7 @@ #include "mongo/db/query/internal_plans.h" #include "mongo/db/storage/snapshot_manager.h" #include "mongo/logv2/log.h" +#include "mongo/unittest/death_test.h" #include "mongo/unittest/unittest.h" #include "mongo/util/time_support.h" @@ -219,6 +220,8 @@ TEST_F(DBRAIITestFixture, Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX); ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX)); + // Simulate using a DBDirectClient to test this behavior for user reads. + client2.first->setInDirectClient(true); AutoGetCollectionForRead coll(client2.second.get(), nss); } @@ -239,6 +242,8 @@ TEST_F(DBRAIITestFixture, Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX); ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX)); + // Simulate using a DBDirectClient to test this behavior for user reads. 
+ client2.first->setInDirectClient(true); AutoGetCollectionForRead coll(client2.second.get(), nss); } @@ -266,10 +271,12 @@ TEST_F(DBRAIITestFixture, Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX); ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX)); + // Simulate using a DBDirectClient to test this behavior for user reads. + client2.first->setInDirectClient(true); AutoGetCollectionForRead coll(client2.second.get(), NamespaceString("local.system.js")); // Reading from an unreplicated collection does not change the ReadSource to kLastApplied. ASSERT_EQ(client2.second.get()->recoveryUnit()->getTimestampReadSource(), - RecoveryUnit::ReadSource::kUnset); + RecoveryUnit::ReadSource::kNoTimestamp); // Reading from a replicated collection will try to switch to kLastApplied. Because we are // already reading without a timestamp and we can't reacquire the PBWM lock to continue reading @@ -300,12 +307,15 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedConflict) { auto snapshotManager = client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); snapshotManager->setLastApplied(opTime.getTimestamp()); + + // Simulate using a DBDirectClient to test this behavior for user reads. + client1.first->setInDirectClient(true); AutoGetCollectionForRead coll(client1.second.get(), nss); // We can't read from kLastApplied in this scenario because there is a catalog conflict. Resort // to taking the PBWM lock and reading without a timestamp. 
ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(), - RecoveryUnit::ReadSource::kUnset); + RecoveryUnit::ReadSource::kNoTimestamp); ASSERT_TRUE(client1.second.get()->lockState()->isLockHeldForMode( resourceIdParallelBatchWriterMode, MODE_IS)); } @@ -325,6 +335,9 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedUnavailable) { auto snapshotManager = client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); ASSERT_FALSE(snapshotManager->getLastApplied()); + + // Simulate using a DBDirectClient to test this behavior for user reads. + client1.first->setInDirectClient(true); AutoGetCollectionForRead coll(client1.second.get(), nss); ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(), @@ -334,6 +347,33 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedUnavailable) { resourceIdParallelBatchWriterMode, MODE_IS)); } +TEST_F(DBRAIITestFixture, AutoGetCollectionForReadOplogOnSecondary) { + // This test simulates a situation where AutoGetCollectionForRead reads at lastApplied on a + // secondary. + auto replCoord = repl::ReplicationCoordinator::get(client1.second.get()); + ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Ensure the default ReadSource is used. + ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(), + RecoveryUnit::ReadSource::kNoTimestamp); + + // Don't call into the ReplicationCoordinator to update lastApplied because it is only a mock + // class and does not update the correct state in the SnapshotManager. + repl::OpTime opTime(Timestamp(2, 1), 1); + auto snapshotManager = + client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); + snapshotManager->setLastApplied(opTime.getTimestamp()); + + // Simulate using a DBDirectClient to test this behavior for user reads. 
+ client1.first->setInDirectClient(true); + AutoGetCollectionForRead coll(client1.second.get(), NamespaceString::kRsOplogNamespace); + + ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(), + RecoveryUnit::ReadSource::kLastApplied); + ASSERT_FALSE(client1.second.get()->lockState()->isLockHeldForMode( + resourceIdParallelBatchWriterMode, MODE_IS)); +} + TEST_F(DBRAIITestFixture, AutoGetCollectionForReadUsesLastAppliedOnSecondary) { auto opCtx = client1.second.get(); @@ -342,11 +382,15 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadUsesLastAppliedOnSecondary) { CollectionOptions options; options.capped = true; ASSERT_OK(storageInterface()->createCollection(opCtx, nss, options)); + + // Simulate using a DBDirectClient to test this behavior for user reads. + opCtx->getClient()->setInDirectClient(true); AutoGetCollectionForRead autoColl(opCtx, nss); auto exec = makeTailableQueryPlan(opCtx, autoColl.getCollection()); // The collection scan should use the default ReadSource on a primary. - ASSERT_EQ(RecoveryUnit::ReadSource::kUnset, opCtx->recoveryUnit()->getTimestampReadSource()); + ASSERT_EQ(RecoveryUnit::ReadSource::kNoTimestamp, + opCtx->recoveryUnit()->getTimestampReadSource()); // When the tailable query recovers from its yield, it should discover that the node is // secondary and change its read source. @@ -373,6 +417,9 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadChangedReadSourceAfterStepUp) ASSERT_OK(storageInterface()->createCollection(opCtx, nss, options)); ASSERT_OK( repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Simulate using a DBDirectClient to test this behavior for user reads. 
+ opCtx->getClient()->setInDirectClient(true); AutoGetCollectionForRead autoColl(opCtx, nss); auto exec = makeTailableQueryPlan(opCtx, autoColl.getCollection()); @@ -390,9 +437,36 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadChangedReadSourceAfterStepUp) // After restoring, the collection scan should now be reading with kUnset, the default on // primaries. - ASSERT_EQ(RecoveryUnit::ReadSource::kUnset, opCtx->recoveryUnit()->getTimestampReadSource()); + ASSERT_EQ(RecoveryUnit::ReadSource::kNoTimestamp, + opCtx->recoveryUnit()->getTimestampReadSource()); ASSERT_EQUALS(PlanExecutor::IS_EOF, exec->getNext(&unused, nullptr)); } +DEATH_TEST_F(DBRAIITestFixture, AutoGetCollectionForReadUnsafe, "Fatal assertion") { + auto opCtx = client1.second.get(); + ASSERT_OK(storageInterface()->createCollection(opCtx, nss, {})); + + ASSERT_OK( + repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Non-user read on a replicated collection should fail because we are reading on a secondary + // without a timestamp. + AutoGetCollectionForRead autoColl(opCtx, nss); +} + +TEST_F(DBRAIITestFixture, AutoGetCollectionForReadSafe) { + auto opCtx = client1.second.get(); + ASSERT_OK(storageInterface()->createCollection(opCtx, nss, {})); + + ASSERT_OK( + repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Non-user read on a replicated collection should not fail because of the ShouldNotConflict + // block. 
+ ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(opCtx->lockState()); + + AutoGetCollectionForRead autoColl(opCtx, nss); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/dbdirectclient.cpp b/src/mongo/db/dbdirectclient.cpp index 5386bf567d2..bb1f5553906 100644 --- a/src/mongo/db/dbdirectclient.cpp +++ b/src/mongo/db/dbdirectclient.cpp @@ -143,6 +143,7 @@ DbResponse loopbackBuildResponse(OperationContext* const opCtx, toSend.header().setId(nextMessageId()); toSend.header().setResponseToMsgId(0); + IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx); return opCtx->getServiceContext()->getServiceEntryPoint()->handleRequest(opCtx, toSend).get(); } } // namespace diff --git a/src/mongo/db/exec/sbe/expressions/expression.cpp b/src/mongo/db/exec/sbe/expressions/expression.cpp index 5c598445272..6b517f293b5 100644 --- a/src/mongo/db/exec/sbe/expressions/expression.cpp +++ b/src/mongo/db/exec/sbe/expressions/expression.cpp @@ -359,7 +359,7 @@ static stdx::unordered_map<std::string, BuiltinFn> kBuiltinFunctions = { {"addToArray", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::addToArray, true}}, {"addToSet", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::addToSet, true}}, {"doubleDoubleSum", - BuiltinFn{[](size_t n) { return n > 0; }, vm::Builtin::doubleDoubleSum, true}}, + BuiltinFn{[](size_t n) { return n > 0; }, vm::Builtin::doubleDoubleSum, false}}, {"bitTestZero", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::bitTestZero, false}}, {"bitTestMask", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::bitTestMask, false}}, {"bitTestPosition", @@ -402,6 +402,7 @@ static stdx::unordered_map<std::string, InstrFn> kInstrFunctions = { InstrFn{[](size_t n) { return n == 1; }, &vm::CodeFragment::appendIsNumber, false}}, {"isBinData", InstrFn{[](size_t n) { return n == 1; }, &vm::CodeFragment::appendIsBinData, false}}, + {"isDate", InstrFn{[](size_t n) { return n == 1; }, &vm::CodeFragment::appendIsDate, 
false}}, {"sum", InstrFn{[](size_t n) { return n == 1; }, &vm::CodeFragment::appendSum, true}}, {"min", InstrFn{[](size_t n) { return n == 1; }, &vm::CodeFragment::appendMin, true}}, {"max", InstrFn{[](size_t n) { return n == 1; }, &vm::CodeFragment::appendMax, true}}, diff --git a/src/mongo/db/exec/sbe/stages/loop_join.h b/src/mongo/db/exec/sbe/stages/loop_join.h index bf19c50b8f2..0f94d39a9c1 100644 --- a/src/mongo/db/exec/sbe/stages/loop_join.h +++ b/src/mongo/db/exec/sbe/stages/loop_join.h @@ -57,8 +57,7 @@ public: private: // Set of variables coming from the outer side. const value::SlotVector _outerProjects; - // Set of correlated variables from the outer side that are visible on the inner side. They must - // be also present in the _outerProjects. + // Set of correlated variables from the outer side that are visible on the inner side. const value::SlotVector _outerCorrelated; // If not set then this is a cross product. const std::unique_ptr<EExpression> _predicate; diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp index b5890497f45..ba7c849431b 100644 --- a/src/mongo/db/exec/sbe/vm/vm.cpp +++ b/src/mongo/db/exec/sbe/vm/vm.cpp @@ -96,6 +96,7 @@ int Instruction::stackOffset[Instruction::Tags::lastInstruction] = { 0, // isString 0, // isNumber 0, // isBinData + 0, // isDate 0, // typeMatch 0, // function is special, the stack offset is encoded in the instruction itself @@ -314,6 +315,10 @@ void CodeFragment::appendIsBinData() { appendSimpleInstruction(Instruction::isBinData); } +void CodeFragment::appendIsDate() { + appendSimpleInstruction(Instruction::isDate); +} + void CodeFragment::appendTypeMatch(uint32_t typeMask) { Instruction i; i.tag = Instruction::typeMatch; @@ -1814,6 +1819,18 @@ std::tuple<uint8_t, value::TypeTags, value::Value> ByteCode::run(const CodeFragm } break; } + case Instruction::isDate: { + auto [owned, tag, val] = getFromStack(0); + + if (tag != value::TypeTags::Nothing) { + topStack(false, 
value::TypeTags::Boolean, tag == value::TypeTags::Date); + } + + if (owned) { + value::releaseValue(tag, val); + } + break; + } case Instruction::typeMatch: { auto typeMask = value::readFromMemory<uint32_t>(pcPointer); pcPointer += sizeof(typeMask); diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h index a5197d17437..e4590a79c71 100644 --- a/src/mongo/db/exec/sbe/vm/vm.h +++ b/src/mongo/db/exec/sbe/vm/vm.h @@ -149,6 +149,7 @@ struct Instruction { isString, isNumber, isBinData, + isDate, typeMatch, function, @@ -259,6 +260,7 @@ public: void appendIsString(); void appendIsNumber(); void appendIsBinData(); + void appendIsDate(); void appendTypeMatch(uint32_t typeMask); void appendFunction(Builtin f, uint8_t arity); void appendJump(int jumpOffset); diff --git a/src/mongo/db/free_mon/free_mon_storage.cpp b/src/mongo/db/free_mon/free_mon_storage.cpp index 7c25c6a671c..89be39295e1 100644 --- a/src/mongo/db/free_mon/free_mon_storage.cpp +++ b/src/mongo/db/free_mon/free_mon_storage.cpp @@ -57,6 +57,10 @@ boost::optional<FreeMonStorageState> FreeMonStorage::read(OperationContext* opCt auto storageInterface = repl::StorageInterface::get(opCtx); + // Ensure we read without a timestamp. + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); + AutoGetCollectionForRead autoRead(opCtx, NamespaceString::kServerConfigurationNamespace); auto swObj = storageInterface->findById( diff --git a/src/mongo/db/ftdc/collector.cpp b/src/mongo/db/ftdc/collector.cpp index 37dd68b136e..11ba9d4d3a4 100644 --- a/src/mongo/db/ftdc/collector.cpp +++ b/src/mongo/db/ftdc/collector.cpp @@ -70,8 +70,9 @@ std::tuple<BSONObj, Date_t> FTDCCollectorCollection::collect(Client* client) { ShouldNotConflictWithSecondaryBatchApplicationBlock shouldNotConflictBlock(opCtx->lockState()); opCtx->lockState()->skipAcquireTicket(); - // Explicitly start future read transactions without a timestamp. 
- opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + // Ensure future transactions read without a timestamp. + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); for (auto& collector : _collectors) { BSONObjBuilder subObjBuilder(builder.subobjStart(collector->name())); diff --git a/src/mongo/db/index_build_entry_helpers.cpp b/src/mongo/db/index_build_entry_helpers.cpp index da3f43b29e2..fc689873f6e 100644 --- a/src/mongo/db/index_build_entry_helpers.cpp +++ b/src/mongo/db/index_build_entry_helpers.cpp @@ -254,7 +254,8 @@ Status removeIndexBuildEntry(OperationContext* opCtx, UUID indexBuildUUID) { StatusWith<IndexBuildEntry> getIndexBuildEntry(OperationContext* opCtx, UUID indexBuildUUID) { // Read the most up to date data. - ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp); + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); AutoGetCollectionForRead autoCollection(opCtx, NamespaceString::kIndexBuildEntryNamespace); const Collection* collection = autoCollection.getCollection(); diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index c8caafc318f..d27dd0848db 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -2553,7 +2553,8 @@ void IndexBuildsCoordinator::_buildIndex(OperationContext* opCtx, // Read without a timestamp. When we commit, we block writes which guarantees all writes are // visible. - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); // The collection scan might read with a kMajorityCommitted read source, but will restore // kNoTimestamp afterwards. 
_scanCollectionAndInsertSortedKeysIntoIndex(opCtx, replState); @@ -2655,7 +2656,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesWithoutBlockingWrites( uassertStatusOK(_indexBuildsManager.drainBackgroundWrites( opCtx, replState->buildUUID, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kYield)); } @@ -2681,7 +2682,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites( uassertStatusOK(_indexBuildsManager.drainBackgroundWrites( opCtx, replState->buildUUID, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); } @@ -2769,7 +2770,7 @@ IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSide uassertStatusOK(_indexBuildsManager.drainBackgroundWrites( opCtx, replState->buildUUID, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); try { @@ -2916,7 +2917,7 @@ StatusWith<std::pair<long long, long long>> IndexBuildsCoordinator::_runIndexReb uassertStatusOK(_indexBuildsManager.drainBackgroundWrites( opCtx, replState->buildUUID, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); uassertStatusOK( diff --git a/src/mongo/db/initialize_api_parameters.cpp b/src/mongo/db/initialize_api_parameters.cpp index 11a5b68ae10..575fd476827 100644 --- a/src/mongo/db/initialize_api_parameters.cpp +++ b/src/mongo/db/initialize_api_parameters.cpp @@ -27,8 +27,17 @@ * it in the license file. 
*/ +#include "mongo/platform/basic.h" + #include "mongo/db/initialize_api_parameters.h" +#include <string> + +#include "mongo/db/commands.h" +#include "mongo/db/operation_context.h" +#include "mongo/util/assert_util.h" +#include "mongo/util/str.h" + namespace mongo { const APIParametersFromClient initializeAPIParameters(OperationContext* opCtx, @@ -44,6 +53,10 @@ const APIParametersFromClient initializeAPIParameters(OperationContext* opCtx, apiParamsFromClient.getApiVersion()); } + if (command->acceptsAnyApiVersionParameters()) { + return apiParamsFromClient; + } + if (apiParamsFromClient.getApiDeprecationErrors() || apiParamsFromClient.getApiStrict()) { uassert(4886600, "Provided apiStrict and/or apiDeprecationErrors without passing apiVersion", @@ -88,44 +101,4 @@ const APIParametersFromClient initializeAPIParameters(OperationContext* opCtx, return apiParamsFromClient; } -const OperationContext::Decoration<APIParameters> handle = - OperationContext::declareDecoration<APIParameters>(); - -APIParameters& APIParameters::get(OperationContext* opCtx) { - return handle(opCtx); -} - -APIParameters APIParameters::fromClient(const APIParametersFromClient& apiParamsFromClient) { - APIParameters apiParameters = APIParameters(); - auto apiVersion = apiParamsFromClient.getApiVersion(); - auto apiStrict = apiParamsFromClient.getApiStrict(); - auto apiDeprecationErrors = apiParamsFromClient.getApiDeprecationErrors(); - - if (apiVersion) { - apiParameters.setAPIVersion(apiVersion.value()); - } - - if (apiStrict) { - apiParameters.setAPIStrict(apiStrict.value()); - } - - if (apiDeprecationErrors) { - apiParameters.setAPIDeprecationErrors(apiDeprecationErrors.value()); - } - - return apiParameters; -} - -void APIParameters::appendInfo(BSONObjBuilder* builder) const { - if (_apiVersion) { - builder->append(kAPIVersionFieldName, *_apiVersion); - } - if (_apiStrict) { - builder->append(kAPIStrictFieldName, *_apiStrict); - } - if (_apiDeprecationErrors) { - 
builder->append(kAPIDeprecationErrorsFieldName, *_apiDeprecationErrors); - } -} - } // namespace mongo diff --git a/src/mongo/db/initialize_api_parameters.h b/src/mongo/db/initialize_api_parameters.h index 73215f607c8..e62d0defecc 100644 --- a/src/mongo/db/initialize_api_parameters.h +++ b/src/mongo/db/initialize_api_parameters.h @@ -29,73 +29,19 @@ #pragma once -#include "mongo/db/commands.h" -#include "mongo/db/initialize_api_parameters_gen.h" -#include "mongo/db/operation_context.h" +#include "api_parameters.h" namespace mongo { +class BSONObj; +class Command; +class OperationContext; + /** - * See VERSIONED_API_README.md for an overview of the Versioned API. - * - * This function parses a command's API Version parameters from a request and stores the apiVersion, + * Parse a command's API Version parameters from a request and store the apiVersion, * apiStrict, and apiDeprecationErrors fields. */ const APIParametersFromClient initializeAPIParameters(OperationContext* opCtx, const BSONObj& requestBody, Command* command); - -/** - * Decorates operation context with methods to retrieve apiVersion, apiStrict, and - * apiDeprecationErrors. 
- */ -class APIParameters { - -public: - static constexpr StringData kAPIVersionFieldName = "apiVersion"_sd; - static constexpr StringData kAPIStrictFieldName = "apiStrict"_sd; - static constexpr StringData kAPIDeprecationErrorsFieldName = "apiDeprecationErrors"_sd; - - APIParameters() = default; - static APIParameters& get(OperationContext* opCtx); - static APIParameters fromClient(const APIParametersFromClient& apiParamsFromClient); - - void appendInfo(BSONObjBuilder* builder) const; - - const boost::optional<std::string>& getAPIVersion() const { - return _apiVersion; - } - - void setAPIVersion(StringData apiVersion) { - _apiVersion = apiVersion.toString(); - } - - const boost::optional<bool>& getAPIStrict() const { - return _apiStrict; - } - - void setAPIStrict(bool apiStrict) { - _apiStrict = apiStrict; - } - - const boost::optional<bool>& getAPIDeprecationErrors() const { - return _apiDeprecationErrors; - } - - void setAPIDeprecationErrors(bool apiDeprecationErrors) { - _apiDeprecationErrors = apiDeprecationErrors; - } - - bool getParamsPassed() const { - return _apiVersion || _apiStrict || _apiDeprecationErrors; - } - - BSONObj toBSON() const; - -private: - boost::optional<std::string> _apiVersion; - boost::optional<bool> _apiStrict; - boost::optional<bool> _apiDeprecationErrors; -}; - } // namespace mongo diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp index f0722782157..e499d04881a 100644 --- a/src/mongo/db/mongod_options.cpp +++ b/src/mongo/db/mongod_options.cpp @@ -404,6 +404,9 @@ Status storeMongodOptions(const moe::Environment& params) { if (params.count("storage.syncPeriodSecs")) { storageGlobalParams.syncdelay = params["storage.syncPeriodSecs"].as<double>(); + storageGlobalParams.checkpointDelaySecs = + static_cast<size_t>(params["storage.syncPeriodSecs"].as<double>()); + if (storageGlobalParams.syncdelay < 0 || storageGlobalParams.syncdelay > StorageGlobalParams::kMaxSyncdelaySecs) { return Status(ErrorCodes::BadValue, 
diff --git a/src/mongo/db/namespace_string.cpp b/src/mongo/db/namespace_string.cpp index 9471aca909c..bee7df5ca40 100644 --- a/src/mongo/db/namespace_string.cpp +++ b/src/mongo/db/namespace_string.cpp @@ -144,6 +144,18 @@ bool NamespaceString::isLegalClientSystemNS() const { return false; } +/** + * Oplog entries on 'system.views' should also be processed one at a time. View catalog immediately + * reflects changes for each oplog entry so we can see inconsistent view catalog if multiple oplog + * entries on 'system.views' are being applied out of the original order. + * + * Process updates to 'admin.system.version' individually as well so the secondary's FCV when + * processing each operation matches the primary's when committing that operation. + */ +bool NamespaceString::mustBeAppliedInOwnOplogBatch() const { + return isSystemDotViews() || isServerConfigurationCollection() || isPrivilegeCollection(); +} + NamespaceString NamespaceString::makeListCollectionsNSS(StringData dbName) { NamespaceString nss(dbName, listCollectionsCursorCol); dassert(nss.isValid()); diff --git a/src/mongo/db/namespace_string.h b/src/mongo/db/namespace_string.h index a43406f8bd4..e5de9877c84 100644 --- a/src/mongo/db/namespace_string.h +++ b/src/mongo/db/namespace_string.h @@ -338,6 +338,11 @@ public: bool isDropPendingNamespace() const; /** + * Returns true if operations on this namespace must be applied in their own oplog batch. + */ + bool mustBeAppliedInOwnOplogBatch() const; + + /** * Returns the drop-pending namespace name for this namespace, provided the given optime. 
* * Example: diff --git a/src/mongo/db/pipeline/document_source_writer.h b/src/mongo/db/pipeline/document_source_writer.h index 9c175890ecf..b91c49a90db 100644 --- a/src/mongo/db/pipeline/document_source_writer.h +++ b/src/mongo/db/pipeline/document_source_writer.h @@ -65,7 +65,7 @@ public: } repl::ReadConcernArgs::get(_opCtx) = repl::ReadConcernArgs(); - _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::kUnset); + _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); } ~DocumentSourceWriteBlock() { diff --git a/src/mongo/db/pipeline/expression_context.h b/src/mongo/db/pipeline/expression_context.h index 6cd1bba4f3b..5140d8ea32f 100644 --- a/src/mongo/db/pipeline/expression_context.h +++ b/src/mongo/db/pipeline/expression_context.h @@ -328,10 +328,6 @@ public: // 'jsHeapLimitMB' server parameter. boost::optional<int> jsHeapLimitMB; - // When set this timeout limits the allowed execution time for a JavaScript function invocation - // under any Scope returned by getJsExecWithScope(). - int jsFnTimeoutMillis; - // An interface for accessing information or performing operations that have different // implementations on mongod and mongos, or that only make sense on one of the two. 
// Additionally, putting some of this functionality behind an interface prevents aggregation diff --git a/src/mongo/db/pipeline/process_interface/common_process_interface.cpp b/src/mongo/db/pipeline/process_interface/common_process_interface.cpp index 330ef41693e..b6b304c348b 100644 --- a/src/mongo/db/pipeline/process_interface/common_process_interface.cpp +++ b/src/mongo/db/pipeline/process_interface/common_process_interface.cpp @@ -184,15 +184,11 @@ bool CommonProcessInterface::keyPatternNamesExactPaths(const BSONObj& keyPattern boost::optional<ChunkVersion> CommonProcessInterface::refreshAndGetCollectionVersion( const boost::intrusive_ptr<ExpressionContext>& expCtx, const NamespaceString& nss) const { - const bool forceRefreshFromThisThread = false; - auto cm = uassertStatusOK( - Grid::get(expCtx->opCtx) - ->catalogCache() - ->getCollectionRoutingInfoWithRefresh(expCtx->opCtx, nss, forceRefreshFromThisThread)); - if (cm.isSharded()) { - return cm.getVersion(); - } - return boost::none; + const auto cm = uassertStatusOK(Grid::get(expCtx->opCtx) + ->catalogCache() + ->getCollectionRoutingInfoWithRefresh(expCtx->opCtx, nss)); + + return cm.isSharded() ? boost::make_optional(cm.getVersion()) : boost::none; } std::vector<FieldPath> CommonProcessInterface::_shardKeyToDocumentKeyFields( diff --git a/src/mongo/db/pipeline/sharded_agg_helpers.h b/src/mongo/db/pipeline/sharded_agg_helpers.h index 13a20fee607..c63ac997a32 100644 --- a/src/mongo/db/pipeline/sharded_agg_helpers.h +++ b/src/mongo/db/pipeline/sharded_agg_helpers.h @@ -245,13 +245,9 @@ auto shardVersionRetry(OperationContext* opCtx, str::stream() << "StaleConfig error on unexpected namespace. 
Expected " << nss << ", received " << staleInfo->getNss()); catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection( - opCtx, - nss, - staleInfo->getVersionWanted(), - staleInfo->getVersionReceived(), - staleInfo->getShardId()); + nss, staleInfo->getVersionWanted(), staleInfo->getShardId()); } else { - catalogCache->onEpochChange(nss); + catalogCache->invalidateCollectionEntry_LINEARIZABLE(nss); } if (!logAndTestMaxRetries(e)) { throw; diff --git a/src/mongo/db/query/SConscript b/src/mongo/db/query/SConscript index a82d443d5bf..bfb2bc1dc4c 100644 --- a/src/mongo/db/query/SConscript +++ b/src/mongo/db/query/SConscript @@ -180,6 +180,7 @@ env.Library( ], LIBDEPS=[ "$BUILD_DIR/mongo/base", + "$BUILD_DIR/mongo/db/api_parameters", "$BUILD_DIR/mongo/db/catalog/collection_catalog", # TODO: This dependency edge can be removed when the 'allowDiskUse' option no longer depends # on enabling test commands. diff --git a/src/mongo/db/query/optimizer/SConscript b/src/mongo/db/query/optimizer/SConscript index 175b109625d..0863192a593 100644 --- a/src/mongo/db/query/optimizer/SConscript +++ b/src/mongo/db/query/optimizer/SConscript @@ -8,6 +8,7 @@ env.Library( target="optimizer", source=[ "defs.cpp", + "memo.cpp", "node.cpp", ], LIBDEPS=[ diff --git a/src/mongo/db/query/optimizer/algebra/operator.h b/src/mongo/db/query/optimizer/algebra/operator.h new file mode 100644 index 00000000000..524b7246413 --- /dev/null +++ b/src/mongo/db/query/optimizer/algebra/operator.h @@ -0,0 +1,305 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. 
+ * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <vector> + +#include "mongo/db/query/optimizer/algebra/polyvalue.h" + +namespace mongo::optimizer { +namespace algebra { + +template <typename T, int S> +struct OpNodeStorage { + T _nodes[S]; + + template <typename... Ts> + OpNodeStorage(Ts&&... vals) : _nodes{std::forward<Ts>(vals)...} {} +}; + +template <typename T> +struct OpNodeStorage<T, 0> {}; + +/*=====----- + * + * Arity of operator can be: + * 1. statically known - A, A, A, ... + * 2. dynamic prefix with optional statically know - vector<A>, A, A, A, ... + * + * Denotations map A to some B. + * So static arity <A,A,A> is mapped to <B,B,B>. + * Similarly, arity <vector<A>,A> is mapped to <vector<B>,B> + * + * There is a wrinkle when B is a reference (if allowed at all) + * Arity <vector<A>, A, A> is mapped to <vector<B>&, B&, B&> - note that the reference is lifted + * outside of the vector. 
+ * + */ +template <typename Slot, typename Derived, int Arity> +class OpSpecificArity : public OpNodeStorage<Slot, Arity> { + using Base = OpNodeStorage<Slot, Arity>; + +public: + template <typename... Ts> + OpSpecificArity(Ts&&... vals) : Base({std::forward<Ts>(vals)...}) { + static_assert(sizeof...(Ts) == Arity, "constructor paramaters do not match"); + } + + template <int I, std::enable_if_t<(I >= 0 && I < Arity), int> = 0> + auto& get() noexcept { + return this->_nodes[I]; + } + + template <int I, std::enable_if_t<(I >= 0 && I < Arity), int> = 0> + const auto& get() const noexcept { + return this->_nodes[I]; + } +}; +/*=====----- + * + * Operator with dynamic arity + * + */ +template <typename Slot, typename Derived, int Arity> +class OpSpecificDynamicArity : public OpSpecificArity<Slot, Derived, Arity> { + using Base = OpSpecificArity<Slot, Derived, Arity>; + + std::vector<Slot> _dyNodes; + +public: + template <typename... Ts> + OpSpecificDynamicArity(std::vector<Slot> nodes, Ts&&... 
vals) + : Base({std::forward<Ts>(vals)...}), _dyNodes(std::move(nodes)) {} + + auto& nodes() { + return _dyNodes; + } + const auto& nodes() const { + return _dyNodes; + } +}; + +/*=====----- + * + * Semantic transport interface + * + */ +namespace detail { +template <typename D, typename T, typename = std::void_t<>> +struct has_prepare : std::false_type {}; +template <typename D, typename T> +struct has_prepare<D, T, std::void_t<decltype(std::declval<D>().prepare(std::declval<T&>()))>> + : std::true_type {}; + +template <typename D, typename T> +inline constexpr auto has_prepare_v = has_prepare<D, T>::value; + +template <typename Slot, typename Derived, int Arity> +inline constexpr int get_arity(const OpSpecificArity<Slot, Derived, Arity>*) { + return Arity; +} + +template <typename Slot, typename Derived, int Arity> +inline constexpr bool is_dynamic(const OpSpecificArity<Slot, Derived, Arity>*) { + return false; +} + +template <typename Slot, typename Derived, int Arity> +inline constexpr bool is_dynamic(const OpSpecificDynamicArity<Slot, Derived, Arity>*) { + return true; +} + +template <typename T> +using OpConcreteType = typename std::remove_reference_t<T>::template get_t<0>; +} // namespace detail + +template <typename D, bool withSlot> +class OpTransporter { + D& _domain; + + template <typename T, bool B> + struct Deducer {}; + template <typename T> + struct Deducer<T, true> { + using type = decltype(std::declval<D>().transport( + std::declval<T>(), std::declval<detail::OpConcreteType<T>&>())); + }; + template <typename T> + struct Deducer<T, false> { + using type = + decltype(std::declval<D>().transport(std::declval<detail::OpConcreteType<T>&>())); + }; + template <typename T> + using deduced_t = typename Deducer<T, withSlot>::type; + + template <typename N, typename T, typename... Ts> + auto transformStep(N&& slot, T&& op, Ts&&... 
args) { + if constexpr (withSlot) { + return _domain.transport( + std::forward<N>(slot), std::forward<T>(op), std::forward<Ts>(args)...); + } else { + return _domain.transport(std::forward<T>(op), std::forward<Ts>(args)...); + } + } + + template <typename N, typename T, size_t... I> + auto transportUnpack(N&& slot, T&& op, std::index_sequence<I...>) { + return transformStep( + std::forward<N>(slot), std::forward<T>(op), op.template get<I>().visit(*this)...); + } + template <typename N, typename T, size_t... I> + auto transportDynamicUnpack(N&& slot, T&& op, std::index_sequence<I...>) { + std::vector<decltype(slot.visit(*this))> v; + for (auto& node : op.nodes()) { + v.emplace_back(node.visit(*this)); + } + return transformStep(std::forward<N>(slot), + std::forward<T>(op), + std::move(v), + op.template get<I>().visit(*this)...); + } + template <typename N, typename T, size_t... I> + void transportUnpackVoid(N&& slot, T&& op, std::index_sequence<I...>) { + (op.template get<I>().visit(*this), ...); + return transformStep(std::forward<N>(slot), std::forward<T>(op), op.template get<I>()...); + } + template <typename N, typename T, size_t... I> + void transportDynamicUnpackVoid(N&& slot, T&& op, std::index_sequence<I...>) { + for (auto& node : op.nodes()) { + node.visit(*this); + } + (op.template get<I>().visit(*this), ...); + return transformStep( + std::forward<N>(slot), std::forward<T>(op), op.nodes(), op.template get<I>()...); + } + +public: + OpTransporter(D& domain) : _domain(domain) {} + + template <typename N, typename T, typename R = deduced_t<N>> + R operator()(N&& slot, T&& op) { + // N is either `PolyValue<Ts...>&` or `const PolyValue<Ts...>&` i.e. 
reference + // T is either `A&` or `const A&` where A is one of Ts + using type = std::remove_reference_t<T>; + + constexpr int arity = detail::get_arity(static_cast<type*>(nullptr)); + constexpr bool is_dynamic = detail::is_dynamic(static_cast<type*>(nullptr)); + + if constexpr (detail::has_prepare_v<D, type>) { + _domain.prepare(std::forward<T>(op)); + } + if constexpr (is_dynamic) { + if constexpr (std::is_same_v<R, void>) { + return transportDynamicUnpackVoid( + std::forward<N>(slot), std::forward<T>(op), std::make_index_sequence<arity>{}); + } else { + return transportDynamicUnpack( + std::forward<N>(slot), std::forward<T>(op), std::make_index_sequence<arity>{}); + } + } else { + if constexpr (std::is_same_v<R, void>) { + return transportUnpackVoid( + std::forward<N>(slot), std::forward<T>(op), std::make_index_sequence<arity>{}); + } else { + return transportUnpack( + std::forward<N>(slot), std::forward<T>(op), std::make_index_sequence<arity>{}); + } + } + } +}; + +template <typename D, bool withSlot> +class OpWalker { + D& _domain; + + template <typename N, typename T, typename... Ts> + auto walkStep(N&& slot, T&& op, Ts&&... args) { + if constexpr (withSlot) { + return _domain.walk( + std::forward<N>(slot), std::forward<T>(op), std::forward<Ts>(args)...); + } else { + return _domain.walk(std::forward<T>(op), std::forward<Ts>(args)...); + } + } + + template <typename N, typename T, typename... Args, size_t... I> + auto walkUnpack(N&& slot, T&& op, std::index_sequence<I...>, Args&&... args) { + return walkStep(std::forward<N>(slot), + std::forward<T>(op), + std::forward<Args>(args)..., + op.template get<I>()...); + } + template <typename N, typename T, typename... Args, size_t... I> + auto walkDynamicUnpack(N&& slot, T&& op, std::index_sequence<I...>, Args&&... 
args) { + return walkStep(std::forward<N>(slot), + std::forward<T>(op), + std::forward<Args>(args)..., + op.nodes(), + op.template get<I>()...); + } + +public: + OpWalker(D& domain) : _domain(domain) {} + + template <typename N, typename T, typename... Args> + auto operator()(N&& slot, T&& op, Args&&... args) { + // N is either `PolyValue<Ts...>&` or `const PolyValue<Ts...>&` i.e. reference + // T is either `A&` or `const A&` where A is one of Ts + using type = std::remove_reference_t<T>; + + constexpr int arity = detail::get_arity(static_cast<type*>(nullptr)); + constexpr bool is_dynamic = detail::is_dynamic(static_cast<type*>(nullptr)); + + if constexpr (is_dynamic) { + return walkDynamicUnpack(std::forward<N>(slot), + std::forward<T>(op), + std::make_index_sequence<arity>{}, + std::forward<Args>(args)...); + } else { + return walkUnpack(std::forward<N>(slot), + std::forward<T>(op), + std::make_index_sequence<arity>{}, + std::forward<Args>(args)...); + } + } +}; + +template <bool withSlot = false, typename D, typename N> +auto transport(N&& node, D& domain) { + return node.visit(OpTransporter<D, withSlot>{domain}); +} + +template <bool withSlot = false, typename D, typename N, typename... Args> +auto walk(N&& node, D& domain, Args&&... args) { + return node.visit(OpWalker<D, withSlot>{domain}, std::forward<Args>(args)...); +} + +} // namespace algebra +} // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/algebra/polyvalue.h b/src/mongo/db/query/optimizer/algebra/polyvalue.h new file mode 100644 index 00000000000..374041c5704 --- /dev/null +++ b/src/mongo/db/query/optimizer/algebra/polyvalue.h @@ -0,0 +1,381 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <array> +#include <stdexcept> +#include <type_traits> + +namespace mongo::optimizer { +namespace algebra { +namespace detail { + +template <typename T, typename... Args> +inline constexpr bool is_one_of_v = std::disjunction_v<std::is_same<T, Args>...>; + +template <typename T, typename... Args> +inline constexpr bool is_one_of_f() { + return is_one_of_v<T, Args...>; +} + +template <typename... Args> +struct is_unique_t : std::true_type {}; + +template <typename H, typename... T> +struct is_unique_t<H, T...> + : std::bool_constant<!is_one_of_f<H, T...>() && is_unique_t<T...>::value> {}; + +template <typename... 
Args> +inline constexpr bool is_unique_v = is_unique_t<Args...>::value; + +// Given the type T find its index in Ts +template <typename T, typename... Ts> +static inline constexpr int find_index() { + static_assert(detail::is_unique_v<Ts...>, "Types must be unique"); + constexpr bool matchVector[] = {std::is_same<T, Ts>::value...}; + + for (int index = 0; index < static_cast<int>(sizeof...(Ts)); ++index) { + if (matchVector[index]) { + return index; + } + } + + return -1; +} + +template <int N, typename T, typename... Ts> +struct get_type_by_index_impl { + using type = typename get_type_by_index_impl<N - 1, Ts...>::type; +}; +template <typename T, typename... Ts> +struct get_type_by_index_impl<0, T, Ts...> { + using type = T; +}; + +// Given the index I return the type from Ts +template <int I, typename... Ts> +using get_type_by_index = typename get_type_by_index_impl<I, Ts...>::type; + +} // namespace detail + +/*=====----- + * + * The overload trick to construct visitors from lambdas. + * + */ +template <class... Ts> +struct overload : Ts... { + using Ts::operator()...; +}; +template <class... Ts> +overload(Ts...)->overload<Ts...>; + +/*=====----- + * + * Forward declarations + * + */ +template <typename... Ts> +class PolyValue; + +template <typename T, typename... Ts> +class ControlBlockVTable; + +/*=====----- + * + * The base control block that PolyValue holds. + * + * It does not contain anything else by the runtime tag. + * + */ +template <typename... Ts> +class ControlBlock { + const int _tag; + +protected: + ControlBlock(int tag) noexcept : _tag(tag) {} + +public: + auto getRuntimeTag() const noexcept { + return _tag; + } +}; + +/*=====----- + * + * The concrete control block VTable generator. + * + * It must be empty ad PolyValue derives from the generators + * and we want EBO to kick in. + * + */ +template <typename T, typename... 
Ts> +class ControlBlockVTable { + static constexpr int _staticTag = detail::find_index<T, Ts...>(); + static_assert(_staticTag != -1, "Type must be on the list"); + + using AbstractType = ControlBlock<Ts...>; + using PolyValueType = PolyValue<Ts...>; + + /*=====----- + * + * The concrete control block for every type T of Ts. + * + * It derives from the ControlBlock. All methods are private and only + * the friend class ControlBlockVTable can call them. + * + */ + class ConcreteType : public AbstractType { + T _t; + + public: + template <typename... Args> + ConcreteType(Args&&... args) : AbstractType(_staticTag), _t(std::forward<Args>(args)...) {} + + const T* getPtr() const { + return &_t; + } + + T* getPtr() { + return &_t; + } + }; + + static constexpr auto concrete(AbstractType* block) noexcept { + return static_cast<ConcreteType*>(block); + } + + static constexpr auto concrete(const AbstractType* block) noexcept { + return static_cast<const ConcreteType*>(block); + } + +public: + template <typename... Args> + static AbstractType* make(Args&&... 
args) { + return new ConcreteType(std::forward<Args>(args)...); + } + + static AbstractType* clone(const AbstractType* block) { + return new ConcreteType(*concrete(block)); + } + + static void destroy(AbstractType* block) noexcept { + delete concrete(block); + } + + static bool compareEq(AbstractType* blockLhs, AbstractType* blockRhs) noexcept { + if (blockLhs->getRuntimeTag() == blockRhs->getRuntimeTag()) { + return *castConst<T>(blockLhs) == *castConst<T>(blockRhs); + } + return false; + } + + template <typename U> + static constexpr bool is_v = std::is_base_of_v<U, T>; + + template <typename U> + static U* cast(AbstractType* block) { + if constexpr (is_v<U>) { + return static_cast<U*>(concrete(block)->getPtr()); + } else { + // gcc bug 81676 + (void)block; + return nullptr; + } + } + + template <typename U> + static const U* castConst(const AbstractType* block) { + if constexpr (is_v<U>) { + return static_cast<const U*>(concrete(block)->getPtr()); + } else { + // gcc bug 81676 + (void)block; + return nullptr; + } + } + + template <typename V, typename... Args> + static auto visit(V&& v, PolyValueType& holder, AbstractType* block, Args&&... args) { + return v(holder, *cast<T>(block), std::forward<Args>(args)...); + } + + template <typename V, typename... Args> + static auto visitConst(V&& v, + const PolyValueType& holder, + const AbstractType* block, + Args&&... args) { + return v(holder, *castConst<T>(block), std::forward<Args>(args)...); + } +}; + +/*=====----- + * + * This is a variation on variant and polymorphic value theme. + * + * A tag based dispatch + * + * Supported operations: + * - construction + * - destruction + * - clone a = b; + * - cast a.cast<T>() + * - multi-method cast to common base a.cast<B>() + * - multi-method visit + */ +template <typename... Ts> +class PolyValue : private ControlBlockVTable<Ts, Ts...>... 
{ + static_assert(detail::is_unique_v<Ts...>, "Types must be unique"); + static_assert(std::conjunction_v<std::is_empty<ControlBlockVTable<Ts, Ts...>>...>, + "VTable base classes must be empty"); + + ControlBlock<Ts...>* _object{nullptr}; + + PolyValue(ControlBlock<Ts...>* object) noexcept : _object(object) {} + + auto tag() const noexcept { + return _object->getRuntimeTag(); + } + + void check() const { + if (!_object) { + throw std::logic_error("PolyValue is empty"); + } + } + + static void destroy(ControlBlock<Ts...>* object) { + static constexpr std::array destroyTbl = {&ControlBlockVTable<Ts, Ts...>::destroy...}; + + destroyTbl[object->getRuntimeTag()](object); + } + +public: + PolyValue() = delete; + + PolyValue(const PolyValue& other) { + static constexpr std::array cloneTbl = {&ControlBlockVTable<Ts, Ts...>::clone...}; + if (other._object) { + _object = cloneTbl[other.tag()](other._object); + } + } + + PolyValue(PolyValue&& other) noexcept { + swap(other); + } + + ~PolyValue() noexcept { + if (_object) { + destroy(_object); + } + } + + PolyValue& operator=(PolyValue other) noexcept { + swap(other); + return *this; + } + + template <typename T, typename... Args> + static PolyValue make(Args&&... args) { + return PolyValue{ControlBlockVTable<T, Ts...>::make(std::forward<Args>(args)...)}; + } + + template <int I> + using get_t = detail::get_type_by_index<I, Ts...>; + + template <typename V, typename... Args> + auto visit(V&& v, Args&&... args) { + // unfortunately gcc rejects much nicer code, clang and msvc accept + // static constexpr std::array visitTbl = { &ControlBlockVTable<Ts, Ts...>::template + // visit<V>... 
}; + + using FunPtrType = + decltype(&ControlBlockVTable<get_t<0>, Ts...>::template visit<V, Args...>); + static constexpr FunPtrType visitTbl[] = { + &ControlBlockVTable<Ts, Ts...>::template visit<V, Args...>...}; + + check(); + return visitTbl[tag()](std::forward<V>(v), *this, _object, std::forward<Args>(args)...); + } + + template <typename V, typename... Args> + auto visit(V&& v, Args&&... args) const { + // unfortunately gcc rejects much nicer code, clang and msvc accept + // static constexpr std::array visitTbl = { &ControlBlockVTable<Ts, Ts...>::template + // visitConst<V>... }; + + using FunPtrType = + decltype(&ControlBlockVTable<get_t<0>, Ts...>::template visitConst<V, Args...>); + static constexpr FunPtrType visitTbl[] = { + &ControlBlockVTable<Ts, Ts...>::template visitConst<V, Args...>...}; + + check(); + return visitTbl[tag()](std::forward<V>(v), *this, _object, std::forward<Args>(args)...); + } + + template <typename T> + T* cast() { + check(); + static constexpr std::array castTbl = {&ControlBlockVTable<Ts, Ts...>::template cast<T>...}; + return castTbl[tag()](_object); + } + + template <typename T> + const T* cast() const { + static constexpr std::array castTbl = { + &ControlBlockVTable<Ts, Ts...>::template castConst<T>...}; + + check(); + return castTbl[tag()](_object); + } + + template <typename T> + bool is() const { + static constexpr std::array isTbl = {ControlBlockVTable<Ts, Ts...>::template is_v<T>...}; + + check(); + return isTbl[tag()]; + } + + bool empty() const { + return !_object; + } + + void swap(PolyValue& other) noexcept { + std::swap(other._object, _object); + } + + bool operator==(const PolyValue& rhs) const noexcept { + static constexpr std::array cmp = {ControlBlockVTable<Ts, Ts...>::compareEq...}; + return cmp[tag()](_object, rhs._object); + } +}; + +} // namespace algebra +} // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/memo.cpp b/src/mongo/db/query/optimizer/memo.cpp new file mode 100644 index 
00000000000..c4dadbb3d5a --- /dev/null +++ b/src/mongo/db/query/optimizer/memo.cpp @@ -0,0 +1,43 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/db/query/optimizer/algebra/operator.h" +#include "mongo/db/query/optimizer/memo.h" +#include "mongo/db/query/optimizer/node.h" + +namespace mongo::optimizer { + +std::string MemoGenerator::generateMemo(const PolymorphicNode& e) { + _os.str(""); + _os.clear(); + algebra::transport<false>(e, *this); + return _os.str(); +} + +} // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/visitor.h b/src/mongo/db/query/optimizer/memo.h index 1aa0a886fab..ad3703f8fd8 100644 --- a/src/mongo/db/query/optimizer/visitor.h +++ b/src/mongo/db/query/optimizer/memo.h @@ -31,16 +31,24 @@ #include <string> +#include "mongo/db/query/optimizer/node.h" + namespace mongo::optimizer { -class AbstractVisitor { +class MemoGenerator { public: - virtual void visit(const ScanNode& node) = 0; - virtual void visit(const MultiJoinNode& node) = 0; - virtual void visit(const UnionNode& node) = 0; - virtual void visit(const GroupByNode& node) = 0; - virtual void visit(const UnwindNode& node) = 0; - virtual void visit(const WindNode& node) = 0; + template <typename T, typename... Ts> + void transport(const T&, Ts&&...) 
{} + + template <typename T> + void prepare(const T& n) { + n.generateMemo(_os); + } + + std::string generateMemo(const PolymorphicNode& e); + +private: + std::ostringstream _os; }; -} // namespace mongo::optimizer +} // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/node.cpp b/src/mongo/db/query/optimizer/node.cpp index 4836dcce39e..a1455efd60f 100644 --- a/src/mongo/db/query/optimizer/node.cpp +++ b/src/mongo/db/query/optimizer/node.cpp @@ -30,130 +30,19 @@ #include <functional> #include <stack> +#include "mongo/db/query/optimizer/memo.h" #include "mongo/db/query/optimizer/node.h" -#include "mongo/db/query/optimizer/visitor.h" -#include "mongo/util/assert_util.h" namespace mongo::optimizer { -Node::Node(Context& ctx) : _nodeId(ctx.getNextNodeId()), _children() {} - -Node::Node(Context& ctx, NodePtr child) : _nodeId(ctx.getNextNodeId()) { - _children.push_back(std::move(child)); -} - -Node::Node(Context& ctx, ChildVector children) - : _nodeId(ctx.getNextNodeId()), _children(std::move(children)) {} +Node::Node(Context& ctx) : _nodeId(ctx.getNextNodeId()) {} void Node::generateMemoBase(std::ostringstream& os) const { os << "NodeId: " << _nodeId << "\n"; } -void Node::visitPreOrder(AbstractVisitor& visitor) const { - visit(visitor); - for (const NodePtr& ptr : _children) { - ptr->visitPreOrder(visitor); - } -} - -void Node::visitPostOrder(AbstractVisitor& visitor) const { - for (const NodePtr& ptr : _children) { - ptr->visitPostOrder(visitor); - } - visit(visitor); -} - -std::string Node::generateMemo() const { - class MemoVisitor : public AbstractVisitor { - protected: - void visit(const ScanNode& node) override { - node.generateMemo(_os); - } - void visit(const MultiJoinNode& node) override { - node.generateMemo(_os); - } - void visit(const UnionNode& node) override { - node.generateMemo(_os); - } - void visit(const GroupByNode& node) override { - node.generateMemo(_os); - } - void visit(const UnwindNode& node) override { - 
node.generateMemo(_os); - } - void visit(const WindNode& node) override { - node.generateMemo(_os); - } - - public: - std::ostringstream _os; - }; - - MemoVisitor visitor; - visitPreOrder(visitor); - return visitor._os.str(); -} - -NodePtr Node::clone(Context& ctx) const { - class CloneVisitor : public AbstractVisitor { - public: - explicit CloneVisitor(Context& ctx) : _ctx(ctx), _childStack() {} - - protected: - void visit(const ScanNode& node) override { - doClone(node, [&](ChildVector v){ return ScanNode::clone(_ctx, node); }); - } - void visit(const MultiJoinNode& node) override { - doClone(node, [&](ChildVector v){ return MultiJoinNode::clone(_ctx, node, std::move(v)); }); - } - void visit(const UnionNode& node) override { - doClone(node, [&](ChildVector v){ return UnionNode::clone(_ctx, node, std::move(v)); }); - } - void visit(const GroupByNode& node) override { - doClone(node, [&](ChildVector v){ return GroupByNode::clone(_ctx, node, std::move(v.at(0))); }); - } - void visit(const UnwindNode& node) override { - doClone(node, [&](ChildVector v){ return UnwindNode::clone(_ctx, node, std::move(v.at(0))); }); - } - void visit(const WindNode& node) override { - doClone(node, [&](ChildVector v){ return WindNode::clone(_ctx, node, std::move(v.at(0))); }); - } - - private: - void doClone(const Node& node, const std::function<NodePtr(ChildVector newChildren)>& cloneFn) { - ChildVector newChildren; - for (int i = 0; i < node.getChildCount(); i++) { - newChildren.push_back(std::move(_childStack.top())); - _childStack.pop(); - } - _childStack.push(cloneFn(std::move(newChildren))); - } - - public: - Context& _ctx; - std::stack<NodePtr> _childStack; - }; - - CloneVisitor visitor(ctx); - visitPostOrder(visitor); - invariant(visitor._childStack.size() == 1); - return std::move(visitor._childStack.top()); -} - -int Node::getChildCount() const { - return _children.size(); -} - -NodePtr ScanNode::create(Context& ctx, CollectionNameType collectionName) { - return NodePtr(new 
ScanNode(ctx, std::move(collectionName))); -} - -NodePtr ScanNode::clone(Context& ctx, const ScanNode& other) { - return create(ctx, other._collectionName); -} - ScanNode::ScanNode(Context& ctx, CollectionNameType collectionName) - : Node(ctx), _collectionName(std::move(collectionName)) {} + : Base(), Node(ctx), _collectionName(std::move(collectionName)) {} void ScanNode::generateMemo(std::ostringstream& os) const { Node::generateMemoBase(os); @@ -161,27 +50,12 @@ void ScanNode::generateMemo(std::ostringstream& os) const { << "\n"; } -void ScanNode::visit(AbstractVisitor& visitor) const { - visitor.visit(*this); -} - -NodePtr MultiJoinNode::create(Context& ctx, - FilterSet filterSet, - ProjectionMap projectionMap, - ChildVector children) { - return NodePtr(new MultiJoinNode( - ctx, std::move(filterSet), std::move(projectionMap), std::move(children))); -} - -NodePtr MultiJoinNode::clone(Context& ctx, const MultiJoinNode& other, ChildVector newChildren) { - return create(ctx, other._filterSet, other._projectionMap, std::move(newChildren)); -} - MultiJoinNode::MultiJoinNode(Context& ctx, FilterSet filterSet, ProjectionMap projectionMap, - ChildVector children) - : Node(ctx, std::move(children)), + PolymorphicNodeVector children) + : Base(std::move(children)), + Node(ctx), _filterSet(std::move(filterSet)), _projectionMap(std::move(projectionMap)) {} @@ -191,20 +65,8 @@ void MultiJoinNode::generateMemo(std::ostringstream& os) const { << "\n"; } -void MultiJoinNode::visit(AbstractVisitor& visitor) const { - visitor.visit(*this); -} - -NodePtr UnionNode::create(Context& ctx, ChildVector children) { - return NodePtr(new UnionNode(ctx, std::move(children))); -} - -NodePtr UnionNode::clone(Context& ctx, const UnionNode& other, ChildVector newChildren) { - return create(ctx, std::move(newChildren)); -} - -UnionNode::UnionNode(Context& ctx, ChildVector children) - : Node(ctx, std::move(children)) {} +UnionNode::UnionNode(Context& ctx, PolymorphicNodeVector children) + : 
Base(std::move(children)), Node(ctx) {} void UnionNode::generateMemo(std::ostringstream& os) const { Node::generateMemoBase(os); @@ -212,27 +74,12 @@ void UnionNode::generateMemo(std::ostringstream& os) const { << "\n"; } -void UnionNode::visit(AbstractVisitor& visitor) const { - visitor.visit(*this); -} - -NodePtr GroupByNode::create(Context& ctx, - GroupByNode::GroupByVector groupByVector, - GroupByNode::ProjectionMap projectionMap, - NodePtr child) { - return NodePtr( - new GroupByNode(ctx, std::move(groupByVector), std::move(projectionMap), std::move(child))); -} - -NodePtr GroupByNode::clone(Context& ctx, const GroupByNode& other, NodePtr newChild) { - return create(ctx, other._groupByVector, other._projectionMap, std::move(newChild)); -} - GroupByNode::GroupByNode(Context& ctx, GroupByNode::GroupByVector groupByVector, GroupByNode::ProjectionMap projectionMap, - NodePtr child) - : Node(ctx, std::move(child)), + PolymorphicNode child) + : Base(std::move(child)), + Node(ctx), _groupByVector(std::move(groupByVector)), _projectionMap(std::move(projectionMap)) {} @@ -242,27 +89,12 @@ void GroupByNode::generateMemo(std::ostringstream& os) const { << "\n"; } -void GroupByNode::visit(AbstractVisitor& visitor) const { - visitor.visit(*this); -} - -NodePtr UnwindNode::create(Context& ctx, - ProjectionName projectionName, - const bool retainNonArrays, - NodePtr child) { - return NodePtr( - new UnwindNode(ctx, std::move(projectionName), retainNonArrays, std::move(child))); -} - -NodePtr UnwindNode::clone(Context& ctx, const UnwindNode& other, NodePtr newChild) { - return create(ctx, other._projectionName, other._retainNonArrays, std::move(newChild)); -} - UnwindNode::UnwindNode(Context& ctx, ProjectionName projectionName, const bool retainNonArrays, - NodePtr child) - : Node(ctx, std::move(child)), + PolymorphicNode child) + : Base(std::move(child)), + Node(ctx), _projectionName(std::move(projectionName)), _retainNonArrays(retainNonArrays) {} @@ -272,20 +104,8 @@ void 
UnwindNode::generateMemo(std::ostringstream& os) const { << "\n"; } -void UnwindNode::visit(AbstractVisitor& visitor) const { - visitor.visit(*this); -} - -NodePtr WindNode::create(Context& ctx, ProjectionName projectionName, NodePtr child) { - return NodePtr(new WindNode(ctx, std::move(projectionName), std::move(child))); -} - -NodePtr WindNode::clone(Context& ctx, const WindNode& other, NodePtr newChild) { - return create(ctx, other._projectionName, std::move(newChild)); -} - -WindNode::WindNode(Context& ctx, ProjectionName projectionName, NodePtr child) - : Node(ctx, std::move(child)), _projectionName(std::move(projectionName)) {} +WindNode::WindNode(Context& ctx, ProjectionName projectionName, PolymorphicNode child) + : Base(std::move(child)), Node(ctx), _projectionName(std::move(projectionName)) {} void WindNode::generateMemo(std::ostringstream& os) const { Node::generateMemoBase(os); @@ -293,8 +113,4 @@ void WindNode::generateMemo(std::ostringstream& os) const { << "\n"; } -void WindNode::visit(AbstractVisitor& visitor) const { - visitor.visit(*this); -} - } // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/node.h b/src/mongo/db/query/optimizer/node.h index 78010d7d333..33215f967e0 100644 --- a/src/mongo/db/query/optimizer/node.h +++ b/src/mongo/db/query/optimizer/node.h @@ -37,6 +37,7 @@ #include <utility> #include <vector> +#include "mongo/db/query/optimizer/algebra/operator.h" #include "mongo/db/query/optimizer/defs.h" #include "mongo/db/query/optimizer/filter.h" #include "mongo/db/query/optimizer/projection.h" @@ -45,156 +46,137 @@ namespace mongo::optimizer { -class Node; -using NodePtr = std::unique_ptr<Node>; -class AbstractVisitor; +class ScanNode; +class MultiJoinNode; +class UnionNode; +class GroupByNode; +class UnwindNode; +class WindNode; -class Node { -public: - using ChildVector = std::vector<NodePtr>; +using PolymorphicNode = + algebra::PolyValue<ScanNode, MultiJoinNode, UnionNode, GroupByNode, UnwindNode, WindNode>; + 
+template <typename Derived, size_t Arity> +using Operator = algebra::OpSpecificArity<PolymorphicNode, Derived, Arity>; + +template <typename Derived, size_t Arity> +using OperatorDynamic = algebra::OpSpecificDynamicArity<PolymorphicNode, Derived, Arity>; + +template <typename Derived> +using OperatorDynamicHomogenous = OperatorDynamic<Derived, 0>; + +using PolymorphicNodeVector = std::vector<PolymorphicNode>; + +template <typename T, typename... Args> +inline auto make(Args&&... args) { + return PolymorphicNode::make<T>(std::forward<Args>(args)...); +} +template <typename... Args> +inline auto makeSeq(Args&&... args) { + PolymorphicNodeVector seq; + (seq.emplace_back(std::forward<Args>(args)), ...); + return seq; +} + +class Node { protected: explicit Node(Context& ctx); - explicit Node(Context& ctx, NodePtr child); - explicit Node(Context& ctx, ChildVector children); void generateMemoBase(std::ostringstream& os) const; - virtual void visit(AbstractVisitor& visitor) const = 0; - void visitPreOrder(AbstractVisitor& visitor) const; - void visitPostOrder(AbstractVisitor& visitor) const; - - // clone public: Node() = delete; - std::string generateMemo() const; - - NodePtr clone(Context& ctx) const; - - int getChildCount() const; - private: const NodeIdType _nodeId; - ChildVector _children; }; -class ScanNode : public Node { +class ScanNode final : public Operator<ScanNode, 0>, public Node { + using Base = Operator<ScanNode, 0>; + public: - static NodePtr create(Context& ctx, CollectionNameType collectionName); - static NodePtr clone(Context& ctx, const ScanNode& other); + explicit ScanNode(Context& ctx, CollectionNameType collectionName); void generateMemo(std::ostringstream& os) const; -protected: - void visit(AbstractVisitor& visitor) const override; - private: - explicit ScanNode(Context& ctx, CollectionNameType collectionName); - const CollectionNameType _collectionName; }; -class MultiJoinNode : public Node { +class MultiJoinNode final : public 
OperatorDynamicHomogenous<MultiJoinNode>, public Node { + using Base = OperatorDynamicHomogenous<MultiJoinNode>; + public: using FilterSet = std::unordered_set<FilterType>; using ProjectionMap = std::unordered_map<ProjectionName, ProjectionType>; - static NodePtr create(Context& ctx, - FilterSet filterSet, - ProjectionMap projectionMap, - ChildVector children); - static NodePtr clone(Context& ctx, const MultiJoinNode& other, ChildVector newChildren); - - void generateMemo(std::ostringstream& os) const; - -protected: - void visit(AbstractVisitor& visitor) const override; - -private: explicit MultiJoinNode(Context& ctx, FilterSet filterSet, ProjectionMap projectionMap, - ChildVector children); + PolymorphicNodeVector children); + void generateMemo(std::ostringstream& os) const; + +private: FilterSet _filterSet; ProjectionMap _projectionMap; }; -class UnionNode : public Node { +class UnionNode final : public OperatorDynamicHomogenous<UnionNode>, public Node { + using Base = OperatorDynamicHomogenous<UnionNode>; + public: - static NodePtr create(Context& ctx, ChildVector children); - static NodePtr clone(Context& ctx, const UnionNode& other, ChildVector newChildren); + explicit UnionNode(Context& ctx, PolymorphicNodeVector children); void generateMemo(std::ostringstream& os) const; - -protected: - void visit(AbstractVisitor& visitor) const override; - -private: - explicit UnionNode(Context& ctx, ChildVector children); }; -class GroupByNode : public Node { +class GroupByNode : public Operator<GroupByNode, 1>, public Node { + using Base = Operator<GroupByNode, 1>; + public: using GroupByVector = std::vector<ProjectionName>; using ProjectionMap = std::unordered_map<ProjectionName, ProjectionType>; - static NodePtr create(Context& ctx, - GroupByVector groupByVector, - ProjectionMap projectionMap, - NodePtr child); - static NodePtr clone(Context& ctx, const GroupByNode& other, NodePtr newChild); - - void generateMemo(std::ostringstream& os) const; - -protected: - void 
visit(AbstractVisitor& visitor) const override; - -private: explicit GroupByNode(Context& ctx, GroupByVector groupByVector, ProjectionMap projectionMap, - NodePtr child); + PolymorphicNode child); + + void generateMemo(std::ostringstream& os) const; +private: GroupByVector _groupByVector; ProjectionMap _projectionMap; }; -class UnwindNode : public Node { +class UnwindNode final : public Operator<UnwindNode, 1>, public Node { + using Base = Operator<UnwindNode, 1>; + public: - static NodePtr create(Context& ctx, - ProjectionName projectionName, - bool retainNonArrays, - NodePtr child); - static NodePtr clone(Context& ctx, const UnwindNode& other, NodePtr newChild); + explicit UnwindNode(Context& ctx, + ProjectionName projectionName, + bool retainNonArrays, + PolymorphicNode child); void generateMemo(std::ostringstream& os) const; -protected: - void visit(AbstractVisitor& visitor) const override; - private: - UnwindNode(Context& ctx, ProjectionName projectionName, bool retainNonArrays, NodePtr child); - const ProjectionName _projectionName; const bool _retainNonArrays; }; -class WindNode : public Node { +class WindNode final : public Operator<WindNode, 1>, public Node { + using Base = Operator<WindNode, 1>; + public: - static NodePtr create(Context& ctx, ProjectionName projectionName, NodePtr child); - static NodePtr clone(Context& ctx, const WindNode& other, NodePtr newChild); + explicit WindNode(Context& ctx, ProjectionName projectionName, PolymorphicNode child); void generateMemo(std::ostringstream& os) const; -protected: - void visit(AbstractVisitor& visitor) const override; - private: - WindNode(Context& ctx, ProjectionName projectionName, NodePtr child); - const ProjectionName _projectionName; }; diff --git a/src/mongo/db/query/optimizer/optimizer_test.cpp b/src/mongo/db/query/optimizer/optimizer_test.cpp index 86966e05a7e..f1cffe77303 100644 --- a/src/mongo/db/query/optimizer/optimizer_test.cpp +++ b/src/mongo/db/query/optimizer/optimizer_test.cpp @@ -27,6 
+27,7 @@ * it in the license file. */ +#include "mongo/db/query/optimizer/memo.h" #include "mongo/db/query/optimizer/node.h" #include "mongo/unittest/unittest.h" @@ -35,15 +36,20 @@ namespace { TEST(Optimizer, Basic) { Context ctx; + MemoGenerator gen; - NodePtr ptrScan = ScanNode::create(ctx, "test"); - Node::ChildVector v; - v.push_back(std::move(ptrScan)); - NodePtr ptrJoin = MultiJoinNode::create(ctx, {}, {}, std::move(v)); - ASSERT_EQ("NodeId: 1\nMultiJoin\nNodeId: 0\nScan\n", ptrJoin->generateMemo()); + PolymorphicNode scanNode = make<ScanNode>(ctx, "test"); + ASSERT_EQ("NodeId: 0\nScan\n", gen.generateMemo(scanNode)); - NodePtr cloned = ptrJoin->clone(ctx); - ASSERT_EQ("NodeId: 3\nMultiJoin\nNodeId: 2\nScan\n", cloned->generateMemo()); + PolymorphicNode joinNode = make<MultiJoinNode>(ctx, + MultiJoinNode::FilterSet{}, + MultiJoinNode::ProjectionMap{}, + makeSeq(std::move(scanNode))); + ASSERT_EQ("NodeId: 1\nMultiJoin\nNodeId: 0\nScan\n", gen.generateMemo(joinNode)); + + + PolymorphicNode cloned = joinNode; + ASSERT_EQ("NodeId: 1\nMultiJoin\nNodeId: 0\nScan\n", gen.generateMemo(cloned)); } } // namespace diff --git a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp index 05f9bcefb96..1a338abf238 100644 --- a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp +++ b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp @@ -41,6 +41,7 @@ #include "mongo/db/exec/sbe/stages/loop_join.h" #include "mongo/db/exec/sbe/stages/project.h" #include "mongo/db/exec/sbe/stages/scan.h" +#include "mongo/db/exec/sbe/stages/union.h" #include "mongo/db/query/sbe_stage_builder_filter.h" #include "mongo/db/query/util/make_data_structure.h" #include "mongo/db/storage/oplog_hack.h" @@ -330,20 +331,63 @@ generateGenericCollScan(const Collection* collection, // Check if the scan should be started after the provided resume RecordId and construct a nested // loop join sub-tree to project out the resume RecordId as a seekRecordIdSlot and 
feed it to - // the inner side (scan). - // - // Note that we also inject a 'skip 1' stage on top of the inner branch, as we need to start - // _after_ the resume RecordId. - // - // TODO SERVER-48472: raise KeyNotFound error if we cannot position the cursor on - // seekRecordIdSlot. + // the inner side (scan). We will also construct a union sub-tree as an outer side of the loop + // join to implement the check that the record we're trying to reposition the scan exists. if (seekRecordIdSlot && !isTailableResumeBranch) { + // Project out the RecordId we want to resume from as 'seekSlot'. + auto seekSlot = slotIdGenerator->generate(); + auto projStage = sbe::makeProjectStage( + sbe::makeS<sbe::LimitSkipStage>(sbe::makeS<sbe::CoScanStage>(), 1, boost::none), + seekSlot, + sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::NumberInt64, + csn->resumeAfterRecordId->repr())); + + // Construct a 'seek' branch of the 'union'. If we're succeeded to reposition the cursor, + // the branch will output the 'seekSlot' to start the real scan from, otherwise it will + // produce EOF. + auto seekBranch = + sbe::makeS<sbe::LoopJoinStage>(std::move(projStage), + sbe::makeS<sbe::ScanStage>(nss, + boost::none, + boost::none, + std::vector<std::string>{}, + sbe::makeSV(), + seekSlot, + forward, + yieldPolicy, + tracker), + + sbe::makeSV(seekSlot), + sbe::makeSV(seekSlot), + nullptr); + + // Construct a 'fail' branch of the union. The 'unusedSlot' is needed as each union branch + // must have the same number of slots, and we use just one in the 'seek' branch above. This + // branch will only be executed if the 'seek' branch produces EOF, which can only happen if + // if the seek did not find the record id specified in $_resumeAfter. 
+ auto unusedSlot = slotIdGenerator->generate(); + auto failBranch = sbe::makeProjectStage( + sbe::makeS<sbe::CoScanStage>(), + unusedSlot, + sbe::makeE<sbe::EFail>( + ErrorCodes::KeyNotFound, + str::stream() << "Failed to resume collection scan: the recordId from which we are " + << "attempting to resume no longer exists in the collection: " + << csn->resumeAfterRecordId)); + + // Construct a union stage from the 'seek' and 'fail' branches. Note that this stage will + // ever produce a single call to getNext() due to a 'limit 1' sitting on top of it. + auto unionStage = sbe::makeS<sbe::UnionStage>( + make_vector<std::unique_ptr<sbe::PlanStage>>(std::move(seekBranch), + std::move(failBranch)), + std::vector<sbe::value::SlotVector>{sbe::makeSV(seekSlot), sbe::makeSV(unusedSlot)}, + sbe::makeSV(*seekRecordIdSlot)); + + // Construct the final loop join. Note that we also inject a 'skip 1' stage on top of the + // inner branch, as we need to start _after_ the resume RecordId, and a 'limit 1' stage on + // top of the outer branch, as it should produce just a single seek recordId. 
stage = sbe::makeS<sbe::LoopJoinStage>( - sbe::makeProjectStage( - sbe::makeS<sbe::LimitSkipStage>(sbe::makeS<sbe::CoScanStage>(), 1, boost::none), - *seekRecordIdSlot, - sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::NumberInt64, - csn->resumeAfterRecordId->repr())), + sbe::makeS<sbe::LimitSkipStage>(std::move(unionStage), 1, boost::none), sbe::makeS<sbe::LimitSkipStage>(std::move(stage), boost::none, 1), sbe::makeSV(), sbe::makeSV(*seekRecordIdSlot), diff --git a/src/mongo/db/query/sbe_stage_builder_expression.cpp b/src/mongo/db/query/sbe_stage_builder_expression.cpp index b17164df951..22c03198b9b 100644 --- a/src/mongo/db/query/sbe_stage_builder_expression.cpp +++ b/src/mongo/db/query/sbe_stage_builder_expression.cpp @@ -883,13 +883,103 @@ public: _context->pushExpr( sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(absExpr))); } + void visit(ExpressionAdd* expr) final { - _context->ensureArity(2); - auto rhs = _context->popExpr(); - auto lhs = _context->popExpr(); - _context->pushExpr( - sbe::makeE<sbe::EPrimBinary>(sbe::EPrimBinary::add, std::move(lhs), std::move(rhs))); + size_t arity = expr->getChildren().size(); + _context->ensureArity(arity); + auto frameId = _context->frameIdGenerator->generate(); + + + auto generateNotNumberOrDate = [frameId](const sbe::value::SlotId slotId) { + sbe::EVariable var{frameId, slotId}; + return sbe::makeE<sbe::EPrimBinary>( + sbe::EPrimBinary::logicAnd, + sbe::makeE<sbe::EPrimUnary>( + sbe::EPrimUnary::logicNot, + sbe::makeE<sbe::EFunction>("isNumber", sbe::makeEs(var.clone()))), + sbe::makeE<sbe::EPrimUnary>( + sbe::EPrimUnary::logicNot, + sbe::makeE<sbe::EFunction>("isDate", sbe::makeEs(var.clone())))); + }; + + if (arity == 2) { + auto rhs = _context->popExpr(); + auto lhs = _context->popExpr(); + auto binds = sbe::makeEs(std::move(lhs), std::move(rhs)); + sbe::EVariable lhsVar{frameId, 0}; + sbe::EVariable rhsVar{frameId, 1}; + + auto addExpr = sbe::makeE<sbe::EIf>( + 
sbe::makeE<sbe::EPrimBinary>(sbe::EPrimBinary::logicOr, + generateNullOrMissing(frameId, 0), + generateNullOrMissing(frameId, 1)), + sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Null, 0), + sbe::makeE<sbe::EIf>( + sbe::makeE<sbe::EPrimBinary>(sbe::EPrimBinary::logicOr, + generateNotNumberOrDate(0), + generateNotNumberOrDate(1)), + sbe::makeE<sbe::EFail>( + ErrorCodes::Error{4974201}, + "only numbers and dates are allowed in an $add expression"), + sbe::makeE<sbe::EIf>( + sbe::makeE<sbe::EPrimBinary>( + sbe::EPrimBinary::logicAnd, + sbe::makeE<sbe::EFunction>("isDate", sbe::makeEs(lhsVar.clone())), + sbe::makeE<sbe::EFunction>("isDate", sbe::makeEs(rhsVar.clone()))), + sbe::makeE<sbe::EFail>(ErrorCodes::Error{4974202}, + "only one date allowed in an $add expression"), + sbe::makeE<sbe::EPrimBinary>( + sbe::EPrimBinary::add, lhsVar.clone(), rhsVar.clone())))); + + _context->pushExpr( + sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(addExpr))); + } else { + std::vector<std::unique_ptr<sbe::EExpression>> binds; + for (size_t i = 0; i < arity; i++) { + binds.push_back(_context->popExpr()); + } + std::reverse(std::begin(binds), std::end(binds)); + + std::vector<std::unique_ptr<sbe::EExpression>> checkExprsNull; + std::vector<std::unique_ptr<sbe::EExpression>> checkExprsNotNumberOrDate; + std::vector<std::unique_ptr<sbe::EExpression>> argVars; + for (size_t idx = 0; idx < arity; idx++) { + checkExprsNull.push_back(generateNullOrMissing(frameId, idx)); + checkExprsNotNumberOrDate.push_back(generateNotNumberOrDate(idx)); + argVars.push_back(sbe::makeE<sbe::EVariable>(frameId, idx)); + } + + using iter_t = std::vector<std::unique_ptr<sbe::EExpression>>::iterator; + auto checkNullAllArguments = + std::accumulate(std::move_iterator<iter_t>(checkExprsNull.begin() + 1), + std::move_iterator<iter_t>(checkExprsNull.end()), + std::move(checkExprsNull.front()), + [](auto&& acc, auto&& ex) { + return sbe::makeE<sbe::EPrimBinary>( + sbe::EPrimBinary::logicOr, 
std::move(acc), std::move(ex)); + }); + auto checkNotNumberOrDateAllArguments = + std::accumulate(std::move_iterator<iter_t>(checkExprsNotNumberOrDate.begin() + 1), + std::move_iterator<iter_t>(checkExprsNotNumberOrDate.end()), + std::move(checkExprsNotNumberOrDate.front()), + [](auto&& acc, auto&& ex) { + return sbe::makeE<sbe::EPrimBinary>( + sbe::EPrimBinary::logicOr, std::move(acc), std::move(ex)); + }); + auto addExpr = sbe::makeE<sbe::EIf>( + std::move(checkNullAllArguments), + sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Null, 0), + sbe::makeE<sbe::EIf>( + std::move(checkNotNumberOrDateAllArguments), + sbe::makeE<sbe::EFail>( + ErrorCodes::Error{4974203}, + "only numbers and dates are allowed in an $add expression"), + sbe::makeE<sbe::EFunction>("doubleDoubleSum", std::move(argVars)))); + _context->pushExpr( + sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(addExpr))); + } } + void visit(ExpressionAllElementsTrue* expr) final { unsupportedExpression(expr->getOpName()); } diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index eeddbed7948..0ef88b0a410 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -1232,6 +1232,7 @@ env.Library( '$BUILD_DIR/mongo/db/repl/speculative_authenticate', '$BUILD_DIR/mongo/db/stats/counters', '$BUILD_DIR/mongo/transport/message_compressor', + 'primary_only_service', 'replication_auth', 'split_horizon', ], diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp index 77daa595256..a9c8b47e61a 100644 --- a/src/mongo/db/repl/bgsync.cpp +++ b/src/mongo/db/repl/bgsync.cpp @@ -700,8 +700,9 @@ void BackgroundSync::_runRollback(OperationContext* opCtx, ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(opCtx->lockState()); - // Explicitly start future read transactions without a timestamp. - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + // Ensure future transactions read without a timestamp. 
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); // Rollback is a synchronous operation that uses the task executor and may not be // executed inside the fetcher callback. @@ -878,8 +879,9 @@ void BackgroundSync::start(OperationContext* opCtx) { OpTime lastAppliedOpTime; ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(opCtx->lockState()); - // Explicitly start future read transactions without a timestamp. - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + // Ensure future transactions read without a timestamp. + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); do { lastAppliedOpTime = _readLastAppliedOpTime(opCtx); diff --git a/src/mongo/db/repl/collection_bulk_loader_impl.cpp b/src/mongo/db/repl/collection_bulk_loader_impl.cpp index eab00297cdd..23fce736413 100644 --- a/src/mongo/db/repl/collection_bulk_loader_impl.cpp +++ b/src/mongo/db/repl/collection_bulk_loader_impl.cpp @@ -278,7 +278,7 @@ Status CollectionBulkLoaderImpl::commit() { status = _idIndexBlock->drainBackgroundWrites( _opCtx.get(), - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, _nss.isSystemDotViews() ? IndexBuildInterceptor::DrainYieldPolicy::kNoYield : IndexBuildInterceptor::DrainYieldPolicy::kYield); if (!status.isOK()) { diff --git a/src/mongo/db/repl/oplog_applier_impl.cpp b/src/mongo/db/repl/oplog_applier_impl.cpp index 24ff5ad96d6..f769fd14c6d 100644 --- a/src/mongo/db/repl/oplog_applier_impl.cpp +++ b/src/mongo/db/repl/oplog_applier_impl.cpp @@ -779,8 +779,9 @@ Status OplogApplierImpl::applyOplogBatchPerWorker(OperationContext* opCtx, // destroyed by unstash in its destructor. Thus we set the flag explicitly. opCtx->lockState()->setShouldConflictWithSecondaryBatchApplication(false); - // Explicitly start future read transactions without a timestamp. 
- opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + // Ensure future transactions read without a timestamp. + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); // When querying indexes, we return the record matching the key if it exists, or an adjacent // document. This means that it is possible for us to hit a prepare conflict if we query for an diff --git a/src/mongo/db/repl/oplog_batcher.cpp b/src/mongo/db/repl/oplog_batcher.cpp index 99f7077519d..efd257d26d8 100644 --- a/src/mongo/db/repl/oplog_batcher.cpp +++ b/src/mongo/db/repl/oplog_batcher.cpp @@ -121,13 +121,6 @@ bool isUnpreparedCommit(const OplogEntry& entry) { * the final oplog entry in the transaction is processed individually, since the operations are not * actually run until the commit operation is reached. * - * Oplog entries on 'system.views' should also be processed one at a time. View catalog immediately - * reflects changes for each oplog entry so we can see inconsistent view catalog if multiple oplog - * entries on 'system.views' are being applied out of the original order. - * - * Process updates to 'admin.system.version' individually as well so the secondary's FCV when - * processing each operation matches the primary's when committing that operation. - * * The ends of large transactions (> 16MB) should also be processed immediately on its own in order * to avoid scenarios where parts of the transaction is batched with other operations not in the * transaction. 
@@ -143,8 +136,7 @@ bool OplogBatcher::mustProcessIndividually(const OplogEntry& entry) { } const auto nss = entry.getNss(); - return nss.isSystemDotViews() || nss.isServerConfigurationCollection() || - nss.isPrivilegeCollection(); + return nss.mustBeAppliedInOwnOplogBatch(); } std::size_t OplogBatcher::getOpCount(const OplogEntry& entry) { @@ -355,12 +347,6 @@ std::size_t getBatchLimitOplogEntries() { std::size_t getBatchLimitOplogBytes(OperationContext* opCtx, StorageInterface* storageInterface) { // We can't change the timestamp source within a write unit of work. invariant(!opCtx->lockState()->inAWriteUnitOfWork()); - // We're only reading oplog metadata, so the timestamp is not important. If we read with the - // default (which is lastApplied on secondaries), we may end up with a reader that is at - // lastApplied. If we then roll back, then when we reconstruct prepared transactions during - // rollback recovery we will be preparing transactions before the read timestamp, which triggers - // an assertion in WiredTiger. 
- ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp); auto oplogMaxSizeResult = storageInterface->getOplogMaxSize(opCtx); auto oplogMaxSize = fassert(40301, oplogMaxSizeResult); return std::min(oplogMaxSize / 10, std::size_t(replBatchLimitBytes.load())); diff --git a/src/mongo/db/repl/primary_only_service.cpp b/src/mongo/db/repl/primary_only_service.cpp index acf79190d1b..ab7f25ec861 100644 --- a/src/mongo/db/repl/primary_only_service.cpp +++ b/src/mongo/db/repl/primary_only_service.cpp @@ -227,9 +227,21 @@ void PrimaryOnlyServiceRegistry::onStepDown() { } } +void PrimaryOnlyServiceRegistry::reportServiceInfo(BSONObjBuilder* result) { + BSONObjBuilder subBuilder(result->subobjStart("primaryOnlyServices")); + for (auto& service : _servicesByName) { + subBuilder.appendNumber(service.first, service.second->getNumberOfInstances()); + } +} + PrimaryOnlyService::PrimaryOnlyService(ServiceContext* serviceContext) : _serviceContext(serviceContext) {} +size_t PrimaryOnlyService::getNumberOfInstances() { + stdx::lock_guard lk(_mutex); + return _instances.size(); +} + bool PrimaryOnlyService::isRunning() const { stdx::lock_guard lk(_mutex); return _state == State::kRunning; diff --git a/src/mongo/db/repl/primary_only_service.h b/src/mongo/db/repl/primary_only_service.h index 0d57cf96d9b..664ecd9fa11 100644 --- a/src/mongo/db/repl/primary_only_service.h +++ b/src/mongo/db/repl/primary_only_service.h @@ -217,6 +217,11 @@ public: */ bool isRunning() const; + /** + * Returns the number of currently running Instances of this service. + */ + size_t getNumberOfInstances(); + protected: /** * Constructs a new Instance object with the given initial state. @@ -328,6 +333,12 @@ public: */ PrimaryOnlyService* lookupServiceByNamespace(const NamespaceString& ns); + /** + * Adds a 'primaryOnlyServices' sub-obj to the 'result' BSONObjBuilder containing a count of the + * number of active instances for each registered service. 
+ */ + void reportServiceInfo(BSONObjBuilder* result); + void onStartup(OperationContext*) final; void onShutdown() final; void onStepUpBegin(OperationContext*, long long term) final {} diff --git a/src/mongo/db/repl/primary_only_service_test.cpp b/src/mongo/db/repl/primary_only_service_test.cpp index d89005e8b1d..d7c76b6b7cb 100644 --- a/src/mongo/db/repl/primary_only_service_test.cpp +++ b/src/mongo/db/repl/primary_only_service_test.cpp @@ -367,6 +367,40 @@ TEST_F(PrimaryOnlyServiceTest, DoubleCreateInstance) { TestServiceHangDuringInitialization.setMode(FailPoint::off); } +TEST_F(PrimaryOnlyServiceTest, ReportServiceInfo) { + { + BSONObjBuilder resultBuilder; + _registry->reportServiceInfo(&resultBuilder); + + ASSERT_BSONOBJ_EQ(BSON("primaryOnlyServices" << BSON("TestService" << 0)), + resultBuilder.obj()); + } + + // Make sure the instance doesn't complete. + TestServiceHangDuringInitialization.setMode(FailPoint::alwaysOn); + auto instance = TestService::Instance::getOrCreate(_service, BSON("_id" << 0 << "state" << 0)); + + { + BSONObjBuilder resultBuilder; + _registry->reportServiceInfo(&resultBuilder); + + ASSERT_BSONOBJ_EQ(BSON("primaryOnlyServices" << BSON("TestService" << 1)), + resultBuilder.obj()); + } + + auto instance2 = TestService::Instance::getOrCreate(_service, BSON("_id" << 1 << "state" << 0)); + + { + BSONObjBuilder resultBuilder; + _registry->reportServiceInfo(&resultBuilder); + + ASSERT_BSONOBJ_EQ(BSON("primaryOnlyServices" << BSON("TestService" << 2)), + resultBuilder.obj()); + } + + TestServiceHangDuringInitialization.setMode(FailPoint::off); +} + TEST_F(PrimaryOnlyServiceTest, CreateWhenNotPrimary) { _registry->onStepDown(); diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index 3cfe7be562b..3f03ceb9d28 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ 
b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -357,7 +357,8 @@ void ReplicationCoordinatorExternalStateImpl::clearAppliedThroughIfCleanShutdown // Ensure that all writes are visible before reading. If we failed mid-batch, it would be // possible to read from a kNoOverlap ReadSource where not all writes to the minValid document // are visible, generating a writeConflict that would not resolve. - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); auto loadLastOpTimeAndWallTimeResult = loadLastOpTimeAndWallTime(opCtx); if (_replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx).isNull() && diff --git a/src/mongo/db/repl/replication_info.cpp b/src/mongo/db/repl/replication_info.cpp index ec551d390ea..188de5e8d16 100644 --- a/src/mongo/db/repl/replication_info.cpp +++ b/src/mongo/db/repl/replication_info.cpp @@ -50,6 +50,7 @@ #include "mongo/db/ops/write_ops.h" #include "mongo/db/query/internal_plans.h" #include "mongo/db/repl/is_master_response.h" +#include "mongo/db/repl/primary_only_service.h" #include "mongo/db/repl/replication_auth.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/repl/replication_process.h" @@ -86,12 +87,17 @@ constexpr auto kHelloString = "hello"_sd; constexpr auto kCamelCaseIsMasterString = "isMaster"_sd; constexpr auto kLowerCaseIsMasterString = "ismaster"_sd; +void appendPrimaryOnlyServiceInfo(ServiceContext* serviceContext, BSONObjBuilder* result) { + auto registry = PrimaryOnlyServiceRegistry::get(serviceContext); + registry->reportServiceInfo(result); +} + /** * Appends replication-related fields to the isMaster response. Returns the topology version that * was included in the response. 
*/ TopologyVersion appendReplicationInfo(OperationContext* opCtx, - BSONObjBuilder& result, + BSONObjBuilder* result, bool appendReplicationProcess, bool useLegacyResponseFields, boost::optional<TopologyVersion> clientTopologyVersion, @@ -108,9 +114,9 @@ TopologyVersion appendReplicationInfo(OperationContext* opCtx, } auto isMasterResponse = replCoord->awaitIsMasterResponse(opCtx, horizonParams, clientTopologyVersion, deadline); - result.appendElements(isMasterResponse->toBSON(useLegacyResponseFields)); + result->appendElements(isMasterResponse->toBSON(useLegacyResponseFields)); if (appendReplicationProcess) { - replCoord->appendSlaveInfoData(&result); + replCoord->appendSlaveInfoData(result); } invariant(isMasterResponse->getTopologyVersion()); return isMasterResponse->getTopologyVersion().get(); @@ -142,10 +148,10 @@ TopologyVersion appendReplicationInfo(OperationContext* opCtx, opCtx->sleepFor(Milliseconds(*maxAwaitTimeMS)); } - result.appendBool((useLegacyResponseFields ? "ismaster" : "isWritablePrimary"), - ReplicationCoordinator::get(opCtx)->isMasterForReportingPurposes()); + result->appendBool((useLegacyResponseFields ? "ismaster" : "isWritablePrimary"), + ReplicationCoordinator::get(opCtx)->isMasterForReportingPurposes()); - BSONObjBuilder topologyVersionBuilder(result.subobjStart("topologyVersion")); + BSONObjBuilder topologyVersionBuilder(result->subobjStart("topologyVersion")); currentTopologyVersion.serialize(&topologyVersionBuilder); return currentTopologyVersion; @@ -171,12 +177,14 @@ public: // TODO SERVER-50219: Change useLegacyResponseFields to false once the serverStatus changes // to remove master-slave terminology are merged. 
appendReplicationInfo(opCtx, - result, + &result, appendReplicationProcess, true /* useLegacyResponseFields */, boost::none /* clientTopologyVersion */, boost::none /* maxAwaitTimeMS */); + appendPrimaryOnlyServiceInfo(opCtx->getServiceContext(), &result); + auto rbid = ReplicationProcess::get(opCtx)->getRollbackID(); if (ReplicationProcess::kUninitializedRollbackId != rbid) { result.append("rbid", rbid); @@ -426,7 +434,7 @@ public: auto result = replyBuilder->getBodyBuilder(); auto currentTopologyVersion = appendReplicationInfo( - opCtx, result, 0, useLegacyResponseFields, clientTopologyVersion, maxAwaitTimeMS); + opCtx, &result, 0, useLegacyResponseFields, clientTopologyVersion, maxAwaitTimeMS); if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) { const int configServerModeNumber = 2; diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp index bba59beb626..c0c242421f9 100644 --- a/src/mongo/db/repl/replication_recovery.cpp +++ b/src/mongo/db/repl/replication_recovery.cpp @@ -131,7 +131,9 @@ public: _oplogApplicationEndPoint(oplogApplicationEndPoint) {} void startup(OperationContext* opCtx) final { - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + invariant(opCtx->recoveryUnit()->getTimestampReadSource() == + RecoveryUnit::ReadSource::kNoTimestamp); + _client = std::make_unique<DBDirectClient>(opCtx); BSONObj predicate = _oplogApplicationEndPoint ? 
BSON("$gte" << _oplogApplicationStartPoint << "$lte" << *_oplogApplicationEndPoint) diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp index 159179530a9..371a2c6af5f 100644 --- a/src/mongo/db/repl/storage_interface_impl.cpp +++ b/src/mongo/db/repl/storage_interface_impl.cpp @@ -74,6 +74,7 @@ #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/repl/rollback_gen.h" #include "mongo/db/service_context.h" +#include "mongo/db/storage/checkpointer.h" #include "mongo/db/storage/control/journal_flusher.h" #include "mongo/db/storage/control/storage_control.h" #include "mongo/db/storage/durable_catalog.h" @@ -1271,7 +1272,18 @@ void StorageInterfaceImpl::setStableTimestamp(ServiceContext* serviceCtx, Timest "holdStableTimestamp"_attr = holdStableTimestamp); } }); - serviceCtx->getStorageEngine()->setStableTimestamp(newStableTimestamp); + + StorageEngine* storageEngine = serviceCtx->getStorageEngine(); + Timestamp prevStableTimestamp = storageEngine->getStableTimestamp(); + + storageEngine->setStableTimestamp(newStableTimestamp); + + Checkpointer* checkpointer = Checkpointer::get(serviceCtx); + if (checkpointer && !checkpointer->hasTriggeredFirstStableCheckpoint()) { + checkpointer->triggerFirstStableCheckpoint(prevStableTimestamp, + storageEngine->getInitialDataTimestamp(), + storageEngine->getStableTimestamp()); + } } void StorageInterfaceImpl::setInitialDataTimestamp(ServiceContext* serviceCtx, diff --git a/src/mongo/db/repl/tenant_migration_donor_service.cpp b/src/mongo/db/repl/tenant_migration_donor_service.cpp index d821c6c3f26..a07833a2caf 100644 --- a/src/mongo/db/repl/tenant_migration_donor_service.cpp +++ b/src/mongo/db/repl/tenant_migration_donor_service.cpp @@ -90,6 +90,13 @@ Status TenantMigrationDonorService::Instance::checkIfOptionsConflict(BSONObj opt return Status::OK(); } +void TenantMigrationDonorService::Instance::onReceiveDonorForgetMigration() { + stdx::lock_guard<Latch> lg(_mutex); + 
if (!_receivedDonorForgetMigrationPromise.getFuture().isReady()) { + _receivedDonorForgetMigrationPromise.emplaceValue(); + } +} + repl::OpTime TenantMigrationDonorService::Instance::_insertStateDocument() { const auto stateDocBson = _stateDoc.toBSON(); diff --git a/src/mongo/db/repl/tenant_migration_donor_service.h b/src/mongo/db/repl/tenant_migration_donor_service.h index ddf178121e4..6d1da3ac6d4 100644 --- a/src/mongo/db/repl/tenant_migration_donor_service.h +++ b/src/mongo/db/repl/tenant_migration_donor_service.h @@ -87,9 +87,7 @@ public: return _decisionPromise.getFuture(); } - void onReceiveDonorForgetMigration() { - _receivedDonorForgetMigrationPromise.emplaceValue(); - } + void onReceiveDonorForgetMigration(); private: const NamespaceString _stateDocumentsNS = NamespaceString::kTenantMigrationDonorsNamespace; @@ -142,6 +140,8 @@ public: const std::shared_ptr<executor::ScopedTaskExecutor>& executor, RemoteCommandTargeter* recipientTargeter); + mutable Mutex _mutex = MONGO_MAKE_LATCH("TenantMigrationDonorService::_mutex"); + ServiceContext* _serviceContext; TenantMigrationDonorDocument _stateDoc; diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp index 0c7a1f0727b..67fb840de64 100644 --- a/src/mongo/db/repl/transaction_oplog_application.cpp +++ b/src/mongo/db/repl/transaction_oplog_application.cpp @@ -262,8 +262,9 @@ std::pair<std::vector<OplogEntry>, bool> _readTransactionOperationsFromOplogChai const std::vector<OplogEntry*>& cachedOps, const bool checkForCommands) noexcept { bool isTransactionWithCommand = false; - // Traverse the oplog chain with its own snapshot and read timestamp. - ReadSourceScope readSourceScope(opCtx); + // Ensure future transactions read without a timestamp. 
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); std::vector<OplogEntry> ops; @@ -538,11 +539,10 @@ void reconstructPreparedTransactions(OperationContext* opCtx, repl::OplogApplica LOGV2(21848, "Hit skipReconstructPreparedTransactions failpoint"); return; } - // Read the transactions table and the oplog collection without a timestamp. - // The below DBDirectClient read uses AutoGetCollectionForRead which could implicitly change the - // read source. So we need to explicitly set the read source to kNoTimestamp to force reads in - // this scope to be untimestamped. - ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp); + + // Ensure future transactions read without a timestamp. + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); DBDirectClient client(opCtx); const auto cursor = client.query(NamespaceString::kSessionTransactionsTableNamespace, diff --git a/src/mongo/db/s/README.md b/src/mongo/db/s/README.md index bf23835067c..a2a4547f1f8 100644 --- a/src/mongo/db/s/README.md +++ b/src/mongo/db/s/README.md @@ -103,7 +103,6 @@ collection or database. A full refresh occurs when: Methods that will mark routing table cache information as stale (sharded collection). 
* [invalidateShardOrEntireCollectionEntryForShardedCollection](https://github.com/mongodb/mongo/blob/62d9485657717bf61fbb870cb3d09b52b1a614dd/src/mongo/s/catalog_cache.h#L226-L236) -* [invalidateShardForShardedCollection](https://github.com/mongodb/mongo/blob/62d9485657717bf61fbb870cb3d09b52b1a614dd/src/mongo/s/catalog_cache.h#L262-L268) * [invalidateEntriesThatReferenceShard](https://github.com/mongodb/mongo/blob/62d9485657717bf61fbb870cb3d09b52b1a614dd/src/mongo/s/catalog_cache.h#L270-L274) * [purgeCollection](https://github.com/mongodb/mongo/blob/62d9485657717bf61fbb870cb3d09b52b1a614dd/src/mongo/s/catalog_cache.h#L276-L280) diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript index 4f97f81b966..f35bda25acc 100644 --- a/src/mongo/db/s/SConscript +++ b/src/mongo/db/s/SConscript @@ -263,6 +263,7 @@ env.Library( '$BUILD_DIR/mongo/db/catalog_raii', '$BUILD_DIR/mongo/db/repl/read_concern_args', '$BUILD_DIR/mongo/db/rw_concern_d', + '$BUILD_DIR/mongo/db/transaction', '$BUILD_DIR/mongo/executor/network_interface', '$BUILD_DIR/mongo/s/catalog/sharding_catalog_client', '$BUILD_DIR/mongo/s/client/sharding_client', diff --git a/src/mongo/db/s/config/configsvr_drop_collection_command.cpp b/src/mongo/db/s/config/configsvr_drop_collection_command.cpp index fc74fafc0c5..6743958f1f5 100644 --- a/src/mongo/db/s/config/configsvr_drop_collection_command.cpp +++ b/src/mongo/db/s/config/configsvr_drop_collection_command.cpp @@ -59,8 +59,12 @@ class ConfigSvrDropCollectionCommand : public BasicCommand { public: ConfigSvrDropCollectionCommand() : BasicCommand("_configsvrDropCollection") {} - const std::set<std::string>& apiVersions() const { - return kApiVersions1; + /** + * We accept any apiVersion, apiStrict, and/or apiDeprecationErrors, and forward it with the + * "drop" command to shards. 
+ */ + bool acceptsAnyApiVersionParameters() const override { + return true; } AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { @@ -129,7 +133,9 @@ public: auto collDistLock = uassertStatusOK( catalogClient->getDistLockManager()->lock(opCtx, nss.ns(), "dropCollection", waitFor)); - ON_BLOCK_EXIT([opCtx, nss] { Grid::get(opCtx)->catalogCache()->onEpochChange(nss); }); + ON_BLOCK_EXIT([opCtx, nss] { + Grid::get(opCtx)->catalogCache()->invalidateCollectionEntry_LINEARIZABLE(nss); + }); _dropCollection(opCtx, nss); diff --git a/src/mongo/db/s/config/configsvr_drop_database_command.cpp b/src/mongo/db/s/config/configsvr_drop_database_command.cpp index eb3ef547e70..896569a0afe 100644 --- a/src/mongo/db/s/config/configsvr_drop_database_command.cpp +++ b/src/mongo/db/s/config/configsvr_drop_database_command.cpp @@ -29,6 +29,7 @@ #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding +#include "mongo/db/api_parameters.h" #include "mongo/db/auth/authorization_session.h" #include "mongo/db/client.h" #include "mongo/db/commands.h" @@ -54,8 +55,12 @@ class ConfigSvrDropDatabaseCommand : public BasicCommand { public: ConfigSvrDropDatabaseCommand() : BasicCommand("_configsvrDropDatabase") {} - const std::set<std::string>& apiVersions() const { - return kApiVersions1; + /** + * We accept any apiVersion, apiStrict, and/or apiDeprecationErrors, and forward it with the + * "dropDatabase" command to shards. 
+ */ + bool acceptsAnyApiVersionParameters() const override { + return true; } AllowedOnSecondary secondaryAllowed(ServiceContext*) const override { @@ -177,6 +182,7 @@ public: status, str::stream() << "Could not remove database '" << dbname << "' from metadata"); // Send _flushDatabaseCacheUpdates to all shards + IgnoreAPIParametersBlock ignoreApiParametersBlock{opCtx}; for (const ShardId& shardId : allShardIds) { const auto shard = uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId)); diff --git a/src/mongo/db/s/config/configsvr_shard_collection_command.cpp b/src/mongo/db/s/config/configsvr_shard_collection_command.cpp index ee992bef2a9..3af7f601e95 100644 --- a/src/mongo/db/s/config/configsvr_shard_collection_command.cpp +++ b/src/mongo/db/s/config/configsvr_shard_collection_command.cpp @@ -357,7 +357,7 @@ public: result << "collectionUUID" << *uuid; } - catalogCache->onEpochChange(nss); + catalogCache->invalidateCollectionEntry_LINEARIZABLE(nss); return true; } diff --git a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp index 204d8377764..2d40f65eaed 100644 --- a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp +++ b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp @@ -46,6 +46,7 @@ #include "mongo/db/s/sharding_logging.h" #include "mongo/db/server_options.h" #include "mongo/db/snapshot_window_options_gen.h" +#include "mongo/db/transaction_participant_gen.h" #include "mongo/logv2/log.h" #include "mongo/rpc/get_status_from_command_result.h" #include "mongo/s/catalog/sharding_catalog_client.h" @@ -334,12 +335,14 @@ BSONObj getShardAndCollectionVersion(OperationContext* opCtx, ChunkVersion shardVersion; if (!swDonorShardVersion.isOK()) { - // The query to find 'nss' chunks belonging to the donor shard didn't return any, meaning - // the last chunk was donated - uassert(505770, - str::stream() << "Couldn't retrieve 
donor chunks from config server", - swDonorShardVersion.getStatus().code() == 50577); - shardVersion = ChunkVersion(0, 0, collectionVersion.epoch()); + if (swDonorShardVersion.getStatus().code() == 50577) { + // The query to find 'nss' chunks belonging to the donor shard didn't return any chunks, + // meaning the last chunk for fromShard was donated. Gracefully handle the error. + shardVersion = ChunkVersion(0, 0, collectionVersion.epoch()); + } else { + // Bubble up any other error + uassertStatusOK(swDonorShardVersion); + } } else { shardVersion = swDonorShardVersion.getValue(); } @@ -844,8 +847,9 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration( // Drop old history. Keep at least 1 entry so ChunkInfo::getShardIdAt finds valid history for // any query younger than the history window. if (!MONGO_unlikely(skipExpiringOldChunkHistory.shouldFail())) { - const int kHistorySecs = 10; - auto windowInSeconds = std::max(minSnapshotHistoryWindowInSeconds.load(), kHistorySecs); + auto windowInSeconds = std::max(std::max(minSnapshotHistoryWindowInSeconds.load(), + gTransactionLifetimeLimitSeconds.load()), + 10); int entriesDeleted = 0; while (newHistory.size() > 1 && newHistory.back().getValidAfter().getSecs() + windowInSeconds < diff --git a/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp index d6544e922d2..381a5e62029 100644 --- a/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp +++ b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp @@ -42,6 +42,7 @@ #include "mongo/client/read_preference.h" #include "mongo/client/remote_command_targeter.h" #include "mongo/client/replica_set_monitor.h" +#include "mongo/db/api_parameters.h" #include "mongo/db/auth/authorization_session_impl.h" #include "mongo/db/catalog/collection_options.h" #include "mongo/db/client.h" @@ -398,6 +399,7 @@ void 
sendSSVToAllShards(OperationContext* opCtx, const NamespaceString& nss) { auto* const shardRegistry = Grid::get(opCtx)->shardRegistry(); + IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx); for (const auto& shardEntry : allShards) { const auto& shard = uassertStatusOK(shardRegistry->getShard(opCtx, shardEntry.getName())); @@ -417,6 +419,7 @@ void sendSSVToAllShards(OperationContext* opCtx, const NamespaceString& nss) { } void removeChunksAndTagsForDroppedCollection(OperationContext* opCtx, const NamespaceString& nss) { + IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx); const auto catalogClient = Grid::get(opCtx)->catalogClient(); // Remove chunk data @@ -502,6 +505,8 @@ void ShardingCatalogManager::ensureDropCollectionCompleted(OperationContext* opC "Ensuring config entries from previous dropCollection are cleared", "namespace"_attr = nss.ns()); sendDropCollectionToAllShards(opCtx, nss); + + IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx); removeChunksAndTagsForDroppedCollection(opCtx, nss); sendSSVToAllShards(opCtx, nss); } diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp index 11bce269425..07f8f94daf9 100644 --- a/src/mongo/db/s/migration_source_manager.cpp +++ b/src/mongo/db/s/migration_source_manager.cpp @@ -466,7 +466,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig() { "Starting post-migration commit refresh on the shard", "migrationId"_attr = _coordinator->getMigrationId()); - forceShardFilteringMetadataRefresh(_opCtx, getNss(), true); + forceShardFilteringMetadataRefresh(_opCtx, getNss()); LOGV2_DEBUG_OPTIONS(4817405, 2, diff --git a/src/mongo/db/s/migration_util_test.cpp b/src/mongo/db/s/migration_util_test.cpp index 010f476773c..a2decb63c2d 100644 --- a/src/mongo/db/s/migration_util_test.cpp +++ b/src/mongo/db/s/migration_util_test.cpp @@ -522,7 +522,7 @@ TEST_F(SubmitRangeDeletionTaskTest, 
_mockCatalogCacheLoader->setDatabaseRefreshReturnValue(kDefaultDatabaseType); _mockCatalogCacheLoader->setCollectionRefreshReturnValue( Status(ErrorCodes::NamespaceNotFound, "dummy errmsg")); - forceShardFilteringMetadataRefresh(opCtx, kNss, true); + forceShardFilteringMetadataRefresh(opCtx, kNss); auto cleanupCompleteFuture = migrationutil::submitRangeDeletionTask(opCtx, deletionTask); @@ -553,7 +553,7 @@ TEST_F(SubmitRangeDeletionTaskTest, SucceedsIfFilteringMetadataUUIDMatchesTaskUU _mockCatalogCacheLoader->setChunkRefreshReturnValue( makeChangedChunks(ChunkVersion(1, 0, kEpoch))); _mockCatalogClient->setCollections({coll}); - forceShardFilteringMetadataRefresh(opCtx, kNss, true); + forceShardFilteringMetadataRefresh(opCtx, kNss); // The task should have been submitted successfully. auto cleanupCompleteFuture = migrationutil::submitRangeDeletionTask(opCtx, deletionTask); @@ -596,7 +596,7 @@ TEST_F(SubmitRangeDeletionTaskTest, _mockCatalogCacheLoader->setDatabaseRefreshReturnValue(kDefaultDatabaseType); _mockCatalogCacheLoader->setCollectionRefreshReturnValue( Status(ErrorCodes::NamespaceNotFound, "dummy errmsg")); - forceShardFilteringMetadataRefresh(opCtx, kNss, true); + forceShardFilteringMetadataRefresh(opCtx, kNss); auto collectionUUID = createCollectionAndGetUUID(kNss); auto deletionTask = createDeletionTask(kNss, collectionUUID, 0, 10, _myShardName); @@ -633,7 +633,7 @@ TEST_F(SubmitRangeDeletionTaskTest, _mockCatalogCacheLoader->setChunkRefreshReturnValue( makeChangedChunks(ChunkVersion(1, 0, staleEpoch))); _mockCatalogClient->setCollections({staleColl}); - forceShardFilteringMetadataRefresh(opCtx, kNss, true); + forceShardFilteringMetadataRefresh(opCtx, kNss); auto collectionUUID = createCollectionAndGetUUID(kNss); auto deletionTask = createDeletionTask(kNss, collectionUUID, 0, 10, _myShardName); diff --git a/src/mongo/db/s/set_shard_version_command.cpp b/src/mongo/db/s/set_shard_version_command.cpp index f8a321aea1a..aba2cd2f632 100644 --- 
a/src/mongo/db/s/set_shard_version_command.cpp +++ b/src/mongo/db/s/set_shard_version_command.cpp @@ -96,7 +96,7 @@ public: uassertStatusOK(shardingState->canAcceptShardedCommands()); // Steps - // 1. Set the `authoritative` and `forceRefresh` variables from the command object. + // 1. Set the `authoritative` variable from the command object. // // 2. Validate all command parameters against the info in our ShardingState, and return an // error if they do not match. @@ -117,12 +117,6 @@ public: LastError::get(client).disable(); const bool authoritative = cmdObj.getBoolField("authoritative"); - // A flag that specifies whether the set shard version catalog refresh - // is allowed to join an in-progress refresh triggered by an other - // thread, or whether it's required to either a) trigger its own - // refresh or b) wait for a refresh to be started after it has entered the - // getCollectionRoutingInfoWithRefresh function - const bool forceRefresh = cmdObj.getBoolField("forceRefresh"); // Step 2 @@ -241,11 +235,9 @@ public: const auto status = [&] { try { - // TODO SERVER-48990 remove this if-else: just call onShardVersionMismatch + // TODO (SERVER-50812) remove this if-else: just call onShardVersionMismatch if (requestedVersion == requestedVersion.DROPPED()) { - // Note: The forceRefresh flag controls whether we make sure to do our own - // refresh or if we're okay with joining another thread - forceShardFilteringMetadataRefresh(opCtx, nss, forceRefresh); + forceShardFilteringMetadataRefresh(opCtx, nss); } else { onShardVersionMismatch(opCtx, nss, requestedVersion); } diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp index 1e39cd26dc8..317d80f2ec4 100644 --- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp +++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp @@ -284,7 +284,7 @@ ScopedShardVersionCriticalSection::ScopedShardVersionCriticalSection(OperationCo 
migrationutil::recoverMigrationCoordinations(_opCtx, _nss); } - forceShardFilteringMetadataRefresh(_opCtx, _nss, true); + forceShardFilteringMetadataRefresh(_opCtx, _nss); } ScopedShardVersionCriticalSection::~ScopedShardVersionCriticalSection() { @@ -334,9 +334,8 @@ CollectionMetadata forceGetCurrentMetadata(OperationContext* opCtx, const Namesp invariant(shardingState->canAcceptShardedCommands()); try { - const auto cm = - uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh( - opCtx, nss, true)); + const auto cm = uassertStatusOK( + Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(opCtx, nss)); if (!cm.isSharded()) { return CollectionMetadata(); @@ -354,8 +353,7 @@ CollectionMetadata forceGetCurrentMetadata(OperationContext* opCtx, const Namesp } ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx, - const NamespaceString& nss, - bool forceRefreshFromThisThread) { + const NamespaceString& nss) { invariant(!opCtx->lockState()->isLocked()); invariant(!opCtx->getClient()->isInDirectClient()); @@ -366,9 +364,8 @@ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx, auto* const shardingState = ShardingState::get(opCtx); invariant(shardingState->canAcceptShardedCommands()); - const auto cm = - uassertStatusOK(Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh( - opCtx, nss, forceRefreshFromThisThread)); + const auto cm = uassertStatusOK( + Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(opCtx, nss)); if (!cm.isSharded()) { // The collection is not sharded. 
Avoid using AutoGetCollection() as it returns the diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.h b/src/mongo/db/s/shard_filtering_metadata_refresh.h index 774a370b9ef..317fab32f37 100644 --- a/src/mongo/db/s/shard_filtering_metadata_refresh.h +++ b/src/mongo/db/s/shard_filtering_metadata_refresh.h @@ -79,8 +79,7 @@ CollectionMetadata forceGetCurrentMetadata(OperationContext* opCtx, const Namesp * called with a lock */ ChunkVersion forceShardFilteringMetadataRefresh(OperationContext* opCtx, - const NamespaceString& nss, - bool forceRefreshFromThisThread = false); + const NamespaceString& nss); /** * Should be called when any client request on this shard generates a StaleDbVersion exception. diff --git a/src/mongo/db/s/shard_key_util.cpp b/src/mongo/db/s/shard_key_util.cpp index e216f9f682d..9b71b8e1ec9 100644 --- a/src/mongo/db/s/shard_key_util.cpp +++ b/src/mongo/db/s/shard_key_util.cpp @@ -230,18 +230,12 @@ void ValidationBehaviorsShardCollection::createShardKeyIndex( ValidationBehaviorsRefineShardKey::ValidationBehaviorsRefineShardKey(OperationContext* opCtx, const NamespaceString& nss) - : _opCtx(opCtx) { - const auto cm = uassertStatusOK( - Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, nss)); - uassert(ErrorCodes::NamespaceNotSharded, - str::stream() << "refineCollectionShardKey namespace " << nss.toString() - << " is not sharded", - cm.isSharded()); - const auto minKeyShardId = cm.getMinKeyShardIdWithSimpleCollation(); - _indexShard = - uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, minKeyShardId)); - _cm = std::move(cm); -} + : _opCtx(opCtx), + _cm(uassertStatusOK( + Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh(opCtx, + nss))), + _indexShard(uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard( + opCtx, _cm.getMinKeyShardIdWithSimpleCollation()))) {} std::vector<BSONObj> ValidationBehaviorsRefineShardKey::loadIndexes( const NamespaceString& nss) 
const { @@ -249,8 +243,7 @@ std::vector<BSONObj> ValidationBehaviorsRefineShardKey::loadIndexes( _opCtx, ReadPreferenceSetting(ReadPreference::PrimaryOnly), nss.db().toString(), - appendShardVersion(BSON("listIndexes" << nss.coll()), - _cm->getVersion(_indexShard->getId())), + appendShardVersion(BSON("listIndexes" << nss.coll()), _cm.getVersion(_indexShard->getId())), Milliseconds(-1)); if (indexesRes.getStatus().code() != ErrorCodes::NamespaceNotFound) { return uassertStatusOK(indexesRes).docs; @@ -266,7 +259,7 @@ void ValidationBehaviorsRefineShardKey::verifyUsefulNonMultiKeyIndex( "admin", appendShardVersion( BSON(kCheckShardingIndexCmdName << nss.ns() << kKeyPatternField << proposedKey), - _cm->getVersion(_indexShard->getId())), + _cm.getVersion(_indexShard->getId())), Shard::RetryPolicy::kIdempotent)); if (checkShardingIndexRes.commandStatus == ErrorCodes::UnknownError) { // CheckShardingIndex returns UnknownError if a compatible shard key index cannot be found, diff --git a/src/mongo/db/s/shard_key_util.h b/src/mongo/db/s/shard_key_util.h index d6e1802549c..e5ab23683eb 100644 --- a/src/mongo/db/s/shard_key_util.h +++ b/src/mongo/db/s/shard_key_util.h @@ -104,8 +104,10 @@ public: private: OperationContext* _opCtx; + + ChunkManager _cm; + std::shared_ptr<Shard> _indexShard; - boost::optional<ChunkManager> _cm; }; /** diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp index 2b6d515148a..f01218bba70 100644 --- a/src/mongo/db/service_entry_point_common.cpp +++ b/src/mongo/db/service_entry_point_common.cpp @@ -113,16 +113,16 @@ MONGO_FAIL_POINT_DEFINE(waitAfterCommandFinishesExecution); MONGO_FAIL_POINT_DEFINE(failWithErrorCodeInRunCommand); // Tracks the number of times a legacy unacknowledged write failed due to -// not master error resulted in network disconnection. 
-Counter64 notMasterLegacyUnackWrites; -ServerStatusMetricField<Counter64> displayNotMasterLegacyUnackWrites( - "repl.network.notMasterLegacyUnacknowledgedWrites", ¬MasterLegacyUnackWrites); +// not primary error resulted in network disconnection. +Counter64 notPrimaryLegacyUnackWrites; +ServerStatusMetricField<Counter64> displayNotPrimaryLegacyUnackWrites( + "repl.network.notPrimaryLegacyUnacknowledgedWrites", ¬PrimaryLegacyUnackWrites); -// Tracks the number of times an unacknowledged write failed due to not master error +// Tracks the number of times an unacknowledged write failed due to not primary error // resulted in network disconnection. -Counter64 notMasterUnackWrites; -ServerStatusMetricField<Counter64> displayNotMasterUnackWrites( - "repl.network.notMasterUnacknowledgedWrites", ¬MasterUnackWrites); +Counter64 notPrimaryUnackWrites; +ServerStatusMetricField<Counter64> displayNotPrimaryUnackWrites( + "repl.network.notPrimaryUnacknowledgedWrites", ¬PrimaryUnackWrites); namespace { @@ -1479,7 +1479,7 @@ DbResponse receivedCommands(OperationContext* opCtx, // Close the connection to get client to go through server selection again. if (LastError::get(opCtx->getClient()).hadNotPrimaryError()) { if (c && c->getReadWriteType() == Command::ReadWriteType::kWrite) - notMasterUnackWrites.increment(); + notPrimaryUnackWrites.increment(); uasserted(ErrorCodes::NotWritablePrimary, str::stream() << "Not-master error while processing '" << request.getCommandName() @@ -1839,7 +1839,7 @@ DbResponse FireAndForgetOpRunner::run() { // Either way, we want to throw an exception here, which will cause the client to be // disconnected. 
if (LastError::get(hr->client()).hadNotPrimaryError()) { - notMasterLegacyUnackWrites.increment(); + notPrimaryLegacyUnackWrites.increment(); uasserted(ErrorCodes::NotWritablePrimary, str::stream() << "Not-master error while processing '" << networkOpToString(hr->op()) << "' operation on '" diff --git a/src/mongo/db/stats/api_version_metrics.h b/src/mongo/db/stats/api_version_metrics.h index fc1de1d9766..354312a3992 100644 --- a/src/mongo/db/stats/api_version_metrics.h +++ b/src/mongo/db/stats/api_version_metrics.h @@ -29,7 +29,7 @@ #pragma once -#include "mongo/db/initialize_api_parameters.h" +#include "mongo/db/api_parameters.h" #include "mongo/db/service_context.h" #include "mongo/platform/mutex.h" #include "mongo/rpc/metadata/client_metadata.h" @@ -70,4 +70,4 @@ private: APIVersionMetricsMap _apiVersionMetrics; }; -} // namespace mongo
\ No newline at end of file +} // namespace mongo diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript index 53ac37b0e30..f60d463a976 100644 --- a/src/mongo/db/storage/SConscript +++ b/src/mongo/db/storage/SConscript @@ -121,11 +121,13 @@ env.Library( 'control/storage_control.cpp', ], LIBDEPS=[ + 'checkpointer', 'journal_flusher', ], LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/base', '$BUILD_DIR/mongo/db/service_context', + 'storage_options', ], ) @@ -513,6 +515,19 @@ env.Library( ) env.Library( + target='checkpointer', + source=[ + 'checkpointer.cpp', + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/service_context', + '$BUILD_DIR/mongo/util/background_job', + 'storage_options', + ], +) + +env.Library( target='two_phase_index_build_knobs_idl', source=[ env.Idlc('two_phase_index_build_knobs.idl')[0], diff --git a/src/mongo/db/storage/checkpointer.cpp b/src/mongo/db/storage/checkpointer.cpp new file mode 100644 index 00000000000..825e914d062 --- /dev/null +++ b/src/mongo/db/storage/checkpointer.cpp @@ -0,0 +1,168 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage + +#include "mongo/platform/basic.h" + +#include "mongo/db/storage/checkpointer.h" + +#include "mongo/db/operation_context.h" +#include "mongo/db/service_context.h" +#include "mongo/db/storage/kv/kv_engine.h" +#include "mongo/logv2/log.h" +#include "mongo/util/concurrency/idle_thread_block.h" +#include "mongo/util/fail_point.h" + +namespace mongo { + +namespace { + +const auto getCheckpointer = ServiceContext::declareDecoration<std::unique_ptr<Checkpointer>>(); + +MONGO_FAIL_POINT_DEFINE(pauseCheckpointThread); + +} // namespace + +Checkpointer* Checkpointer::get(ServiceContext* serviceCtx) { + return getCheckpointer(serviceCtx).get(); +} + +Checkpointer* Checkpointer::get(OperationContext* opCtx) { + return get(opCtx->getServiceContext()); +} + +void Checkpointer::set(ServiceContext* serviceCtx, std::unique_ptr<Checkpointer> newCheckpointer) { + auto& checkpointer = getCheckpointer(serviceCtx); + if (checkpointer) { + invariant(!checkpointer->running(), + "Tried to reset the Checkpointer without shutting down the original instance."); + } + checkpointer = std::move(newCheckpointer); +} + +void 
Checkpointer::run() { + ThreadClient tc(name(), getGlobalServiceContext()); + LOGV2_DEBUG(22307, 1, "Starting thread", "threadName"_attr = name()); + + while (true) { + auto opCtx = tc->makeOperationContext(); + + { + stdx::unique_lock<Latch> lock(_mutex); + MONGO_IDLE_THREAD_BLOCK; + + // Wait for 'storageGlobalParams.checkpointDelaySecs' seconds; or until either shutdown + // is signaled or a checkpoint is triggered. + _sleepCV.wait_for(lock, + stdx::chrono::seconds(static_cast<std::int64_t>( + storageGlobalParams.checkpointDelaySecs)), + [&] { return _shuttingDown || _triggerCheckpoint; }); + + // If the checkpointDelaySecs is set to 0, that means we should skip checkpointing. + // However, checkpointDelaySecs is adjustable by a runtime server parameter, so we + // need to wake up to check periodically. The wakeup to check period is arbitrary. + while (storageGlobalParams.checkpointDelaySecs == 0 && !_shuttingDown && + !_triggerCheckpoint) { + _sleepCV.wait_for(lock, stdx::chrono::seconds(static_cast<std::int64_t>(3)), [&] { + return _shuttingDown || _triggerCheckpoint; + }); + } + + if (_shuttingDown) { + invariant(!_shutdownReason.isOK()); + LOGV2_DEBUG(22309, + 1, + "Stopping thread", + "threadName"_attr = name(), + "reason"_attr = _shutdownReason); + return; + } + + // Clear the trigger so we do not immediately checkpoint again after this. + _triggerCheckpoint = false; + } + + pauseCheckpointThread.pauseWhileSet(); + + const Date_t startTime = Date_t::now(); + + // TODO SERVER-50861: Access the storage engine via the ServiceContext. 
+ _kvEngine->checkpoint(); + + const auto secondsElapsed = durationCount<Seconds>(Date_t::now() - startTime); + if (secondsElapsed >= 30) { + LOGV2_DEBUG(22308, + 1, + "Checkpoint was slow to complete", + "secondsElapsed"_attr = secondsElapsed); + } + } +} + +void Checkpointer::triggerFirstStableCheckpoint(Timestamp prevStable, + Timestamp initialData, + Timestamp currStable) { + stdx::unique_lock<Latch> lock(_mutex); + invariant(!_hasTriggeredFirstStableCheckpoint); + if (prevStable < initialData && currStable >= initialData) { + LOGV2(22310, + "Triggering the first stable checkpoint", + "initialDataTimestamp"_attr = initialData, + "prevStableTimestamp"_attr = prevStable, + "currStableTimestamp"_attr = currStable); + _hasTriggeredFirstStableCheckpoint = true; + _triggerCheckpoint = true; + _sleepCV.notify_one(); + } +} + +bool Checkpointer::hasTriggeredFirstStableCheckpoint() { + stdx::unique_lock<Latch> lock(_mutex); + return _hasTriggeredFirstStableCheckpoint; +} + +void Checkpointer::shutdown(const Status& reason) { + LOGV2(22322, "Shutting down checkpoint thread"); + + { + stdx::unique_lock<Latch> lock(_mutex); + _shuttingDown = true; + _shutdownReason = reason; + + // Wake up the checkpoint thread early, to take a final checkpoint before shutting down, if + // one has not coincidentally just been taken. + _sleepCV.notify_one(); + } + + wait(); + LOGV2(22323, "Finished shutting down checkpoint thread"); +} + +} // namespace mongo diff --git a/src/mongo/db/storage/checkpointer.h b/src/mongo/db/storage/checkpointer.h new file mode 100644 index 00000000000..6c50974c2ba --- /dev/null +++ b/src/mongo/db/storage/checkpointer.h @@ -0,0 +1,114 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#pragma once + +#include "mongo/platform/mutex.h" +#include "mongo/stdx/condition_variable.h" +#include "mongo/util/background.h" + +namespace mongo { + +class KVEngine; +class OperationContext; +class ServiceContext; +class Timestamp; + +class Checkpointer : public BackgroundJob { +public: + Checkpointer(KVEngine* kvEngine) + : BackgroundJob(false /* deleteSelf */), + _kvEngine(kvEngine), + _shuttingDown(false), + _shutdownReason(Status::OK()), + _hasTriggeredFirstStableCheckpoint(false), + _triggerCheckpoint(false) {} + + static Checkpointer* get(ServiceContext* serviceCtx); + static Checkpointer* get(OperationContext* opCtx); + static void set(ServiceContext* serviceCtx, std::unique_ptr<Checkpointer> newCheckpointer); + + std::string name() const override { + return "Checkpointer"; + } + + /** + * Starts the checkpoint thread that runs every storageGlobalParams.checkpointDelaySecs seconds. + */ + void run() override; + + /** + * Triggers taking the first stable checkpoint if the stable timestamp has advanced past the + * initial data timestamp. + * + * The checkpoint thread runs automatically every storageGlobalParams.checkpointDelaySecs + * seconds. This function avoids potentially waiting that full duration for a stable checkpoint, + * initiating one immediately. + * + * Do not call this function if hasTriggeredFirstStableCheckpoint() returns true. + */ + void triggerFirstStableCheckpoint(Timestamp prevStable, + Timestamp initialData, + Timestamp currStable); + + /** + * Returns whether the first stable checkpoint has already been triggered. + */ + bool hasTriggeredFirstStableCheckpoint(); + + /** + * Blocks until the checkpoint thread has been fully shutdown. + */ + void shutdown(const Status& reason); + +private: + // A pointer to the KVEngine is maintained only due to unit testing limitations that don't fully + // setup the ServiceContext. + // TODO SERVER-50861: Remove this pointer. + KVEngine* const _kvEngine; + + // Protects the state below. 
+ Mutex _mutex = MONGO_MAKE_LATCH("Checkpointer::_mutex"); + + // The checkpoint thread idles on this condition variable for a particular time duration between + // taking checkpoints. It can be triggered early to expedite either: immediate checkpointing if + // _triggerCheckpoint is set; or shutdown cleanup if _shuttingDown is set. + stdx::condition_variable _sleepCV; + + bool _shuttingDown; + Status _shutdownReason; + + // This flag ensures the first stable checkpoint is only triggered once. + bool _hasTriggeredFirstStableCheckpoint; + + // This flag allows the checkpoint thread to wake up early when _sleepCV is signaled. + bool _triggerCheckpoint; +}; + +} // namespace mongo diff --git a/src/mongo/db/storage/control/storage_control.cpp b/src/mongo/db/storage/control/storage_control.cpp index f0b7e7d825f..50213d44dfc 100644 --- a/src/mongo/db/storage/control/storage_control.cpp +++ b/src/mongo/db/storage/control/storage_control.cpp @@ -35,7 +35,9 @@ #include "mongo/db/operation_context.h" #include "mongo/db/service_context.h" +#include "mongo/db/storage/checkpointer.h" #include "mongo/db/storage/control/journal_flusher.h" +#include "mongo/db/storage/storage_options.h" #include "mongo/logv2/log.h" namespace mongo { @@ -73,12 +75,25 @@ void startStorageControls(ServiceContext* serviceContext, bool forTestOnly) { journalFlusher->go(); JournalFlusher::set(serviceContext, std::move(journalFlusher)); + if (storageEngine->supportsCheckpoints() && !storageEngine->isEphemeral() && + !storageGlobalParams.readOnly) { + std::unique_ptr<Checkpointer> checkpointer = + std::make_unique<Checkpointer>(storageEngine->getEngine()); + checkpointer->go(); + Checkpointer::set(serviceContext, std::move(checkpointer)); + } + areControlsStarted = true; } void stopStorageControls(ServiceContext* serviceContext, const Status& reason) { if (areControlsStarted) { JournalFlusher::get(serviceContext)->shutdown(reason); + + auto checkpointer = Checkpointer::get(serviceContext); + if 
(checkpointer) { + checkpointer->shutdown(reason); + } } } diff --git a/src/mongo/db/storage/durable_catalog.h b/src/mongo/db/storage/durable_catalog.h index 2d9aecb32ee..b782b144f0b 100644 --- a/src/mongo/db/storage/durable_catalog.h +++ b/src/mongo/db/storage/durable_catalog.h @@ -128,6 +128,11 @@ public: virtual std::string newInternalIdent() = 0; /** + * Generate an internal resumable index build ident name. + */ + virtual std::string newInternalResumableIndexBuildIdent() = 0; + + /** * On success, returns the RecordId which identifies the new record store in the durable catalog * in addition to ownership of the new RecordStore. */ diff --git a/src/mongo/db/storage/durable_catalog_impl.cpp b/src/mongo/db/storage/durable_catalog_impl.cpp index b991c213bc3..de8b719918b 100644 --- a/src/mongo/db/storage/durable_catalog_impl.cpp +++ b/src/mongo/db/storage/durable_catalog_impl.cpp @@ -65,6 +65,7 @@ const char kNamespaceFieldName[] = "ns"; const char kNonRepairableFeaturesFieldName[] = "nonRepairable"; const char kRepairableFeaturesFieldName[] = "repairable"; const char kInternalIdentPrefix[] = "internal-"; +const char kResumableIndexBuildIdentStem[] = "resumable-index-build-"; void appendPositionsOfBitsSet(uint64_t value, StringBuilder* sb) { invariant(sb); @@ -427,8 +428,17 @@ bool DurableCatalogImpl::_hasEntryCollidingWithRand() const { } std::string DurableCatalogImpl::newInternalIdent() { + return _newInternalIdent(""); +} + +std::string DurableCatalogImpl::newInternalResumableIndexBuildIdent() { + return _newInternalIdent(kResumableIndexBuildIdentStem); +} + +std::string DurableCatalogImpl::_newInternalIdent(StringData identStem) { StringBuilder buf; buf << kInternalIdentPrefix; + buf << identStem; buf << _next.fetchAndAdd(1) << '-' << _rand; return buf.str(); } @@ -765,6 +775,11 @@ bool DurableCatalogImpl::isInternalIdent(StringData ident) const { return ident.find(kInternalIdentPrefix) != std::string::npos; } +bool 
DurableCatalogImpl::isResumableIndexBuildIdent(StringData ident) const { + invariant(isInternalIdent(ident), ident.toString()); + return ident.find(kResumableIndexBuildIdentStem) != std::string::npos; +} + bool DurableCatalogImpl::isCollectionIdent(StringData ident) const { // Internal idents prefixed "internal-" should not be considered collections, because // they are not eligible for orphan recovery through repair. diff --git a/src/mongo/db/storage/durable_catalog_impl.h b/src/mongo/db/storage/durable_catalog_impl.h index b24816d333d..9b8fad96825 100644 --- a/src/mongo/db/storage/durable_catalog_impl.h +++ b/src/mongo/db/storage/durable_catalog_impl.h @@ -92,6 +92,8 @@ public: bool isInternalIdent(StringData ident) const; + bool isResumableIndexBuildIdent(StringData ident) const; + bool isCollectionIdent(StringData ident) const; FeatureTracker* getFeatureTracker() const { @@ -108,6 +110,7 @@ public: std::string getFilesystemPathForDb(const std::string& dbName) const; std::string newInternalIdent(); + std::string newInternalResumableIndexBuildIdent(); StatusWith<std::pair<RecordId, std::unique_ptr<RecordStore>>> createCollection( OperationContext* opCtx, @@ -229,6 +232,8 @@ private: */ std::string _newUniqueIdent(NamespaceString nss, const char* kind); + std::string _newInternalIdent(StringData identStem); + // Helpers only used by constructor and init(). Don't call from elsewhere. 
static std::string _newRand(); bool _hasEntryCollidingWithRand() const; diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h index b3da8bb0085..fd243b0c8c1 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine.h @@ -173,6 +173,10 @@ public: Timestamp getOldestTimestamp() const override; + Timestamp getStableTimestamp() const override { + return Timestamp(); + } + void setOldestTimestamp(Timestamp newOldestTimestamp, bool force) override; std::map<Timestamp, std::shared_ptr<StringStore>> getHistory_forTest(); diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp index fcf49f74442..e249daed751 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp @@ -359,7 +359,7 @@ TEST_F(EphemeralForTestKVEngineTest, ReadOlderSnapshotsSimple) { ASSERT(!rs->findRecord(&opCtx, loc2, &rd)); opCtx.recoveryUnit()->abandonSnapshot(); - opCtx.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + opCtx.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); ASSERT(rs->findRecord(&opCtx, loc1, &rd)); ASSERT(rs->findRecord(&opCtx, loc2, &rd)); } @@ -452,7 +452,7 @@ TEST_F(EphemeralForTestKVEngineTest, SetReadTimestampBehindOldestTimestamp) { ASSERT_THROWS_CODE(rs->findRecord(&opCtx, loc2, &rd), DBException, ErrorCodes::SnapshotTooOld); opCtx.recoveryUnit()->abandonSnapshot(); - opCtx.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + opCtx.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); ASSERT(rs->findRecord(&opCtx, loc1, &rd)); 
ASSERT(rs->findRecord(&opCtx, loc2, &rd)); } diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp index 5b2e77e6292..44d73995482 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp @@ -119,7 +119,6 @@ bool RecoveryUnit::forkIfNeeded() { boost::optional<Timestamp> readFrom = boost::none; switch (_timestampReadSource) { - case ReadSource::kUnset: case ReadSource::kNoTimestamp: case ReadSource::kMajorityCommitted: case ReadSource::kNoOverlap: diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h index 0e0afbb1a13..c31d0d54d86 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h @@ -131,7 +131,7 @@ private: Timestamp _commitTimestamp = Timestamp::min(); // Specifies which external source to use when setting read timestamps on transactions. - ReadSource _timestampReadSource = ReadSource::kUnset; + ReadSource _timestampReadSource = ReadSource::kNoTimestamp; boost::optional<Timestamp> _readAtTimestamp = boost::none; }; diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h index 46dad070544..6c8c67df3c4 100644 --- a/src/mongo/db/storage/kv/kv_engine.h +++ b/src/mongo/db/storage/kv/kv_engine.h @@ -53,18 +53,6 @@ class SnapshotManager; class KVEngine { public: /** - * This function should only be called after the StorageEngine is set on the ServiceContext. - * - * Starts asycnhronous threads for a storage engine's integration layer. Any such thread - * generating an OperationContext should be initialized here. 
- * - * In order for OperationContexts to be generated with real Locker objects, the generation must - * occur after the StorageEngine is instantiated and set on the ServiceContext. Otherwise, - * OperationContexts are created with LockerNoops. - */ - virtual void startAsyncThreads() {} - - /** * During the startup process, the storage engine is one of the first components to be started * up and fully initialized. But that fully initialized storage engine may not be recognized as * the end for the remaining storage startup tasks that still need to be performed. @@ -275,6 +263,8 @@ public: return false; } + virtual void checkpoint() {} + virtual bool isDurable() const = 0; /** diff --git a/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp b/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp index 57bf3bf714d..1e928738d57 100644 --- a/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp +++ b/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp @@ -372,7 +372,7 @@ TEST_F(SnapshotManagerTests, InsertAndReadOnLastAppliedSnapshot) { // Not reading on the last applied timestamp returns the most recent data. auto op = makeOperation(); auto ru = op->recoveryUnit(); - ru->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + ru->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); ASSERT_EQ(itCountOn(op), 1); ASSERT(readRecordOn(op, id)); @@ -408,7 +408,7 @@ TEST_F(SnapshotManagerTests, UpdateAndDeleteOnLocalSnapshot) { // Not reading on the last local timestamp returns the most recent data. 
auto op = makeOperation(); auto ru = op->recoveryUnit(); - ru->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + ru->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); ASSERT_EQ(itCountOn(op), 1); auto record = readRecordOn(op, id); ASSERT_EQ(std::string(record->data.data()), "Blue spotted stingray"); diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h index 5c8be96b528..2057f8854b3 100644 --- a/src/mongo/db/storage/recovery_unit.h +++ b/src/mongo/db/storage/recovery_unit.h @@ -392,11 +392,7 @@ public: */ enum ReadSource { /** - * Do not read from a timestamp. This is the default. - */ - kUnset, - /** - * Read without a timestamp explicitly. + * Read without a timestamp. This is the default. */ kNoTimestamp, /** @@ -424,8 +420,6 @@ public: static std::string toString(ReadSource rs) { switch (rs) { - case ReadSource::kUnset: - return "kUnset"; case ReadSource::kNoTimestamp: return "kNoTimestamp"; case ReadSource::kMajorityCommitted: @@ -455,7 +449,7 @@ public: boost::optional<Timestamp> provided = boost::none) {} virtual ReadSource getTimestampReadSource() const { - return ReadSource::kUnset; + return ReadSource::kNoTimestamp; }; /** diff --git a/src/mongo/db/storage/snapshot_helper.cpp b/src/mongo/db/storage/snapshot_helper.cpp index 5acbcd3a513..84af208d391 100644 --- a/src/mongo/db/storage/snapshot_helper.cpp +++ b/src/mongo/db/storage/snapshot_helper.cpp @@ -38,29 +38,37 @@ #include "mongo/logv2/log.h" namespace mongo { -namespace SnapshotHelper { -bool canSwitchReadSource(OperationContext* opCtx) { - - // Most readConcerns have behavior controlled at higher levels. Local and available are the only - // ReadConcerns that should consider changing, since they read without a timestamp by default. +namespace { +bool canReadAtLastApplied(OperationContext* opCtx) { + // Local and available are the only ReadConcern levels that allow their ReadSource to be + // overridden to read at lastApplied. 
They read without a timestamp by default, but this check + // allows user secondary reads from conflicting with oplog batch application by reading at a + // consistent point in time. + // Internal operations use DBDirectClient as a loopback to perform local operations, and they + // expect the same level of consistency guarantees as any user operation. For that reason, + // DBDirectClient should be able to change the owning operation's ReadSource in order to serve + // consistent data. const auto readConcernLevel = repl::ReadConcernArgs::get(opCtx).getLevel(); - if (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern || - readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern) { + if ((opCtx->getClient()->isFromUserConnection() || opCtx->getClient()->isInDirectClient()) && + (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern || + readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern)) { return true; } - return false; } +} // namespace +namespace SnapshotHelper { bool shouldReadAtLastApplied(OperationContext* opCtx, const NamespaceString& nss, std::string* reason) { - // If this is true, then the operation opted-in to the PBWM lock, implying that it cannot change // its ReadSource. It's important to note that it is possible for this to be false, but still be // holding the PBWM lock, explained below. if (opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()) { - *reason = "conflicts with batch application"; + if (reason) { + *reason = "conflicts with batch application"; + } return false; } @@ -71,16 +79,32 @@ bool shouldReadAtLastApplied(OperationContext* opCtx, // guaranteed to observe all previous writes. This may occur when multiple collection locks are // held concurrently, which is often the case when DBDirectClient is used. 
if (opCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_IS)) { - *reason = "PBWM lock is held"; + if (reason) { + *reason = "PBWM lock is held"; + } LOGV2_DEBUG(20577, 1, "not reading at lastApplied because the PBWM lock is held"); return false; } - // If we are in a replication state (like secondary or primary catch-up) where we are not - // accepting writes, we should read at lastApplied. If this node can accept writes, then no - // conflicting replication batches are being applied and we can read from the default snapshot. + // If this node can accept writes (i.e. primary), then no conflicting replication batches are + // being applied and we can read from the default snapshot. If we are in a replication state + // (like secondary or primary catch-up) where we are not accepting writes, we should read at + // lastApplied. if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx, "admin")) { - *reason = "primary"; + if (reason) { + *reason = "primary"; + } + return false; + } + + // If we are not secondary, then we should not attempt to read at lastApplied because it may not + // be available or valid. Any operations reading outside of the primary or secondary states must + // be internal. We give these operations the benefit of the doubt rather than attempting to read + // at a lastApplied timestamp that is not valid. + if (!repl::ReplicationCoordinator::get(opCtx)->isInPrimaryOrSecondaryState(opCtx)) { + if (reason) { + *reason = "not primary or secondary"; + } return false; } @@ -88,7 +112,9 @@ bool shouldReadAtLastApplied(OperationContext* opCtx, // written by the replication system. However, the oplog is special, as it *is* written by the // replication system. 
if (!nss.isReplicated() && !nss.isOplog()) { - *reason = "unreplicated collection"; + if (reason) { + *reason = "unreplicated collection"; + } return false; } @@ -96,15 +122,14 @@ bool shouldReadAtLastApplied(OperationContext* opCtx, } boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opCtx, const NamespaceString& nss) { - const bool canSwitch = canSwitchReadSource(opCtx); - if (!canSwitch) { + if (!canReadAtLastApplied(opCtx)) { return boost::none; } const auto existing = opCtx->recoveryUnit()->getTimestampReadSource(); std::string reason; const bool readAtLastApplied = shouldReadAtLastApplied(opCtx, nss, &reason); - if (existing == RecoveryUnit::ReadSource::kUnset) { + if (existing == RecoveryUnit::ReadSource::kNoTimestamp) { // Shifting from reading without a timestamp to reading with a timestamp can be dangerous // because writes will appear to vanish. This case is intended for new reads on secondaries // and query yield recovery after state transitions from primary to secondary. @@ -122,13 +147,16 @@ boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opC // Given readers do not survive rollbacks, it's okay to go from reading with a timestamp to // reading without one. More writes will become visible. if (!readAtLastApplied) { - LOGV2_DEBUG( - 4452902, 2, "Changing ReadSource to kUnset", logAttrs(nss), "reason"_attr = reason); - // This shift to kUnset assumes that callers will not make future attempts to manipulate - // their ReadSources after performing reads at an un-timetamped snapshot. The only - // exception is callers of this function that may need to change from kUnset to - // kLastApplied in the event of a catalog conflict or query yield. 
- return RecoveryUnit::ReadSource::kUnset; + LOGV2_DEBUG(4452902, + 2, + "Changing ReadSource to kNoTimestamp", + logAttrs(nss), + "reason"_attr = reason); + // This shift to kNoTimestamp assumes that callers will not make future attempts to + // manipulate their ReadSources after performing reads at an un-timestamped snapshot. The + // only exception is callers of this function that may need to change from kNoTimestamp + // to kLastApplied in the event of a catalog conflict or query yield. + return RecoveryUnit::ReadSource::kNoTimestamp; } } return boost::none; diff --git a/src/mongo/db/storage/snapshot_helper.h b/src/mongo/db/storage/snapshot_helper.h index fa8fdd85f24..c24dfd16d8c 100644 --- a/src/mongo/db/storage/snapshot_helper.h +++ b/src/mongo/db/storage/snapshot_helper.h @@ -37,6 +37,10 @@ namespace SnapshotHelper { boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opCtx, const NamespaceString& nss); +bool shouldReadAtLastApplied(OperationContext* opCtx, + const NamespaceString& nss, + std::string* reason = nullptr); + bool collectionChangesConflictWithRead(boost::optional<Timestamp> collectionMin, boost::optional<Timestamp> readTimestamp); } // namespace SnapshotHelper diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h index aa06d951c9f..edf31b874fe 100644 --- a/src/mongo/db/storage/storage_engine.h +++ b/src/mongo/db/storage/storage_engine.h @@ -376,6 +376,14 @@ public: OperationContext* opCtx) = 0; /** + * Creates a temporary RecordStore on the storage engine for a resumable index build. On + * startup after an unclean shutdown, the storage engine will drop any un-dropped temporary + * record stores. + */ + virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreForResumableIndexBuild( + OperationContext* opCtx) = 0; + + /** * Creates a temporary RecordStore on the storage engine from an existing ident on disk. 
On * startup after an unclean shutdown, the storage engine will drop any un-dropped temporary * record stores. @@ -465,6 +473,12 @@ public: std::shared_ptr<Ident> ident) = 0; /** + * Called when the checkpoint thread instructs the storage engine to take a checkpoint. The + * underlying storage engine must take a checkpoint at this point. + */ + virtual void checkpoint() = 0; + + /** * Recovers the storage engine state to the last stable timestamp. "Stable" in this case * refers to a timestamp that is guaranteed to never be rolled back. The stable timestamp * used should be one provided by StorageEngine::setStableTimestamp(). @@ -509,6 +523,11 @@ public: virtual void setStableTimestamp(Timestamp stableTimestamp, bool force = false) = 0; /** + * Returns the stable timestamp. + */ + virtual Timestamp getStableTimestamp() const = 0; + + /** * Tells the storage engine the timestamp of the data at startup. This is necessary because * timestamps are not persisted in the storage layer. */ diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp index 88c183919e1..22c82a09eba 100644 --- a/src/mongo/db/storage/storage_engine_impl.cpp +++ b/src/mongo/db/storage/storage_engine_impl.cpp @@ -326,7 +326,7 @@ Status StorageEngineImpl::_recoverOrphanedCollection(OperationContext* opCtx, return Status::OK(); } -bool StorageEngineImpl::_handleInternalIdents( +bool StorageEngineImpl::_handleInternalIdent( OperationContext* opCtx, const std::string& ident, InternalIdentReconcilePolicy internalIdentReconcilePolicy, @@ -345,14 +345,15 @@ bool StorageEngineImpl::_handleInternalIdents( return true; } + if (!_catalog->isResumableIndexBuildIdent(ident)) { + return false; + } + // When starting up after a clean shutdown and resumable index builds are supported, find the // internal idents that contain the relevant information to resume each index build and recover // the state. 
auto rs = _engine->getRecordStore(opCtx, "", ident, CollectionOptions()); - // Look at the contents to determine whether this ident will contain information for - // resuming an index build. - // TODO SERVER-49215: differentiate the internal idents without looking at the contents. auto cursor = rs->getCursor(opCtx); auto record = cursor->next(); if (record) { @@ -360,36 +361,35 @@ bool StorageEngineImpl::_handleInternalIdents( // Parse the documents here so that we can restart the build if the document doesn't // contain all the necessary information to be able to resume building the index. - if (doc.hasField("phase")) { - ResumeIndexInfo resumeInfo; - try { - if (MONGO_unlikely(failToParseResumeIndexInfo.shouldFail())) { - uasserted(ErrorCodes::FailPointEnabled, - "failToParseResumeIndexInfo fail point is enabled"); - } - - resumeInfo = ResumeIndexInfo::parse(IDLParserErrorContext("ResumeIndexInfo"), doc); - } catch (const DBException& e) { - LOGV2(4916300, "Failed to parse resumable index info", "error"_attr = e.toStatus()); - - // Ignore the error so that we can restart the index build instead of resume it. We - // should drop the internal ident if we failed to parse. - internalIdentsToDrop->insert(ident); - return true; + ResumeIndexInfo resumeInfo; + try { + if (MONGO_unlikely(failToParseResumeIndexInfo.shouldFail())) { + uasserted(ErrorCodes::FailPointEnabled, + "failToParseResumeIndexInfo fail point is enabled"); } - reconcileResult->indexBuildsToResume.push_back(resumeInfo); + resumeInfo = ResumeIndexInfo::parse(IDLParserErrorContext("ResumeIndexInfo"), doc); + } catch (const DBException& e) { + LOGV2(4916300, "Failed to parse resumable index info", "error"_attr = e.toStatus()); - // Once we have parsed the resume info, we can safely drop the internal ident. + // Ignore the error so that we can restart the index build instead of resume it. We + // should drop the internal ident if we failed to parse. 
internalIdentsToDrop->insert(ident); - - LOGV2(4916301, - "Found unfinished index build to resume", - "buildUUID"_attr = resumeInfo.getBuildUUID(), - "collectionUUID"_attr = resumeInfo.getCollectionUUID(), - "phase"_attr = IndexBuildPhase_serializer(resumeInfo.getPhase())); return true; } + + reconcileResult->indexBuildsToResume.push_back(resumeInfo); + + // Once we have parsed the resume info, we can safely drop the internal ident. + internalIdentsToDrop->insert(ident); + + LOGV2(4916301, + "Found unfinished index build to resume", + "buildUUID"_attr = resumeInfo.getBuildUUID(), + "collectionUUID"_attr = resumeInfo.getCollectionUUID(), + "phase"_attr = IndexBuildPhase_serializer(resumeInfo.getPhase())); + + return true; } return false; @@ -448,12 +448,12 @@ StatusWith<StorageEngine::ReconcileResult> StorageEngineImpl::reconcileCatalogAn continue; } - if (_handleInternalIdents(opCtx, - it, - internalIdentReconcilePolicy, - &reconcileResult, - &internalIdentsToDrop, - &allInternalIdents)) { + if (_handleInternalIdent(opCtx, + it, + internalIdentReconcilePolicy, + &reconcileResult, + &internalIdentsToDrop, + &allInternalIdents)) { continue; } @@ -670,8 +670,6 @@ void StorageEngineImpl::finishInit() { // A storage engine may need to start threads that require OperationsContexts with real Lockers, // as opposed to LockerNoops. Placing the start logic here, after the StorageEngine has been // instantiated, causes makeOperationContext() to create LockerImpls instead of LockerNoops. 
- _engine->startAsyncThreads(); - if (_engine->supportsRecoveryTimestamp()) { _timestampMonitor = std::make_unique<TimestampMonitor>( _engine.get(), getGlobalServiceContext()->getPeriodicRunner()); @@ -864,10 +862,18 @@ std::unique_ptr<TemporaryRecordStore> StorageEngineImpl::makeTemporaryRecordStor OperationContext* opCtx) { std::unique_ptr<RecordStore> rs = _engine->makeTemporaryRecordStore(opCtx, _catalog->newInternalIdent()); - LOGV2_DEBUG(22258, + LOGV2_DEBUG(22258, 1, "Created temporary record store", "ident"_attr = rs->getIdent()); + return std::make_unique<TemporaryKVRecordStore>(getEngine(), std::move(rs)); +} + +std::unique_ptr<TemporaryRecordStore> +StorageEngineImpl::makeTemporaryRecordStoreForResumableIndexBuild(OperationContext* opCtx) { + std::unique_ptr<RecordStore> rs = + _engine->makeTemporaryRecordStore(opCtx, _catalog->newInternalResumableIndexBuildIdent()); + LOGV2_DEBUG(4921500, 1, - "created temporary record store: {rs_getIdent}", - "rs_getIdent"_attr = rs->getIdent()); + "Created temporary record store for resumable index build", + "ident"_attr = rs->getIdent()); return std::make_unique<TemporaryKVRecordStore>(getEngine(), std::move(rs)); } @@ -885,6 +891,10 @@ void StorageEngineImpl::setStableTimestamp(Timestamp stableTimestamp, bool force _engine->setStableTimestamp(stableTimestamp, force); } +Timestamp StorageEngineImpl::getStableTimestamp() const { + return _engine->getStableTimestamp(); +} + void StorageEngineImpl::setInitialDataTimestamp(Timestamp initialDataTimestamp) { _engine->setInitialDataTimestamp(initialDataTimestamp); } @@ -1025,6 +1035,10 @@ void StorageEngineImpl::addDropPendingIdent(const Timestamp& dropTimestamp, _dropPendingIdentReaper.addDropPendingIdent(dropTimestamp, nss, ident); } +void StorageEngineImpl::checkpoint() { + _engine->checkpoint(); +} + void StorageEngineImpl::_onMinOfCheckpointAndOldestTimestampChanged(const Timestamp& timestamp) { if (timestamp.isNull()) { return; diff --git 
a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h index 4761e1f3a38..fed128f9b59 100644 --- a/src/mongo/db/storage/storage_engine_impl.h +++ b/src/mongo/db/storage/storage_engine_impl.h @@ -113,6 +113,9 @@ public: virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStore( OperationContext* opCtx) override; + virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreForResumableIndexBuild( + OperationContext* opCtx) override; + virtual std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreFromExistingIdent( OperationContext* opCtx, StringData ident) override; @@ -120,6 +123,8 @@ public: virtual void setStableTimestamp(Timestamp stableTimestamp, bool force = false) override; + virtual Timestamp getStableTimestamp() const override; + virtual void setInitialDataTimestamp(Timestamp initialDataTimestamp) override; virtual Timestamp getInitialDataTimestamp() const override; @@ -312,6 +317,8 @@ public: const NamespaceString& nss, std::shared_ptr<Ident> ident) override; + void checkpoint() override; + DurableCatalog* getCatalog() override { return _catalog.get(); } @@ -386,12 +393,12 @@ private: * Returns whether the given ident is an internal ident and if it should be dropped or used to * resume an index build. 
*/ - bool _handleInternalIdents(OperationContext* opCtx, - const std::string& ident, - InternalIdentReconcilePolicy internalIdentReconcilePolicy, - ReconcileResult* reconcileResult, - std::set<std::string>* internalIdentsToDrop, - std::set<std::string>* allInternalIdents); + bool _handleInternalIdent(OperationContext* opCtx, + const std::string& ident, + InternalIdentReconcilePolicy internalIdentReconcilePolicy, + ReconcileResult* reconcileResult, + std::set<std::string>* internalIdentsToDrop, + std::set<std::string>* allInternalIdents); class RemoveDBChange; diff --git a/src/mongo/db/storage/storage_engine_mock.h b/src/mongo/db/storage/storage_engine_mock.h index 3a4a14bd9e6..96eb8020b1d 100644 --- a/src/mongo/db/storage/storage_engine_mock.h +++ b/src/mongo/db/storage/storage_engine_mock.h @@ -93,6 +93,10 @@ public: std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStore(OperationContext* opCtx) final { return {}; } + std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreForResumableIndexBuild( + OperationContext* opCtx) final { + return {}; + } std::unique_ptr<TemporaryRecordStore> makeTemporaryRecordStoreFromExistingIdent( OperationContext* opCtx, StringData ident) final { return {}; @@ -134,6 +138,9 @@ public: MONGO_UNREACHABLE; } void setStableTimestamp(Timestamp stableTimestamp, bool force = false) final {} + Timestamp getStableTimestamp() const override { + return Timestamp(); + } void setInitialDataTimestamp(Timestamp timestamp) final {} Timestamp getInitialDataTimestamp() const override { return Timestamp(); @@ -168,6 +175,7 @@ public: void addDropPendingIdent(const Timestamp& dropTimestamp, const NamespaceString& nss, std::shared_ptr<Ident> ident) final {} + void checkpoint() final {} Status currentFilesCompatible(OperationContext* opCtx) const final { return Status::OK(); } diff --git a/src/mongo/db/storage/storage_options.cpp b/src/mongo/db/storage/storage_options.cpp index 7ba94afde29..431698a807d 100644 --- 
a/src/mongo/db/storage/storage_options.cpp +++ b/src/mongo/db/storage/storage_options.cpp @@ -58,6 +58,7 @@ void StorageGlobalParams::reset() { oplogMinRetentionHours.store(0.0); allowOplogTruncation = true; disableLockFreeReads = true; + checkpointDelaySecs = 0; } StorageGlobalParams storageGlobalParams; diff --git a/src/mongo/db/storage/storage_options.h b/src/mongo/db/storage/storage_options.h index f6284a06244..e7fe5331f96 100644 --- a/src/mongo/db/storage/storage_options.h +++ b/src/mongo/db/storage/storage_options.h @@ -123,6 +123,10 @@ struct StorageGlobalParams { // settings with which lock-free reads are incompatible: standalone mode; and // enableMajorityReadConcern=false. bool disableLockFreeReads; + + // Delay in seconds between triggering the next checkpoint after the completion of the previous + // one. A value of 0 indicates that checkpointing will be skipped. + size_t checkpointDelaySecs; }; extern StorageGlobalParams storageGlobalParams; diff --git a/src/mongo/db/storage/wiredtiger/SConscript b/src/mongo/db/storage/wiredtiger/SConscript index 0cf7d92ce08..5d24feec685 100644 --- a/src/mongo/db/storage/wiredtiger/SConscript +++ b/src/mongo/db/storage/wiredtiger/SConscript @@ -139,6 +139,7 @@ if wiredtiger: '$BUILD_DIR/mongo/db/service_context', '$BUILD_DIR/mongo/db/service_context_d', '$BUILD_DIR/mongo/db/service_context_test_fixture', + '$BUILD_DIR/mongo/db/storage/checkpointer', '$BUILD_DIR/mongo/db/storage/durable_catalog_impl', '$BUILD_DIR/mongo/db/storage/kv/kv_engine_test_harness', '$BUILD_DIR/mongo/db/storage/recovery_unit_test_harness', diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp index d7bba3ee94d..8149bab8757 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.cpp @@ -43,11 +43,6 @@ WiredTigerGlobalOptions wiredTigerGlobalOptions; Status 
WiredTigerGlobalOptions::store(const moe::Environment& params) { // WiredTiger storage engine options - if (params.count("storage.syncPeriodSecs")) { - wiredTigerGlobalOptions.checkpointDelaySecs = - static_cast<size_t>(params["storage.syncPeriodSecs"].as<double>()); - } - if (!wiredTigerGlobalOptions.engineConfig.empty()) { LOGV2(22293, "Engine custom option: {wiredTigerGlobalOptions_engineConfig}", diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h index 21d4c522f3b..51546164c39 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.h @@ -40,7 +40,6 @@ class WiredTigerGlobalOptions { public: WiredTigerGlobalOptions() : cacheSizeGB(0), - checkpointDelaySecs(0), statisticsLogDelaySecs(0), directoryForIndexes(false), maxCacheOverflowFileSizeGBDeprecated(0), @@ -50,7 +49,6 @@ public: Status store(const optionenvironment::Environment& params); double cacheSizeGB; - size_t checkpointDelaySecs; size_t statisticsLogDelaySecs; std::string journalCompressor; bool directoryForIndexes; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 1553c1740fe..f169f952e05 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -119,8 +119,6 @@ namespace { MONGO_FAIL_POINT_DEFINE(WTPreserveSnapshotHistoryIndefinitely); MONGO_FAIL_POINT_DEFINE(WTSetOldestTSToStableTS); -MONGO_FAIL_POINT_DEFINE(pauseCheckpointThread); - } // namespace bool WiredTigerFileVersion::shouldDowngrade(bool readOnly, @@ -255,231 +253,6 @@ std::string toString(const StorageEngine::OldestActiveTransactionTimestampResult } } -class WiredTigerKVEngine::WiredTigerCheckpointThread : public BackgroundJob { -public: - explicit WiredTigerCheckpointThread(WiredTigerKVEngine* wiredTigerKVEngine, - 
WiredTigerSessionCache* sessionCache) - : BackgroundJob(false /* deleteSelf */), - _wiredTigerKVEngine(wiredTigerKVEngine), - _sessionCache(sessionCache) {} - - virtual string name() const { - return "WTCheckpointThread"; - } - - virtual void run() { - ThreadClient tc(name(), getGlobalServiceContext()); - LOGV2_DEBUG(22307, 1, "Starting thread", "threadName"_attr = name()); - - while (true) { - auto opCtx = tc->makeOperationContext(); - - { - stdx::unique_lock<Latch> lock(_mutex); - MONGO_IDLE_THREAD_BLOCK; - - // Wait for 'wiredTigerGlobalOptions.checkpointDelaySecs' seconds; or until either - // shutdown is signaled or a checkpoint is triggered. - _condvar.wait_for(lock, - stdx::chrono::seconds(static_cast<std::int64_t>( - wiredTigerGlobalOptions.checkpointDelaySecs)), - [&] { return _shuttingDown || _triggerCheckpoint; }); - - // If the checkpointDelaySecs is set to 0, that means we should skip checkpointing. - // However, checkpointDelaySecs is adjustable by a runtime server parameter, so we - // need to wake up to check periodically. The wakeup to check period is arbitrary. - while (wiredTigerGlobalOptions.checkpointDelaySecs == 0 && !_shuttingDown && - !_triggerCheckpoint) { - _condvar.wait_for(lock, - stdx::chrono::seconds(static_cast<std::int64_t>(3)), - [&] { return _shuttingDown || _triggerCheckpoint; }); - } - - if (_shuttingDown) { - LOGV2_DEBUG(22309, 1, "Stopping thread", "threadName"_attr = name()); - return; - } - - // Clear the trigger so we do not immediately checkpoint again after this. - _triggerCheckpoint = false; - } - - pauseCheckpointThread.pauseWhileSet(); - - const Date_t startTime = Date_t::now(); - - const Timestamp stableTimestamp = _wiredTigerKVEngine->getStableTimestamp(); - const Timestamp initialDataTimestamp = _wiredTigerKVEngine->getInitialDataTimestamp(); - - // The amount of oplog to keep is primarily dictated by a user setting. 
However, in - // unexpected cases, durable, recover to a timestamp storage engines may need to play - // forward from an oplog entry that would otherwise be truncated by the user - // setting. Furthermore, the entries in prepared or large transactions can refer to - // previous entries in the same transaction. - // - // Live (replication) rollback will replay oplogs from exactly the stable timestamp. - // With prepared or large transactions, it may require some additional entries prior to - // the stable timestamp. These requirements are summarized in getOplogNeededForRollback. - // Truncating the oplog at this point is sufficient for in-memory configurations, but - // could cause an unrecoverable scenario if the node crashed and has to play from the - // last stable checkpoint. - // - // By recording the oplog needed for rollback "now", then taking a stable checkpoint, - // we can safely assume that the oplog needed for crash recovery has caught up to the - // recorded value. After the checkpoint, this value will be published such that actors - // which truncate the oplog can read an updated value. - try { - // Three cases: - // - // First, initialDataTimestamp is Timestamp(0, 1) -> Take full checkpoint. This is - // when there is no consistent view of the data (i.e: during initial sync). - // - // Second, stableTimestamp < initialDataTimestamp: Skip checkpoints. The data on - // disk is prone to being rolled back. Hold off on checkpoints. Hope that the - // stable timestamp surpasses the data on disk, allowing storage to persist newer - // copies to disk. - // - // Third, stableTimestamp >= initialDataTimestamp: Take stable checkpoint. Steady - // state case. 
- if (initialDataTimestamp.asULL() <= 1) { - UniqueWiredTigerSession session = _sessionCache->getSession(); - WT_SESSION* s = session->getSession(); - invariantWTOK(s->checkpoint(s, "use_timestamp=false")); - } else if (stableTimestamp < initialDataTimestamp) { - LOGV2_FOR_RECOVERY( - 23985, - 2, - "Stable timestamp is behind the initial data timestamp, skipping " - "a checkpoint. StableTimestamp: {stableTimestamp} InitialDataTimestamp: " - "{initialDataTimestamp}", - "stableTimestamp"_attr = stableTimestamp.toString(), - "initialDataTimestamp"_attr = initialDataTimestamp.toString()); - } else { - auto oplogNeededForRollback = _wiredTigerKVEngine->getOplogNeededForRollback(); - - LOGV2_FOR_RECOVERY( - 23986, - 2, - "Performing stable checkpoint. StableTimestamp: {stableTimestamp}, " - "OplogNeededForRollback: {oplogNeededForRollback}", - "stableTimestamp"_attr = stableTimestamp, - "oplogNeededForRollback"_attr = toString(oplogNeededForRollback)); - - UniqueWiredTigerSession session = _sessionCache->getSession(); - WT_SESSION* s = session->getSession(); - invariantWTOK(s->checkpoint(s, "use_timestamp=true")); - - if (oplogNeededForRollback.isOK()) { - // Now that the checkpoint is durable, publish the oplog needed to recover - // from it. 
- stdx::lock_guard<Latch> lk(_oplogNeededForCrashRecoveryMutex); - _oplogNeededForCrashRecovery.store( - oplogNeededForRollback.getValue().asULL()); - } - } - - const auto secondsElapsed = durationCount<Seconds>(Date_t::now() - startTime); - if (secondsElapsed >= 30) { - LOGV2_DEBUG(22308, - 1, - "Checkpoint took {secondsElapsed} seconds to complete.", - "secondsElapsed"_attr = secondsElapsed); - } - } catch (const WriteConflictException&) { - // Temporary: remove this after WT-3483 - LOGV2_WARNING(22346, "Checkpoint encountered a write conflict exception."); - } catch (const AssertionException& exc) { - invariant(ErrorCodes::isShutdownError(exc.code()), exc.what()); - } - } - } - - /** - * Returns true if we have already triggered taking the first checkpoint. - */ - bool hasTriggeredFirstStableCheckpoint() { - stdx::unique_lock<Latch> lock(_mutex); - return _hasTriggeredFirstStableCheckpoint; - } - - /** - * Triggers taking the first stable checkpoint, which is when the stable timestamp advances past - * the initial data timestamp. - * - * The checkpoint thread runs automatically every wiredTigerGlobalOptions.checkpointDelaySecs - * seconds. This function avoids potentially waiting that full duration for a stable checkpoint, - * initiating one immediately. - * - * Do not call this function if hasTriggeredFirstStableCheckpoint() returns true. - */ - void triggerFirstStableCheckpoint(Timestamp prevStable, - Timestamp initialData, - Timestamp currStable) { - stdx::unique_lock<Latch> lock(_mutex); - invariant(!_hasTriggeredFirstStableCheckpoint); - if (prevStable < initialData && currStable >= initialData) { - LOGV2(22310, - "Triggering the first stable checkpoint. 
Initial Data: {initialData} PrevStable: " - "{prevStable} CurrStable: {currStable}", - "Triggering the first stable checkpoint", - "initialData"_attr = initialData, - "prevStable"_attr = prevStable, - "currStable"_attr = currStable); - _hasTriggeredFirstStableCheckpoint = true; - _triggerCheckpoint = true; - _condvar.notify_one(); - } - } - - std::uint64_t getOplogNeededForCrashRecovery() const { - return _oplogNeededForCrashRecovery.load(); - } - - /* - * Atomically assign _oplogNeededForCrashRecovery to a variable. - * _oplogNeededForCrashRecovery will not change during assignment. - */ - void assignOplogNeededForCrashRecoveryTo(boost::optional<Timestamp>* timestamp) { - stdx::lock_guard<Latch> lk(_oplogNeededForCrashRecoveryMutex); - *timestamp = Timestamp(_oplogNeededForCrashRecovery.load()); - } - - void shutdown() { - { - stdx::unique_lock<Latch> lock(_mutex); - _shuttingDown = true; - // Wake up the checkpoint thread early, to take a final checkpoint before shutting - // down, if one has not coincidentally just been taken. - _condvar.notify_one(); - } - wait(); - } - -private: - WiredTigerKVEngine* _wiredTigerKVEngine; - WiredTigerSessionCache* _sessionCache; - - Mutex _oplogNeededForCrashRecoveryMutex = - MONGO_MAKE_LATCH("WiredTigerCheckpointThread::_oplogNeededForCrashRecoveryMutex"); - AtomicWord<std::uint64_t> _oplogNeededForCrashRecovery; - - // Protects the state below. - Mutex _mutex = MONGO_MAKE_LATCH("WiredTigerCheckpointThread::_mutex"); - - // The checkpoint thread idles on this condition variable for a particular time duration between - // taking checkpoints. It can be triggered early to expedite either: immediate checkpointing if - // _triggerCheckpoint is set; or shutdown cleanup if _shuttingDown is set. - stdx::condition_variable _condvar; - - bool _shuttingDown = false; - - // This flag ensures the first stable checkpoint is only triggered once. 
- bool _hasTriggeredFirstStableCheckpoint = false; - - // This flag allows the checkpoint thread to wake up early when _condvar is signaled. - bool _triggerCheckpoint = false; -}; - namespace { TicketHolder openWriteTransaction(128); TicketHolder openReadTransaction(128); @@ -759,16 +532,6 @@ WiredTigerKVEngine::~WiredTigerKVEngine() { _sessionCache.reset(nullptr); } -void WiredTigerKVEngine::startAsyncThreads() { - if (!_ephemeral) { - if (!_readOnly) { - _checkpointThread = - std::make_unique<WiredTigerCheckpointThread>(this, _sessionCache.get()); - _checkpointThread->go(); - } - } -} - void WiredTigerKVEngine::notifyStartupComplete() { WiredTigerUtil::notifyStartupComplete(); } @@ -898,11 +661,6 @@ void WiredTigerKVEngine::cleanShutdown() { _sessionSweeper->shutdown(); LOGV2(22319, "Finished shutting down session sweeper thread"); } - if (_checkpointThread) { - LOGV2(22322, "Shutting down checkpoint thread"); - _checkpointThread->shutdown(); - LOGV2(22323, "Finished shutting down checkpoint thread"); - } LOGV2_FOR_RECOVERY(23988, 2, "Shutdown timestamps.", @@ -1385,7 +1143,7 @@ WiredTigerKVEngine::beginNonBlockingBackup(OperationContext* opCtx, // Oplog truncation thread won't remove oplog since the checkpoint pinned by the backup cursor. stdx::lock_guard<Latch> lock(_oplogPinnedByBackupMutex); - _checkpointThread->assignOplogNeededForCrashRecoveryTo(&_oplogPinnedByBackup); + _oplogPinnedByBackup = Timestamp(_oplogNeededForCrashRecovery.load()); auto pinOplogGuard = makeGuard([&] { _oplogPinnedByBackup = boost::none; }); // Persist the sizeStorer information to disk before opening the backup cursor. We aren't @@ -1907,6 +1665,74 @@ bool WiredTigerKVEngine::supportsDirectoryPerDB() const { return true; } +void WiredTigerKVEngine::checkpoint() { + const Timestamp stableTimestamp = getStableTimestamp(); + const Timestamp initialDataTimestamp = getInitialDataTimestamp(); + + // The amount of oplog to keep is primarily dictated by a user setting. 
However, in unexpected + // cases, durable, recover to a timestamp storage engines may need to play forward from an oplog + // entry that would otherwise be truncated by the user setting. Furthermore, the entries in + // prepared or large transactions can refer to previous entries in the same transaction. + // + // Live (replication) rollback will replay the oplog from exactly the stable timestamp. With + // prepared or large transactions, it may require some additional entries prior to the stable + // timestamp. These requirements are summarized in getOplogNeededForRollback. Truncating the + // oplog at this point is sufficient for in-memory configurations, but could cause an + // unrecoverable scenario if the node crashed and has to play from the last stable checkpoint. + // + // By recording the oplog needed for rollback "now", then taking a stable checkpoint, we can + // safely assume that the oplog needed for crash recovery has caught up to the recorded value. + // After the checkpoint, this value will be published such that actors which truncate the oplog + // can read an updated value. + try { + // Three cases: + // + // First, initialDataTimestamp is Timestamp(0, 1) -> Take full checkpoint. This is when + // there is no consistent view of the data (i.e: during initial sync). + // + // Second, stableTimestamp < initialDataTimestamp: Skip checkpoints. The data on disk is + // prone to being rolled back. Hold off on checkpoints. Hope that the stable timestamp + // surpasses the data on disk, allowing storage to persist newer copies to disk. + // + // Third, stableTimestamp >= initialDataTimestamp: Take stable checkpoint. Steady state + // case. 
+ if (initialDataTimestamp.asULL() <= 1) { + UniqueWiredTigerSession session = _sessionCache->getSession(); + WT_SESSION* s = session->getSession(); + invariantWTOK(s->checkpoint(s, "use_timestamp=false")); + } else if (stableTimestamp < initialDataTimestamp) { + LOGV2_FOR_RECOVERY( + 23985, + 2, + "Stable timestamp is behind the initial data timestamp, skipping a checkpoint.", + "stableTimestamp"_attr = stableTimestamp.toString(), + "initialDataTimestamp"_attr = initialDataTimestamp.toString()); + } else { + auto oplogNeededForRollback = getOplogNeededForRollback(); + + LOGV2_FOR_RECOVERY(23986, + 2, + "Performing stable checkpoint.", + "stableTimestamp"_attr = stableTimestamp, + "oplogNeededForRollback"_attr = toString(oplogNeededForRollback)); + + UniqueWiredTigerSession session = _sessionCache->getSession(); + WT_SESSION* s = session->getSession(); + invariantWTOK(s->checkpoint(s, "use_timestamp=true")); + + if (oplogNeededForRollback.isOK()) { + // Now that the checkpoint is durable, publish the oplog needed to recover from it. + _oplogNeededForCrashRecovery.store(oplogNeededForRollback.getValue().asULL()); + } + } + } catch (const WriteConflictException&) { + // TODO SERVER-50824: Check if this can be removed now that WT-3483 is done. + LOGV2_WARNING(22346, "Checkpoint encountered a write conflict exception."); + } catch (const AssertionException& exc) { + invariant(ErrorCodes::isShutdownError(exc.code()), exc.what()); + } +} + bool WiredTigerKVEngine::hasIdent(OperationContext* opCtx, StringData ident) const { return _hasUri(WiredTigerRecoveryUnit::get(opCtx)->getSession()->getSession(), _uri(ident)); } @@ -2045,10 +1871,6 @@ void WiredTigerKVEngine::setStableTimestamp(Timestamp stableTimestamp, bool forc // After publishing a stable timestamp to WT, we can record the updated stable timestamp value // for the necessary oplog to keep. 
_stableTimestamp.store(stableTimestamp.asULL()); - if (_checkpointThread && !_checkpointThread->hasTriggeredFirstStableCheckpoint()) { - _checkpointThread->triggerFirstStableCheckpoint( - prevStable, Timestamp(_initialDataTimestamp.load()), stableTimestamp); - } // If 'force' is set, then we have already set the oldest timestamp equal to the stable // timestamp, so there is nothing left to do. @@ -2193,13 +2015,6 @@ StatusWith<Timestamp> WiredTigerKVEngine::recoverToStableTimestamp(OperationCont 23989, 2, "WiredTiger::RecoverToStableTimestamp syncing size storer to disk."); syncSizeInfo(true); - if (!_ephemeral) { - LOGV2_FOR_ROLLBACK( - 23990, 2, "WiredTiger::RecoverToStableTimestamp shutting down checkpoint thread."); - // Shutdown WiredTigerKVEngine owned accesses into the storage engine. - _checkpointThread->shutdown(); - } - const Timestamp stableTimestamp(_stableTimestamp.load()); const Timestamp initialDataTimestamp(_initialDataTimestamp.load()); @@ -2216,11 +2031,6 @@ StatusWith<Timestamp> WiredTigerKVEngine::recoverToStableTimestamp(OperationCont str::stream() << "Error rolling back to stable. 
Err: " << wiredtiger_strerror(ret)}; } - if (!_ephemeral) { - _checkpointThread = std::make_unique<WiredTigerCheckpointThread>(this, _sessionCache.get()); - _checkpointThread->go(); - } - _sizeStorer = std::make_unique<WiredTigerSizeStorer>(_conn, _sizeStorerUri, _readOnly); return {stableTimestamp}; @@ -2345,7 +2155,7 @@ boost::optional<Timestamp> WiredTigerKVEngine::getOplogNeededForCrashRecovery() return boost::none; } - return Timestamp(_checkpointThread->getOplogNeededForCrashRecovery()); + return Timestamp(_oplogNeededForCrashRecovery.load()); } Timestamp WiredTigerKVEngine::getPinnedOplog() const { diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h index 9327ae7454f..bfd539e7815 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h @@ -103,8 +103,6 @@ public: ~WiredTigerKVEngine(); - void startAsyncThreads() override; - void notifyStartupComplete() override; void setRecordStoreExtraOptions(const std::string& options); @@ -119,6 +117,8 @@ public: return !isEphemeral(); } + void checkpoint() override; + bool isDurable() const override { return _durable; } @@ -369,7 +369,6 @@ public: private: class WiredTigerSessionSweeper; - class WiredTigerCheckpointThread; /** * Opens a connection on the WiredTiger database 'path' with the configuration 'wtOpenConfig'. @@ -458,7 +457,6 @@ private: const bool _keepDataHistory = true; std::unique_ptr<WiredTigerSessionSweeper> _sessionSweeper; - std::unique_ptr<WiredTigerCheckpointThread> _checkpointThread; std::string _rsOptions; std::string _indexOptions; @@ -485,6 +483,8 @@ private: // timestamp. Provided by replication layer because WT does not persist timestamps. 
AtomicWord<std::uint64_t> _initialDataTimestamp; + AtomicWord<std::uint64_t> _oplogNeededForCrashRecovery; + std::unique_ptr<WiredTigerEngineRuntimeConfigParameter> _runTimeConfigParam; mutable Mutex _highestDurableTimestampMutex = diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp index b870c017798..2580960a76c 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_test.cpp @@ -43,7 +43,7 @@ #include "mongo/db/repl/replication_coordinator_mock.h" #include "mongo/db/service_context.h" #include "mongo/db/service_context_test_fixture.h" -#include "mongo/db/storage/wiredtiger/wiredtiger_global_options.h" +#include "mongo/db/storage/checkpointer.h" #include "mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h" #include "mongo/db/storage/wiredtiger/wiredtiger_record_store.h" #include "mongo/logv2/log.h" @@ -82,19 +82,16 @@ public: private: std::unique_ptr<WiredTigerKVEngine> makeEngine() { - auto engine = std::make_unique<WiredTigerKVEngine>(kWiredTigerEngineName, - _dbpath.path(), - _cs.get(), - "", - 1, - 0, - false, - false, - _forRepair, - false); - // There are unit tests expecting checkpoints to occur asynchronously. - engine->startAsyncThreads(); - return engine; + return std::make_unique<WiredTigerKVEngine>(kWiredTigerEngineName, + _dbpath.path(), + _cs.get(), + "", + 1, + 0, + false, + false, + _forRepair, + false); } const std::unique_ptr<ClockSource> _cs = std::make_unique<ClockSourceMock>(); @@ -246,6 +243,9 @@ TEST_F(WiredTigerKVEngineRepairTest, UnrecoverableOrphanedDataFilesAreRebuilt) { } TEST_F(WiredTigerKVEngineTest, TestOplogTruncation) { + std::unique_ptr<Checkpointer> checkpointer = std::make_unique<Checkpointer>(_engine); + checkpointer->go(); + auto opCtxPtr = makeOperationContext(); // The initial data timestamp has to be set to take stable checkpoints. 
The first stable // timestamp greater than this will also trigger a checkpoint. The following loop of the @@ -262,7 +262,7 @@ TEST_F(WiredTigerKVEngineTest, TestOplogTruncation) { #endif #endif { - wiredTigerGlobalOptions.checkpointDelaySecs = 1; + storageGlobalParams.checkpointDelaySecs = 1; } (); @@ -341,6 +341,8 @@ TEST_F(WiredTigerKVEngineTest, TestOplogTruncation) { _engine->setStableTimestamp(Timestamp(30, 1), false); callbackShouldFail.store(false); assertPinnedMovesSoon(Timestamp(40, 1)); + + checkpointer->shutdown({ErrorCodes::ShutdownInProgress, "Test finished"}); } std::unique_ptr<KVHarnessHelper> makeHelper() { diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp index 1167fd673f3..b3cc4c6dde7 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp @@ -445,7 +445,6 @@ boost::optional<Timestamp> WiredTigerRecoveryUnit::getPointInTimeReadTimestamp() // transaction to establish a read timestamp, but only for ReadSources that are expected to have // read timestamps. switch (_timestampReadSource) { - case ReadSource::kUnset: case ReadSource::kNoTimestamp: return boost::none; case ReadSource::kMajorityCommitted: @@ -484,7 +483,6 @@ boost::optional<Timestamp> WiredTigerRecoveryUnit::getPointInTimeReadTimestamp() return _readAtTimestamp; // The follow ReadSources returned values in the first switch block. 
- case ReadSource::kUnset: case ReadSource::kNoTimestamp: case ReadSource::kMajorityCommitted: case ReadSource::kProvided: @@ -507,7 +505,6 @@ void WiredTigerRecoveryUnit::_txnOpen() { WT_SESSION* session = _session->getSession(); switch (_timestampReadSource) { - case ReadSource::kUnset: case ReadSource::kNoTimestamp: { if (_isOplogReader) { _oplogVisibleTs = static_cast<std::int64_t>(_oplogManager->getOplogReadTimestamp()); @@ -827,7 +824,6 @@ void WiredTigerRecoveryUnit::setTimestampReadSource(ReadSource readSource, "setting timestamp read source", "readSource"_attr = toString(readSource), "provided"_attr = ((provided) ? provided->toString() : "none")); - invariant(!_isActive() || _timestampReadSource == readSource, str::stream() << "Current state: " << toString(_getState()) << ". Invalid internal state while setting timestamp read source: " diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h index 312a46f5c09..0d557fc6329 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h @@ -250,7 +250,7 @@ private: bool _isTimestamped = false; // Specifies which external source to use when setting read timestamps on transactions. - ReadSource _timestampReadSource = ReadSource::kUnset; + ReadSource _timestampReadSource = ReadSource::kNoTimestamp; // Commits are assumed ordered. Unordered commits are assumed to always need to reserve a // new optime, and thus always call oplogDiskLocRegister() on the record store. 
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp index b50d4b79889..2dde320ceeb 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp @@ -61,13 +61,6 @@ public: false, // .repair false // .readOnly ) { - // Deliberately not calling _engine->startAsyncThreads() because it starts an asynchronous - // checkpointing thread that can interfere with unit tests manipulating checkpoints - // manually. - // - // Alternatively, we would have to start using wiredTigerGlobalOptions.checkpointDelaySecs - // to set a high enough value such that the async thread never runs during testing. - repl::ReplicationCoordinator::set( getGlobalServiceContext(), std::unique_ptr<repl::ReplicationCoordinator>(new repl::ReplicationCoordinatorMock( @@ -203,7 +196,8 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, NoOverlapReadSource) { } // Read without a timestamp. The write should be visible. - ASSERT_EQ(opCtx1->recoveryUnit()->getTimestampReadSource(), RecoveryUnit::ReadSource::kUnset); + ASSERT_EQ(opCtx1->recoveryUnit()->getTimestampReadSource(), + RecoveryUnit::ReadSource::kNoTimestamp); RecordData unused; ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); @@ -237,7 +231,7 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, NoOverlapReadSource) { // Read without a timestamp, and we should see the first and third records. 
opCtx1->recoveryUnit()->abandonSnapshot(); - opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); ASSERT_FALSE(rs->findRecord(opCtx1, rid2, &unused)); ASSERT_TRUE(rs->findRecord(opCtx1, rid3, &unused)); diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index 742bfd087b4..de5874ae3a6 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -124,8 +124,9 @@ struct ActiveTransactionHistory { ActiveTransactionHistory fetchActiveTransactionHistory(OperationContext* opCtx, const LogicalSessionId& lsid) { - // Restore the current timestamp read source after fetching transaction history. - ReadSourceScope readSourceScope(opCtx); + // Restore the current timestamp read source after fetching transaction history using + // DBDirectClient, which may change our ReadSource. 
+ ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp); ActiveTransactionHistory result; diff --git a/src/mongo/db/transaction_participant.h b/src/mongo/db/transaction_participant.h index f898b21c112..37b71ce8589 100644 --- a/src/mongo/db/transaction_participant.h +++ b/src/mongo/db/transaction_participant.h @@ -33,11 +33,11 @@ #include <iostream> #include <map> +#include "mongo/db/api_parameters.h" #include "mongo/db/catalog/uncommitted_collections.h" #include "mongo/db/commands/txn_cmds_gen.h" #include "mongo/db/concurrency/d_concurrency.h" #include "mongo/db/concurrency/locker.h" -#include "mongo/db/initialize_api_parameters.h" #include "mongo/db/logical_session_id.h" #include "mongo/db/multi_key_path_tracker.h" #include "mongo/db/ops/update_request.h" diff --git a/src/mongo/dbtests/querytests.cpp b/src/mongo/dbtests/querytests.cpp index 84d533e2069..022dfb970ed 100644 --- a/src/mongo/dbtests/querytests.cpp +++ b/src/mongo/dbtests/querytests.cpp @@ -117,7 +117,7 @@ protected: uassertStatusOK(indexer.insertAllDocumentsInCollection(&_opCtx, _collection)); uassertStatusOK( indexer.drainBackgroundWrites(&_opCtx, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); uassertStatusOK(indexer.checkConstraints(&_opCtx)); { diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp index 750c8ac447d..d270e0467d7 100644 --- a/src/mongo/dbtests/storage_timestamp_tests.cpp +++ b/src/mongo/dbtests/storage_timestamp_tests.cpp @@ -103,7 +103,7 @@ public: OneOffRead(OperationContext* opCtx, const Timestamp& ts) : _opCtx(opCtx) { _opCtx->recoveryUnit()->abandonSnapshot(); if (ts.isNull()) { - _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); } else { 
_opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided, ts); } @@ -111,7 +111,7 @@ public: ~OneOffRead() { _opCtx->recoveryUnit()->abandonSnapshot(); - _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); } private: @@ -234,7 +234,7 @@ public: */ void reset(NamespaceString nss) const { ::mongo::writeConflictRetry(_opCtx, "deleteAll", nss.ns(), [&] { - _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); AutoGetCollection collRaii(_opCtx, nss, LockMode::MODE_X); if (collRaii) { @@ -2057,7 +2057,7 @@ public: firstInsert.asTimestamp()); ASSERT_OK(indexer.drainBackgroundWrites(_opCtx, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); auto indexCatalog = autoColl.getCollection()->getIndexCatalog(); @@ -2100,7 +2100,7 @@ public: setReplCoordAppliedOpTime(repl::OpTime(afterSecondInsert.asTimestamp(), presentTerm)); ASSERT_OK(indexer.drainBackgroundWrites(_opCtx, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); { @@ -2843,7 +2843,7 @@ public: ASSERT_FALSE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx)); ASSERT_OK(indexer.drainBackgroundWrites(_opCtx, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); diff --git a/src/mongo/executor/SConscript b/src/mongo/executor/SConscript index 76956c8818e..b10cf01369a 100644 --- a/src/mongo/executor/SConscript +++ b/src/mongo/executor/SConscript @@ -31,6 +31,7 @@ env.Library( 'remote_command_response.cpp', ], LIBDEPS=[ + '$BUILD_DIR/mongo/db/api_parameters', '$BUILD_DIR/mongo/rpc/metadata', 
'$BUILD_DIR/mongo/util/net/network', ] diff --git a/src/mongo/executor/remote_command_request.cpp b/src/mongo/executor/remote_command_request.cpp index 875da25ef9f..4c35525e6a9 100644 --- a/src/mongo/executor/remote_command_request.cpp +++ b/src/mongo/executor/remote_command_request.cpp @@ -34,6 +34,7 @@ #include <fmt/format.h> #include "mongo/bson/simple_bsonobj_comparator.h" +#include "mongo/db/api_parameters.h" #include "mongo/db/operation_context.h" #include "mongo/db/query/query_request.h" #include "mongo/platform/atomic_word.h" @@ -86,6 +87,12 @@ RemoteCommandRequestBase::RemoteCommandRequestBase(RequestId requestId, cmdObj = cmdObj.addField(BSON("clientOperationKey" << operationKey.get()).firstElement()); } + if (opCtx && APIParameters::get(opCtx).getParamsPassed()) { + BSONObjBuilder bob(std::move(cmdObj)); + APIParameters::get(opCtx).appendInfo(&bob); + cmdObj = bob.obj(); + } + _updateTimeoutFromOpCtxDeadline(opCtx); } diff --git a/src/mongo/s/catalog_cache.cpp b/src/mongo/s/catalog_cache.cpp index 19846e62b48..d9c2500f2d3 100644 --- a/src/mongo/s/catalog_cache.cpp +++ b/src/mongo/s/catalog_cache.cpp @@ -55,6 +55,7 @@ #include "mongo/util/timer.h" namespace mongo { + const OperationContext::Decoration<bool> operationShouldBlockBehindCatalogCacheRefresh = OperationContext::declareDecoration<bool>(); @@ -68,81 +69,8 @@ namespace { const int kMaxInconsistentRoutingInfoRefreshAttempts = 3; const int kDatabaseCacheSize = 10000; -/** - * Returns whether two shard versions have a matching epoch. - */ -bool shardVersionsHaveMatchingEpoch(boost::optional<ChunkVersion> wanted, - const ChunkVersion& received) { - return wanted && wanted->epoch() == received.epoch(); -}; - -/** - * Given an (optional) initial routing table and a set of changed chunks returned by the catalog - * cache loader, produces a new routing table with the changes applied. - * - * If the collection is no longer sharded returns nullptr. 
If the epoch has changed, expects that - * the 'collectionChunksList' contains the full contents of the chunks collection for that namespace - * so that the routing table can be built from scratch. - * - * Throws ConflictingOperationInProgress if the chunk metadata was found to be inconsistent (not - * containing all the necessary chunks, contains overlaps or chunks' epoch values are not the same - * as that of the collection). Since this situation may be transient, due to the collection being - * dropped or having its shard key refined concurrently, the caller must retry the reload up to some - * configurable number of attempts. - */ -std::shared_ptr<RoutingTableHistory> refreshCollectionRoutingInfo( - OperationContext* opCtx, - const NamespaceString& nss, - std::shared_ptr<RoutingTableHistory> existingRoutingInfo, - StatusWith<CatalogCacheLoader::CollectionAndChangedChunks> swCollectionAndChangedChunks) { - if (swCollectionAndChangedChunks == ErrorCodes::NamespaceNotFound) { - return nullptr; - } - const auto collectionAndChunks = uassertStatusOK(std::move(swCollectionAndChangedChunks)); - - auto chunkManager = [&] { - // If we have routing info already and it's for the same collection epoch, we're updating. - // Otherwise, we're making a whole new routing table. 
- if (existingRoutingInfo && - existingRoutingInfo->getVersion().epoch() == collectionAndChunks.epoch) { - if (collectionAndChunks.changedChunks.size() == 1 && - collectionAndChunks.changedChunks[0].getVersion() == - existingRoutingInfo->getVersion()) - return existingRoutingInfo; - - return std::make_shared<RoutingTableHistory>( - existingRoutingInfo->makeUpdated(std::move(collectionAndChunks.reshardingFields), - collectionAndChunks.changedChunks)); - } - - auto defaultCollator = [&]() -> std::unique_ptr<CollatorInterface> { - if (!collectionAndChunks.defaultCollation.isEmpty()) { - // The collation should have been validated upon collection creation - return uassertStatusOK(CollatorFactoryInterface::get(opCtx->getServiceContext()) - ->makeFromBSON(collectionAndChunks.defaultCollation)); - } - return nullptr; - }(); - - return std::make_shared<RoutingTableHistory>( - RoutingTableHistory::makeNew(nss, - collectionAndChunks.uuid, - KeyPattern(collectionAndChunks.shardKeyPattern), - std::move(defaultCollator), - collectionAndChunks.shardKeyIsUnique, - collectionAndChunks.epoch, - std::move(collectionAndChunks.reshardingFields), - collectionAndChunks.changedChunks)); - }(); - - std::set<ShardId> shardIds; - chunkManager->getAllShardIds(&shardIds); - for (const auto& shardId : shardIds) { - uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId)); - } - return chunkManager; -} +const int kCollectionCacheSize = 10000; } // namespace @@ -155,7 +83,8 @@ CatalogCache::CatalogCache(ServiceContext* const service, CatalogCacheLoader& ca options.maxThreads = 6; return options; }())), - _databaseCache(service, *_executor, _cacheLoader) { + _databaseCache(service, *_executor, _cacheLoader), + _collectionCache(service, *_executor, _cacheLoader) { _executor->startup(); } @@ -190,111 +119,89 @@ StatusWith<CachedDatabaseInfo> CatalogCache::getDatabase(OperationContext* opCtx } } -StatusWith<ChunkManager> CatalogCache::getCollectionRoutingInfo(OperationContext* 
opCtx, - const NamespaceString& nss) { - return _getCollectionRoutingInfo(opCtx, nss).statusWithInfo; -} - -CatalogCache::RefreshResult CatalogCache::_getCollectionRoutingInfoWithForcedRefresh( - OperationContext* opCtx, const NamespaceString& nss) { - setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, true); - _createOrGetCollectionEntryAndMarkAsNeedsRefresh(nss); - return _getCollectionRoutingInfo(opCtx, nss); -} - -CatalogCache::RefreshResult CatalogCache::_getCollectionRoutingInfo(OperationContext* opCtx, - const NamespaceString& nss) { - return _getCollectionRoutingInfoAt(opCtx, nss, boost::none); -} - - -StatusWith<ChunkManager> CatalogCache::getCollectionRoutingInfoAt(OperationContext* opCtx, - const NamespaceString& nss, - Timestamp atClusterTime) { - return _getCollectionRoutingInfoAt(opCtx, nss, atClusterTime).statusWithInfo; -} - -CatalogCache::RefreshResult CatalogCache::_getCollectionRoutingInfoAt( +StatusWith<ChunkManager> CatalogCache::_getCollectionRoutingInfoAt( OperationContext* opCtx, const NamespaceString& nss, boost::optional<Timestamp> atClusterTime) { - invariant(!opCtx->lockState() || !opCtx->lockState()->isLocked(), - "Do not hold a lock while refreshing the catalog cache. Doing so would potentially " - "hold the lock during a network call, and can lead to a deadlock as described in " - "SERVER-37398."); - // This default value can cause a single unnecessary extra refresh if this thread did do the - // refresh but the refresh failed, or if the database or collection was not found, but only if - // the caller is getCollectionRoutingInfoWithRefresh with the parameter - // forceRefreshFromThisThread set to true - RefreshAction refreshActionTaken(RefreshAction::kDidNotPerformRefresh); - while (true) { + invariant( + !opCtx->lockState() || !opCtx->lockState()->isLocked(), + "Do not hold a lock while refreshing the catalog cache. 
Doing so would potentially hold " + "the lock during a network call, and can lead to a deadlock as described in SERVER-37398."); + + try { const auto swDbInfo = getDatabase(opCtx, nss.db()); + if (!swDbInfo.isOK()) { if (swDbInfo == ErrorCodes::NamespaceNotFound) { LOGV2_FOR_CATALOG_REFRESH( - 4947102, + 4947103, 2, "Invalidating cached collection entry because its database has been dropped", "namespace"_attr = nss); - purgeCollection(nss); + invalidateCollectionEntry_LINEARIZABLE(nss); } - return {swDbInfo.getStatus(), refreshActionTaken}; + return swDbInfo.getStatus(); } const auto dbInfo = std::move(swDbInfo.getValue()); - stdx::unique_lock<Latch> ul(_mutex); - - auto collEntry = _createOrGetCollectionEntry(ul, nss); + const auto cacheConsistency = gEnableFinerGrainedCatalogCacheRefresh && + !operationShouldBlockBehindCatalogCacheRefresh(opCtx) + ? CacheCausalConsistency::kLatestCached + : CacheCausalConsistency::kLatestKnown; - if (collEntry->needsRefresh && - (!gEnableFinerGrainedCatalogCacheRefresh || collEntry->epochHasChanged || - operationShouldBlockBehindCatalogCacheRefresh(opCtx))) { + auto collEntryFuture = _collectionCache.acquireAsync(nss, cacheConsistency); - operationBlockedBehindCatalogCacheRefresh(opCtx) = true; + // If the entry is in the cache return inmediately. 
+ if (collEntryFuture.isReady()) { + setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, false); + return ChunkManager(dbInfo.primaryId(), + dbInfo.databaseVersion(), + collEntryFuture.get(opCtx), + atClusterTime); + } - auto refreshNotification = collEntry->refreshCompletionNotification; - if (!refreshNotification) { - refreshNotification = (collEntry->refreshCompletionNotification = - std::make_shared<Notification<Status>>()); - _scheduleCollectionRefresh(ul, opCtx->getServiceContext(), collEntry, nss, 1); - refreshActionTaken = RefreshAction::kPerformedRefresh; - } + operationBlockedBehindCatalogCacheRefresh(opCtx) = true; - // Wait on the notification outside of the mutex - ul.unlock(); + size_t acquireTries = 0; + Timer t; - auto refreshStatus = [&]() { - Timer t; - ON_BLOCK_EXIT([&] { _stats.totalRefreshWaitTimeMicros.addAndFetch(t.micros()); }); + while (true) { + try { + auto collEntry = collEntryFuture.get(opCtx); + _stats.totalRefreshWaitTimeMicros.addAndFetch(t.micros()); - try { - const Milliseconds kReportingInterval{250}; - while (!refreshNotification->waitFor(opCtx, kReportingInterval)) { - _stats.totalRefreshWaitTimeMicros.addAndFetch(t.micros()); - t.reset(); - } + setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, false); - return refreshNotification->get(opCtx); - } catch (const DBException& ex) { + return ChunkManager(dbInfo.primaryId(), + dbInfo.databaseVersion(), + std::move(collEntry), + atClusterTime); + } catch (ExceptionFor<ErrorCodes::ConflictingOperationInProgress>& ex) { + _stats.totalRefreshWaitTimeMicros.addAndFetch(t.micros()); + acquireTries++; + if (acquireTries == kMaxInconsistentRoutingInfoRefreshAttempts) { return ex.toStatus(); } - }(); - - if (!refreshStatus.isOK()) { - return {refreshStatus, refreshActionTaken}; } - // Once the refresh is complete, loop around to get the latest value - continue; + collEntryFuture = _collectionCache.acquireAsync(nss, cacheConsistency); + t.reset(); } - - return 
{ChunkManager(dbInfo.primaryId(), - dbInfo.databaseVersion(), - collEntry->routingInfo, - atClusterTime), - refreshActionTaken}; + } catch (const DBException& ex) { + return ex.toStatus(); } } +StatusWith<ChunkManager> CatalogCache::getCollectionRoutingInfo(OperationContext* opCtx, + const NamespaceString& nss) { + return _getCollectionRoutingInfoAt(opCtx, nss, boost::none); +} + +StatusWith<ChunkManager> CatalogCache::getCollectionRoutingInfoAt(OperationContext* opCtx, + const NamespaceString& nss, + Timestamp atClusterTime) { + return _getCollectionRoutingInfoAt(opCtx, nss, atClusterTime); +} + StatusWith<CachedDatabaseInfo> CatalogCache::getDatabaseWithRefresh(OperationContext* opCtx, StringData dbName) { // TODO SERVER-49724: Make ReadThroughCache support StringData keys @@ -303,32 +210,20 @@ StatusWith<CachedDatabaseInfo> CatalogCache::getDatabaseWithRefresh(OperationCon } StatusWith<ChunkManager> CatalogCache::getCollectionRoutingInfoWithRefresh( - OperationContext* opCtx, const NamespaceString& nss, bool forceRefreshFromThisThread) { - auto refreshResult = _getCollectionRoutingInfoWithForcedRefresh(opCtx, nss); - // We want to ensure that we don't join an in-progress refresh because that - // could violate causal consistency for this client. We don't need to actually perform the - // refresh ourselves but we do need the refresh to begin *after* this function is - // called, so calling it twice is enough regardless of what happens the - // second time. See SERVER-33954 for reasoning. 
- if (forceRefreshFromThisThread && - refreshResult.actionTaken == RefreshAction::kDidNotPerformRefresh) { - refreshResult = _getCollectionRoutingInfoWithForcedRefresh(opCtx, nss); - } - return refreshResult.statusWithInfo; + OperationContext* opCtx, const NamespaceString& nss) { + _collectionCache.invalidate(nss); + setOperationShouldBlockBehindCatalogCacheRefresh(opCtx, true); + return getCollectionRoutingInfo(opCtx, nss); } StatusWith<ChunkManager> CatalogCache::getShardedCollectionRoutingInfoWithRefresh( OperationContext* opCtx, const NamespaceString& nss) { - auto swRoutingInfo = _getCollectionRoutingInfoWithForcedRefresh(opCtx, nss).statusWithInfo; - if (!swRoutingInfo.isOK()) - return swRoutingInfo; - - auto cri(std::move(swRoutingInfo.getValue())); - if (!cri.isSharded()) + auto routingInfoStatus = getCollectionRoutingInfoWithRefresh(opCtx, nss); + if (routingInfoStatus.isOK() && !routingInfoStatus.getValue().isSharded()) { return {ErrorCodes::NamespaceNotSharded, str::stream() << "Collection " << nss.ns() << " is not sharded."}; - - return cri; + } + return routingInfoStatus; } void CatalogCache::onStaleDatabaseVersion(const StringData dbName, @@ -350,48 +245,49 @@ void CatalogCache::setOperationShouldBlockBehindCatalogCacheRefresh(OperationCon if (gEnableFinerGrainedCatalogCacheRefresh) { operationShouldBlockBehindCatalogCacheRefresh(opCtx) = shouldBlock; } -}; +} void CatalogCache::invalidateShardOrEntireCollectionEntryForShardedCollection( - OperationContext* opCtx, const NamespaceString& nss, - boost::optional<ChunkVersion> wantedVersion, - const ChunkVersion& receivedVersion, - ShardId shardId) { - if (shardVersionsHaveMatchingEpoch(wantedVersion, receivedVersion)) { - _createOrGetCollectionEntryAndMarkShardStale(nss, shardId); - } else { - _createOrGetCollectionEntryAndMarkEpochStale(nss); + const boost::optional<ChunkVersion>& wantedVersion, + const ShardId& shardId) { + _stats.countStaleConfigErrors.addAndFetch(1); + + auto collectionEntry = 
_collectionCache.peekLatestCached(nss); + if (collectionEntry && collectionEntry->optRt) { + collectionEntry->optRt->setShardStale(shardId); } -}; -void CatalogCache::onEpochChange(const NamespaceString& nss) { - _createOrGetCollectionEntryAndMarkEpochStale(nss); -}; + if (wantedVersion) { + _collectionCache.advanceTimeInStore( + nss, ComparableChunkVersion::makeComparableChunkVersion(*wantedVersion)); + } else { + _collectionCache.advanceTimeInStore( + nss, ComparableChunkVersion::makeComparableChunkVersionForForcedRefresh()); + } +} void CatalogCache::checkEpochOrThrow(const NamespaceString& nss, - ChunkVersion targetCollectionVersion, - const ShardId& shardId) const { - stdx::lock_guard<Latch> lg(_mutex); - const auto itDb = _collectionsByDb.find(nss.db()); + const ChunkVersion& targetCollectionVersion, + const ShardId& shardId) { uassert(StaleConfigInfo(nss, targetCollectionVersion, boost::none, shardId), str::stream() << "could not act as router for " << nss.ns() << ", no entry for database " << nss.db(), - itDb != _collectionsByDb.end()); + _databaseCache.peekLatestCached(nss.db().toString())); - auto itColl = itDb->second.find(nss.ns()); + auto collectionValueHandle = _collectionCache.peekLatestCached(nss); uassert(StaleConfigInfo(nss, targetCollectionVersion, boost::none, shardId), str::stream() << "could not act as router for " << nss.ns() << ", no entry for collection.", - itColl != itDb->second.end()); + collectionValueHandle); uassert(StaleConfigInfo(nss, targetCollectionVersion, boost::none, shardId), str::stream() << "could not act as router for " << nss.ns() << ", wanted " << targetCollectionVersion.toString() << ", but found the collection was unsharded", - itColl->second->routingInfo); + collectionValueHandle->optRt); - auto foundVersion = itColl->second->routingInfo->getVersion(); + auto foundVersion = collectionValueHandle->optRt->getVersion(); uassert(StaleConfigInfo(nss, targetCollectionVersion, foundVersion, shardId), str::stream() << "could 
not act as router for " << nss.ns() << ", wanted " << targetCollectionVersion.toString() << ", but found " @@ -399,11 +295,6 @@ void CatalogCache::checkEpochOrThrow(const NamespaceString& nss, foundVersion.epoch() == targetCollectionVersion.epoch()); } -void CatalogCache::invalidateShardForShardedCollection(const NamespaceString& nss, - const ShardId& staleShardId) { - _createOrGetCollectionEntryAndMarkShardStale(nss, staleShardId); -} - void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) { LOGV2_DEBUG(4997600, 1, @@ -413,32 +304,24 @@ void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) { _databaseCache.invalidateCachedValueIf( [&](const DatabaseType& dbt) { return dbt.getPrimary() == shardId; }); - stdx::lock_guard<Latch> lg(_mutex); - // Invalidate collections which contain data on this shard. - for (const auto& [db, collInfoMap] : _collectionsByDb) { - for (const auto& [collNs, collRoutingInfoEntry] : collInfoMap) { - if (!collRoutingInfoEntry->needsRefresh && collRoutingInfoEntry->routingInfo) { - // The set of shards on which this collection contains chunks. 
- std::set<ShardId> shardsOwningDataForCollection; - collRoutingInfoEntry->routingInfo->getAllShardIds(&shardsOwningDataForCollection); - - if (shardsOwningDataForCollection.find(shardId) != - shardsOwningDataForCollection.end()) { - LOGV2_DEBUG(22647, - 3, - "Invalidating cached collection {namespace} that has data " - "on shard {shardId}", - "Invalidating cached collection", - "namespace"_attr = collNs, - "shardId"_attr = shardId); - - collRoutingInfoEntry->needsRefresh = true; - collRoutingInfoEntry->routingInfo->setShardStale(shardId); - } - } - } - } + _collectionCache.invalidateCachedValueIf([&](const OptionalRoutingTableHistory& ort) { + if (!ort.optRt) + return false; + const auto& rt = *ort.optRt; + + std::set<ShardId> shardIds; + rt.getAllShardIds(&shardIds); + + LOGV2_DEBUG(22647, + 3, + "Invalidating cached collection {namespace} that has data " + "on shard {shardId}", + "Invalidating cached collection", + "namespace"_attr = rt.nss(), + "shardId"_attr = shardId); + return shardIds.find(shardId) != shardIds.end(); + }); LOGV2(22648, "Finished invalidating databases and collections with data on shard: {shardId}", @@ -446,46 +329,28 @@ void CatalogCache::invalidateEntriesThatReferenceShard(const ShardId& shardId) { "shardId"_attr = shardId); } -void CatalogCache::purgeCollection(const NamespaceString& nss) { - stdx::lock_guard<Latch> lg(_mutex); - - auto itDb = _collectionsByDb.find(nss.db()); - if (itDb == _collectionsByDb.end()) { - return; - } - - itDb->second.erase(nss.ns()); -} - void CatalogCache::purgeDatabase(StringData dbName) { _databaseCache.invalidate(dbName.toString()); - stdx::lock_guard<Latch> lg(_mutex); - _collectionsByDb.erase(dbName); + _collectionCache.invalidateKeyIf( + [&](const NamespaceString& nss) { return nss.db() == dbName; }); } void CatalogCache::purgeAllDatabases() { _databaseCache.invalidateAll(); - stdx::lock_guard<Latch> lg(_mutex); - _collectionsByDb.clear(); + _collectionCache.invalidateAll(); } void 
CatalogCache::report(BSONObjBuilder* builder) const { BSONObjBuilder cacheStatsBuilder(builder->subobjStart("catalogCache")); - size_t numDatabaseEntries; - size_t numCollectionEntries{0}; - { - numDatabaseEntries = _databaseCache.getCacheInfo().size(); - stdx::lock_guard<Latch> ul(_mutex); - for (const auto& entry : _collectionsByDb) { - numCollectionEntries += entry.second.size(); - } - } + const size_t numDatabaseEntries = _databaseCache.getCacheInfo().size(); + const size_t numCollectionEntries = _collectionCache.getCacheInfo().size(); cacheStatsBuilder.append("numDatabaseEntries", static_cast<long long>(numDatabaseEntries)); cacheStatsBuilder.append("numCollectionEntries", static_cast<long long>(numCollectionEntries)); _stats.report(&cacheStatsBuilder); + _collectionCache.reportStats(&cacheStatsBuilder); } void CatalogCache::checkAndRecordOperationBlockedByRefresh(OperationContext* opCtx, @@ -519,188 +384,8 @@ void CatalogCache::checkAndRecordOperationBlockedByRefresh(OperationContext* opC } } -void CatalogCache::_scheduleCollectionRefresh(WithLock lk, - ServiceContext* service, - std::shared_ptr<CollectionRoutingInfoEntry> collEntry, - NamespaceString const& nss, - int refreshAttempt) { - const auto existingRoutingInfo = collEntry->routingInfo; - - // If we have an existing chunk manager, the refresh is considered "incremental", regardless of - // how many chunks are in the differential - const bool isIncremental(existingRoutingInfo); - - if (isIncremental) { - _stats.numActiveIncrementalRefreshes.addAndFetch(1); - _stats.countIncrementalRefreshesStarted.addAndFetch(1); - } else { - _stats.numActiveFullRefreshes.addAndFetch(1); - _stats.countFullRefreshesStarted.addAndFetch(1); - } - - // Invoked when one iteration of getChunksSince has completed, whether with success or error - const auto onRefreshCompleted = [this, t = Timer(), nss, isIncremental, existingRoutingInfo]( - const Status& status, - RoutingTableHistory* routingInfoAfterRefresh) { - if 
(isIncremental) { - _stats.numActiveIncrementalRefreshes.subtractAndFetch(1); - } else { - _stats.numActiveFullRefreshes.subtractAndFetch(1); - } - - if (!status.isOK()) { - _stats.countFailedRefreshes.addAndFetch(1); - - LOGV2_OPTIONS(24103, - {logv2::LogComponent::kShardingCatalogRefresh}, - "Error refreshing cached collection {namespace}; Took {duration} and " - "failed due to {error}", - "Error refreshing cached collection", - "namespace"_attr = nss, - "duration"_attr = Milliseconds(t.millis()), - "error"_attr = redact(status)); - } else if (routingInfoAfterRefresh) { - const int logLevel = - (!existingRoutingInfo || - (existingRoutingInfo && - routingInfoAfterRefresh->getVersion() != existingRoutingInfo->getVersion())) - ? 0 - : 1; - LOGV2_FOR_CATALOG_REFRESH( - 24104, - logLevel, - "Refreshed cached collection {namespace} to version {newVersion} from version " - "{oldVersion}. Took {duration}", - "Refreshed cached collection", - "namespace"_attr = nss, - "newVersion"_attr = routingInfoAfterRefresh->getVersion(), - "oldVersion"_attr = - (existingRoutingInfo - ? 
(" from version " + existingRoutingInfo->getVersion().toString()) - : ""), - "duration"_attr = Milliseconds(t.millis())); - } else { - LOGV2_OPTIONS(24105, - {logv2::LogComponent::kShardingCatalogRefresh}, - "Collection {namespace} was found to be unsharded after refresh that " - "took {duration}", - "Collection has found to be unsharded after refresh", - "namespace"_attr = nss, - "duration"_attr = Milliseconds(t.millis())); - } - }; - - // Invoked if getChunksSince resulted in error or threw an exception - const auto onRefreshFailed = - [ this, service, collEntry, nss, refreshAttempt, - onRefreshCompleted ](WithLock lk, const Status& status) noexcept { - onRefreshCompleted(status, nullptr); - - // It is possible that the metadata is being changed concurrently, so retry the - // refresh again - if (status == ErrorCodes::ConflictingOperationInProgress && - refreshAttempt < kMaxInconsistentRoutingInfoRefreshAttempts) { - _scheduleCollectionRefresh(lk, service, collEntry, nss, refreshAttempt + 1); - } else { - // Leave needsRefresh to true so that any subsequent get attempts will kick off - // another round of refresh - collEntry->refreshCompletionNotification->set(status); - collEntry->refreshCompletionNotification = nullptr; - } - }; - - const auto refreshCallback = - [ this, service, collEntry, nss, existingRoutingInfo, onRefreshFailed, onRefreshCompleted ]( - StatusWith<CatalogCacheLoader::CollectionAndChangedChunks> swCollAndChunks) noexcept { - - ThreadClient tc("CatalogCache::collectionRefresh", service); - auto opCtx = tc->makeOperationContext(); - - std::shared_ptr<RoutingTableHistory> newRoutingInfo; - try { - newRoutingInfo = refreshCollectionRoutingInfo( - opCtx.get(), nss, std::move(existingRoutingInfo), std::move(swCollAndChunks)); - - onRefreshCompleted(Status::OK(), newRoutingInfo.get()); - } catch (const DBException& ex) { - stdx::lock_guard<Latch> lg(_mutex); - onRefreshFailed(lg, ex.toStatus()); - return; - } - - stdx::lock_guard<Latch> lg(_mutex); 
- - collEntry->epochHasChanged = false; - collEntry->needsRefresh = false; - collEntry->refreshCompletionNotification->set(Status::OK()); - collEntry->refreshCompletionNotification = nullptr; - - setOperationShouldBlockBehindCatalogCacheRefresh(opCtx.get(), false); - - // TODO(SERVER-49876): remove clang-tidy NOLINT comments. - if (existingRoutingInfo && newRoutingInfo && // NOLINT(bugprone-use-after-move) - existingRoutingInfo->getVersion() == // NOLINT(bugprone-use-after-move) - newRoutingInfo->getVersion()) { // NOLINT(bugprone-use-after-move) - // If the routingInfo hasn't changed, we need to manually reset stale shards. - newRoutingInfo->setAllShardsRefreshed(); - } - - collEntry->routingInfo = std::move(newRoutingInfo); - }; - - const ChunkVersion startingCollectionVersion = - (existingRoutingInfo ? existingRoutingInfo->getVersion() : ChunkVersion::UNSHARDED()); - - LOGV2_FOR_CATALOG_REFRESH( - 24106, - 1, - "Refreshing cached collection {namespace} with version {currentCollectionVersion}", - "namespace"_attr = nss, - "currentCollectionVersion"_attr = startingCollectionVersion); - - _cacheLoader.getChunksSince(nss, startingCollectionVersion) - .thenRunOn(_executor) - .getAsync(refreshCallback); - - // The routing info for this collection shouldn't change, as other threads may try to use the - // CatalogCache while we are waiting for the refresh to complete. 
- invariant(collEntry->routingInfo.get() == existingRoutingInfo.get()); -} - -void CatalogCache::_createOrGetCollectionEntryAndMarkEpochStale(const NamespaceString& nss) { - stdx::lock_guard<Latch> lg(_mutex); - auto collRoutingInfoEntry = _createOrGetCollectionEntry(lg, nss); - collRoutingInfoEntry->needsRefresh = true; - collRoutingInfoEntry->epochHasChanged = true; -} - -void CatalogCache::_createOrGetCollectionEntryAndMarkShardStale(const NamespaceString& nss, - const ShardId& staleShardId) { - stdx::lock_guard<Latch> lg(_mutex); - auto collRoutingInfoEntry = _createOrGetCollectionEntry(lg, nss); - collRoutingInfoEntry->needsRefresh = true; - if (collRoutingInfoEntry->routingInfo) { - collRoutingInfoEntry->routingInfo->setShardStale(staleShardId); - } -} - -void CatalogCache::_createOrGetCollectionEntryAndMarkAsNeedsRefresh(const NamespaceString& nss) { - stdx::lock_guard<Latch> lg(_mutex); - auto collRoutingInfoEntry = _createOrGetCollectionEntry(lg, nss); - collRoutingInfoEntry->needsRefresh = true; -} - -std::shared_ptr<CatalogCache::CollectionRoutingInfoEntry> CatalogCache::_createOrGetCollectionEntry( - WithLock wl, const NamespaceString& nss) { - auto& collectionsForDb = _collectionsByDb[nss.db()]; - if (!collectionsForDb.contains(nss.ns())) { - // TODO SERVER-46199: ensure collections cache size is capped - // currently no routine except for dropDatabase is removing cached collection entries and - // the cache for a specific DB can grow indefinitely. 
- collectionsForDb[nss.ns()] = std::make_shared<CollectionRoutingInfoEntry>(); - } - - return collectionsForDb[nss.ns()]; +void CatalogCache::invalidateCollectionEntry_LINEARIZABLE(const NamespaceString& nss) { + _collectionCache.invalidate(nss); } void CatalogCache::Stats::report(BSONObjBuilder* builder) const { @@ -708,14 +393,6 @@ void CatalogCache::Stats::report(BSONObjBuilder* builder) const { builder->append("totalRefreshWaitTimeMicros", totalRefreshWaitTimeMicros.load()); - builder->append("numActiveIncrementalRefreshes", numActiveIncrementalRefreshes.load()); - builder->append("countIncrementalRefreshesStarted", countIncrementalRefreshesStarted.load()); - - builder->append("numActiveFullRefreshes", numActiveFullRefreshes.load()); - builder->append("countFullRefreshesStarted", countFullRefreshesStarted.load()); - - builder->append("countFailedRefreshes", countFailedRefreshes.load()); - if (isMongos()) { BSONObjBuilder operationsBlockedByRefreshBuilder( builder->subobjStart("operationsBlockedByRefresh")); @@ -756,7 +433,6 @@ CatalogCache::DatabaseCache::LookupResult CatalogCache::DatabaseCache::_lookupDa OperationContext* opCtx, const std::string& dbName, const ComparableDatabaseVersion& previousDbVersion) { - // TODO (SERVER-34164): Track and increment stats for database refreshes LOGV2_FOR_CATALOG_REFRESH(24102, 2, "Refreshing cached database entry", "db"_attr = dbName); @@ -788,73 +464,199 @@ CatalogCache::DatabaseCache::LookupResult CatalogCache::DatabaseCache::_lookupDa } } -AtomicWord<uint64_t> ComparableDatabaseVersion::_localSequenceNumSource{1ULL}; +CatalogCache::CollectionCache::CollectionCache(ServiceContext* service, + ThreadPoolInterface& threadPool, + CatalogCacheLoader& catalogCacheLoader) + : ReadThroughCache(_mutex, + service, + threadPool, + [this](OperationContext* opCtx, + const NamespaceString& nss, + const ValueHandle& collectionHistory, + const ComparableChunkVersion& previousChunkVersion) { + return _lookupCollection( + opCtx, nss, 
collectionHistory, previousChunkVersion); + }, + kCollectionCacheSize), + _catalogCacheLoader(catalogCacheLoader) {} -ComparableDatabaseVersion ComparableDatabaseVersion::makeComparableDatabaseVersion( - const DatabaseVersion& version) { - return ComparableDatabaseVersion(version, _localSequenceNumSource.fetchAndAdd(1)); +void CatalogCache::CollectionCache::reportStats(BSONObjBuilder* builder) const { + _stats.report(builder); } -const DatabaseVersion& ComparableDatabaseVersion::getVersion() const { - return _dbVersion; +void CatalogCache::CollectionCache::_updateRefreshesStats(const bool isIncremental, + const bool add) { + if (add) { + if (isIncremental) { + _stats.numActiveIncrementalRefreshes.addAndFetch(1); + _stats.countIncrementalRefreshesStarted.addAndFetch(1); + } else { + _stats.numActiveFullRefreshes.addAndFetch(1); + _stats.countFullRefreshesStarted.addAndFetch(1); + } + } else { + if (isIncremental) { + _stats.numActiveIncrementalRefreshes.subtractAndFetch(1); + } else { + _stats.numActiveFullRefreshes.subtractAndFetch(1); + } + } } -uint64_t ComparableDatabaseVersion::getLocalSequenceNum() const { - return _localSequenceNum; -} +void CatalogCache::CollectionCache::Stats::report(BSONObjBuilder* builder) const { + builder->append("numActiveIncrementalRefreshes", numActiveIncrementalRefreshes.load()); + builder->append("countIncrementalRefreshesStarted", countIncrementalRefreshesStarted.load()); -BSONObj ComparableDatabaseVersion::toBSON() const { - BSONObjBuilder builder; - _dbVersion.getUuid().appendToBuilder(&builder, "uuid"); - builder.append("lastMod", _dbVersion.getLastMod()); - builder.append("localSequenceNum", std::to_string(_localSequenceNum)); - return builder.obj(); -} + builder->append("numActiveFullRefreshes", numActiveFullRefreshes.load()); + builder->append("countFullRefreshesStarted", countFullRefreshesStarted.load()); -std::string ComparableDatabaseVersion::toString() const { - return toBSON().toString(); + 
builder->append("countFailedRefreshes", countFailedRefreshes.load()); } +CatalogCache::CollectionCache::LookupResult CatalogCache::CollectionCache::_lookupCollection( + OperationContext* opCtx, + const NamespaceString& nss, + const RoutingTableHistoryValueHandle& existingHistory, + const ComparableChunkVersion& previousVersion) { + const bool isIncremental(existingHistory && existingHistory->optRt); + _updateRefreshesStats(isIncremental, true); -CachedDatabaseInfo::CachedDatabaseInfo(DatabaseType dbt, std::shared_ptr<Shard> primaryShard) - : _dbt(std::move(dbt)), _primaryShard(std::move(primaryShard)) {} + Timer t{}; + try { + auto lookupVersion = + isIncremental ? existingHistory->optRt->getVersion() : ChunkVersion::UNSHARDED(); -const ShardId& CachedDatabaseInfo::primaryId() const { - return _dbt.getPrimary(); + LOGV2_FOR_CATALOG_REFRESH(4619900, + 1, + "Refreshing cached collection", + "namespace"_attr = nss, + "currentVersion"_attr = previousVersion); + + auto collectionAndChunks = _catalogCacheLoader.getChunksSince(nss, lookupVersion).get(); + + auto newRoutingHistory = [&] { + // If we have routing info already and it's for the same collection epoch, we're + // updating. Otherwise, we're making a whole new routing table. 
+ if (isIncremental && + existingHistory->optRt->getVersion().epoch() == collectionAndChunks.epoch) { + return existingHistory->optRt->makeUpdated(collectionAndChunks.reshardingFields, + collectionAndChunks.changedChunks); + } + + auto defaultCollator = [&]() -> std::unique_ptr<CollatorInterface> { + if (!collectionAndChunks.defaultCollation.isEmpty()) { + // The collation should have been validated upon collection creation + return uassertStatusOK( + CollatorFactoryInterface::get(opCtx->getServiceContext()) + ->makeFromBSON(collectionAndChunks.defaultCollation)); + } + return nullptr; + }(); + + return RoutingTableHistory::makeNew(nss, + collectionAndChunks.uuid, + KeyPattern(collectionAndChunks.shardKeyPattern), + std::move(defaultCollator), + collectionAndChunks.shardKeyIsUnique, + collectionAndChunks.epoch, + std::move(collectionAndChunks.reshardingFields), + collectionAndChunks.changedChunks); + }(); + + newRoutingHistory.setAllShardsRefreshed(); + + // Check that the shards all match with what is on the config server + std::set<ShardId> shardIds; + newRoutingHistory.getAllShardIds(&shardIds); + for (const auto& shardId : shardIds) { + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardId)); + } + + const auto newVersion = + ComparableChunkVersion::makeComparableChunkVersion(newRoutingHistory.getVersion()); + + LOGV2_FOR_CATALOG_REFRESH(4619901, + isIncremental || newVersion != previousVersion ? 
0 : 1, + "Refreshed cached collection", + "namespace"_attr = nss, + "newVersion"_attr = newVersion, + "oldVersion"_attr = previousVersion, + "duration"_attr = Milliseconds(t.millis())); + _updateRefreshesStats(isIncremental, false); + + return LookupResult(OptionalRoutingTableHistory(std::move(newRoutingHistory)), newVersion); + } catch (const DBException& ex) { + _stats.countFailedRefreshes.addAndFetch(1); + _updateRefreshesStats(isIncremental, false); + + if (ex.code() == ErrorCodes::NamespaceNotFound) { + LOGV2_FOR_CATALOG_REFRESH(4619902, + 0, + "Collection has found to be unsharded after refresh", + "namespace"_attr = nss, + "duration"_attr = Milliseconds(t.millis())); + + return LookupResult( + OptionalRoutingTableHistory(), + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED())); + } + + LOGV2_FOR_CATALOG_REFRESH(4619903, + 0, + "Error refreshing cached collection", + "namespace"_attr = nss, + "duration"_attr = Milliseconds(t.millis()), + "error"_attr = redact(ex)); + + throw; + } } -bool CachedDatabaseInfo::shardingEnabled() const { - return _dbt.getSharded(); +AtomicWord<uint64_t> ComparableDatabaseVersion::_uuidDisambiguatingSequenceNumSource{1ULL}; + +ComparableDatabaseVersion ComparableDatabaseVersion::makeComparableDatabaseVersion( + const DatabaseVersion& version) { + return ComparableDatabaseVersion(version, _uuidDisambiguatingSequenceNumSource.fetchAndAdd(1)); } -DatabaseVersion CachedDatabaseInfo::databaseVersion() const { - return _dbt.getVersion(); +std::string ComparableDatabaseVersion::toString() const { + return str::stream() << (_dbVersion ? 
_dbVersion->toBSON().toString() : "NONE") << "|" + << _uuidDisambiguatingSequenceNum; } -AtomicWord<uint64_t> ComparableChunkVersion::_localSequenceNumSource{1ULL}; +bool ComparableDatabaseVersion::operator==(const ComparableDatabaseVersion& other) const { + if (!_dbVersion && !other._dbVersion) + return true; // Default constructed value + if (_dbVersion.is_initialized() != other._dbVersion.is_initialized()) + return false; // One side is default constructed value -ComparableChunkVersion ComparableChunkVersion::makeComparableChunkVersion( - const ChunkVersion& version) { - return ComparableChunkVersion(version, _localSequenceNumSource.fetchAndAdd(1)); + return sameUuid(other) && (_dbVersion->getLastMod() == other._dbVersion->getLastMod()); } -const ChunkVersion& ComparableChunkVersion::getVersion() const { - return _chunkVersion; +bool ComparableDatabaseVersion::operator<(const ComparableDatabaseVersion& other) const { + if (!_dbVersion && !other._dbVersion) + return false; // Default constructed value + + if (_dbVersion && other._dbVersion && sameUuid(other)) { + return _dbVersion->getLastMod() < other._dbVersion->getLastMod(); + } else { + return _uuidDisambiguatingSequenceNum < other._uuidDisambiguatingSequenceNum; + } } -uint64_t ComparableChunkVersion::getLocalSequenceNum() const { - return _localSequenceNum; +CachedDatabaseInfo::CachedDatabaseInfo(DatabaseType dbt, std::shared_ptr<Shard> primaryShard) + : _dbt(std::move(dbt)), _primaryShard(std::move(primaryShard)) {} + +const ShardId& CachedDatabaseInfo::primaryId() const { + return _dbt.getPrimary(); } -BSONObj ComparableChunkVersion::toBSON() const { - BSONObjBuilder builder; - _chunkVersion.appendToCommand(&builder); - builder.append("localSequenceNum", std::to_string(_localSequenceNum)); - return builder.obj(); +bool CachedDatabaseInfo::shardingEnabled() const { + return _dbt.getSharded(); } -std::string ComparableChunkVersion::toString() const { - return toBSON().toString(); +DatabaseVersion 
CachedDatabaseInfo::databaseVersion() const { + return _dbt.getVersion(); } } // namespace mongo diff --git a/src/mongo/s/catalog_cache.h b/src/mongo/s/catalog_cache.h index a957189183a..796b9e10136 100644 --- a/src/mongo/s/catalog_cache.h +++ b/src/mongo/s/catalog_cache.h @@ -45,8 +45,6 @@ namespace mongo { class BSONObjBuilder; -class CachedDatabaseInfo; -class OperationContext; static constexpr int kMaxNumStaleVersionRetries = 10; @@ -64,21 +62,21 @@ extern const OperationContext::Decoration<bool> operationShouldBlockBehindCatalo * in fact is impossible to compare two different DatabaseVersion that have different UUIDs. * * This class wrap a DatabaseVersion object to make it always comparable by timestamping it with a - * node-local sequence number (_dbVersionLocalSequence). + * node-local sequence number (_uuidDisambiguatingSequenceNum). * * This class class should go away once a cluster-wide comparable DatabaseVersion will be * implemented. */ class ComparableDatabaseVersion { public: - /* - * Create a ComparableDatabaseVersion that wraps the given DatabaseVersion. - * Each object created through this method will have a local sequence number grater then the + /** + * Creates a ComparableDatabaseVersion that wraps the given DatabaseVersion. + * Each object created through this method will have a local sequence number greater than the * previously created ones. */ static ComparableDatabaseVersion makeComparableDatabaseVersion(const DatabaseVersion& version); - /* + /** * Empty constructor needed by the ReadThroughCache. 
* * Instances created through this constructor will be always less then the ones created through @@ -86,39 +84,28 @@ public: */ ComparableDatabaseVersion() = default; - const DatabaseVersion& getVersion() const; - - uint64_t getLocalSequenceNum() const; - - BSONObj toBSON() const; + const DatabaseVersion& getVersion() const { + return *_dbVersion; + } std::string toString() const; - // Rerturns true if the two versions have the same UUID bool sameUuid(const ComparableDatabaseVersion& other) const { - return _dbVersion.getUuid() == other._dbVersion.getUuid(); + return _dbVersion->getUuid() == other._dbVersion->getUuid(); } - bool operator==(const ComparableDatabaseVersion& other) const { - return sameUuid(other) && (_dbVersion.getLastMod() == other._dbVersion.getLastMod()); - } + bool operator==(const ComparableDatabaseVersion& other) const; bool operator!=(const ComparableDatabaseVersion& other) const { return !(*this == other); } - /* - * In the case the two compared instances have different UUIDs the most recently created one - * will be grater, otherwise the comparision will be driven by the lastMod field of the - * underlying DatabaseVersion. + /** + * In case the two compared instances have different UUIDs, the most recently created one will + * be greater, otherwise the comparison will be driven by the lastMod field of the underlying + * DatabaseVersion. 
*/ - bool operator<(const ComparableDatabaseVersion& other) const { - if (sameUuid(other)) { - return _dbVersion.getLastMod() < other._dbVersion.getLastMod(); - } else { - return _localSequenceNum < other._localSequenceNum; - } - } + bool operator<(const ComparableDatabaseVersion& other) const; bool operator>(const ComparableDatabaseVersion& other) const { return other < *this; @@ -133,92 +120,18 @@ public: } private: - static AtomicWord<uint64_t> _localSequenceNumSource; + static AtomicWord<uint64_t> _uuidDisambiguatingSequenceNumSource; + + ComparableDatabaseVersion(const DatabaseVersion& version, + uint64_t uuidDisambiguatingSequenceNum) + : _dbVersion(version), _uuidDisambiguatingSequenceNum(uuidDisambiguatingSequenceNum) {} - ComparableDatabaseVersion(const DatabaseVersion& version, uint64_t localSequenceNum) - : _dbVersion(version), _localSequenceNum(localSequenceNum) {} + boost::optional<DatabaseVersion> _dbVersion; - DatabaseVersion _dbVersion; // Locally incremented sequence number that allows to compare two database versions with // different UUIDs. Each new comparableDatabaseVersion will have a greater sequence number then // the ones created before. - uint64_t _localSequenceNum{0}; -}; - -/** - * Constructed to be used exclusively by the CatalogCache as a vector clock (Time) to drive - * CollectionCache's lookups. - * - * The ChunkVersion class contains an non comparable epoch, which makes impossible to compare two - * ChunkVersions when their epochs's differ. - * - * This class wraps a ChunkVersion object with a node-local sequence number (_localSequenceNum) that - * allows the comparision. - * - * This class should go away once a cluster-wide comparable ChunkVersion is implemented. 
- */ -class ComparableChunkVersion { -public: - static ComparableChunkVersion makeComparableChunkVersion(const ChunkVersion& version); - - ComparableChunkVersion() = default; - - const ChunkVersion& getVersion() const; - - uint64_t getLocalSequenceNum() const; - - BSONObj toBSON() const; - - std::string toString() const; - - bool sameEpoch(const ComparableChunkVersion& other) const { - return _chunkVersion.epoch() == other._chunkVersion.epoch(); - } - - bool operator==(const ComparableChunkVersion& other) const { - return sameEpoch(other) && - (_chunkVersion.majorVersion() == other._chunkVersion.majorVersion() && - _chunkVersion.minorVersion() == other._chunkVersion.minorVersion()); - } - - bool operator!=(const ComparableChunkVersion& other) const { - return !(*this == other); - } - - bool operator<(const ComparableChunkVersion& other) const { - if (sameEpoch(other)) { - return _chunkVersion.majorVersion() < other._chunkVersion.majorVersion() || - (_chunkVersion.majorVersion() == other._chunkVersion.majorVersion() && - _chunkVersion.minorVersion() < other._chunkVersion.minorVersion()); - } else { - return _localSequenceNum < other._localSequenceNum; - } - } - - bool operator>(const ComparableChunkVersion& other) const { - return other < *this; - } - - bool operator<=(const ComparableChunkVersion& other) const { - return !(*this > other); - } - - bool operator>=(const ComparableChunkVersion& other) const { - return !(*this < other); - } - -private: - static AtomicWord<uint64_t> _localSequenceNumSource; - - ComparableChunkVersion(const ChunkVersion& version, uint64_t localSequenceNum) - : _chunkVersion(version), _localSequenceNum(localSequenceNum) {} - - ChunkVersion _chunkVersion; - - // Locally incremented sequence number that allows to compare two colection versions with - // different epochs. Each new comparableChunkVersion will have a greater sequence number than - // the ones created before. 
- uint64_t _localSequenceNum{0}; + uint64_t _uuidDisambiguatingSequenceNum{0}; }; /** @@ -298,21 +211,9 @@ public: /** * Same as getCollectionRoutingInfo above, but in addition causes the namespace to be refreshed. - * - * When forceRefreshFromThisThread is false, it's possible for this call to - * join an ongoing refresh from another thread forceRefreshFromThisThread. - * forceRefreshFromThisThread checks whether it joined another thread and - * then forces it to try again, which is necessary in cases where calls to - * getCollectionRoutingInfoWithRefresh must be causally consistent - * - * TODO: Remove this parameter in favor of using collection creation time + - * collection version to decide when a refresh is necessary and provide - * proper causal consistency */ - StatusWith<ChunkManager> getCollectionRoutingInfoWithRefresh( - OperationContext* opCtx, - const NamespaceString& nss, - bool forceRefreshFromThisThread = false); + StatusWith<ChunkManager> getCollectionRoutingInfoWithRefresh(OperationContext* opCtx, + const NamespaceString& nss); /** * Same as getCollectionRoutingInfoWithRefresh above, but in addition returns a @@ -333,11 +234,6 @@ public: const boost::optional<DatabaseVersion>& wantedVersion); /** - * Gets whether this operation should block behind a catalog cache refresh. - */ - static bool getOperationShouldBlockBehindCatalogCacheRefresh(OperationContext* opCtx); - - /** * Sets whether this operation should block behind a catalog cache refresh. */ static void setOperationShouldBlockBehindCatalogCacheRefresh(OperationContext* opCtx, @@ -349,18 +245,9 @@ public: * requests to block on an upcoming catalog cache refresh. 
*/ void invalidateShardOrEntireCollectionEntryForShardedCollection( - OperationContext* opCtx, const NamespaceString& nss, - boost::optional<ChunkVersion> wantedVersion, - const ChunkVersion& receivedVersion, - ShardId shardId); - - /** - * Non-blocking method that marks the current collection entry for the namespace as needing - * refresh due to an epoch change. Will cause all further targetting attempts for this - * namespace to block on a catalog cache refresh. - */ - void onEpochChange(const NamespaceString& nss); + const boost::optional<ChunkVersion>& wantedVersion, + const ShardId& shardId); /** * Throws a StaleConfigException if this catalog cache does not have an entry for the given @@ -370,16 +257,8 @@ public: * version to throw a StaleConfigException. */ void checkEpochOrThrow(const NamespaceString& nss, - ChunkVersion targetCollectionVersion, - const ShardId& shardId) const; - - /** - * Non-blocking method, which invalidates the shard for the routing table for the specified - * namespace. If that shard is targetted in the future, getCollectionRoutingInfo will wait on a - * refresh. - */ - void invalidateShardForShardedCollection(const NamespaceString& nss, - const ShardId& staleShardId); + const ChunkVersion& targetCollectionVersion, + const ShardId& shardId); /** * Non-blocking method, which invalidates all namespaces which contain data on the specified @@ -388,12 +267,6 @@ public: void invalidateEntriesThatReferenceShard(const ShardId& shardId); /** - * Non-blocking method, which removes the entire specified collection from the cache (resulting - * in full refresh on subsequent access) - */ - void purgeCollection(const NamespaceString& nss); - - /** * Non-blocking method, which removes the entire specified database (including its collections) * from the cache. 
*/ @@ -416,35 +289,17 @@ public: */ void checkAndRecordOperationBlockedByRefresh(OperationContext* opCtx, mongo::LogicalOp opType); + /** + * Non-blocking method that marks the current collection entry for the namespace as needing + * refresh. Will cause all further targetting attempts to block on a catalog cache refresh, + * even if they do not require causal consistency. + */ + void invalidateCollectionEntry_LINEARIZABLE(const NamespaceString& nss); + private: // Make the cache entries friends so they can access the private classes below friend class CachedDatabaseInfo; - /** - * Cache entry describing a collection. - */ - struct CollectionRoutingInfoEntry { - CollectionRoutingInfoEntry() = default; - // Disable copy (and move) semantics - CollectionRoutingInfoEntry(const CollectionRoutingInfoEntry&) = delete; - CollectionRoutingInfoEntry& operator=(const CollectionRoutingInfoEntry&) = delete; - - // Specifies whether this cache entry needs a refresh (in which case routingInfo should not - // be relied on) or it doesn't, in which case there should be a non-null routingInfo. - bool needsRefresh{true}; - - // Specifies whether the namespace has had an epoch change, which indicates that every - // shard should block on an upcoming refresh. - bool epochHasChanged{true}; - - // Contains a notification to be waited on for the refresh to complete (only available if - // needsRefresh is true) - std::shared_ptr<Notification<Status>> refreshCompletionNotification; - - // Contains the cached routing information (only available if needsRefresh is false) - std::shared_ptr<RoutingTableHistory> routingInfo; - }; - class DatabaseCache : public ReadThroughCache<std::string, DatabaseType, ComparableDatabaseVersion> { public: @@ -461,88 +316,54 @@ private: Mutex _mutex = MONGO_MAKE_LATCH("DatabaseCache::_mutex"); }; - /** - * Non-blocking call which schedules an asynchronous refresh for the specified namespace. The - * namespace must be in the 'needRefresh' state. 
- */ - void _scheduleCollectionRefresh(WithLock, - ServiceContext* service, - std::shared_ptr<CollectionRoutingInfoEntry> collEntry, - NamespaceString const& nss, - int refreshAttempt); + class CollectionCache : public RoutingTableHistoryCache { + public: + CollectionCache(ServiceContext* service, + ThreadPoolInterface& threadPool, + CatalogCacheLoader& catalogCacheLoader); - /** - * Marks a collection entry as needing refresh. Will create the collection entry if one does - * not exist. Also marks the epoch as changed, which will cause all further targetting requests - * against this namespace to block upon a catalog cache refresh. - */ - void _createOrGetCollectionEntryAndMarkEpochStale(const NamespaceString& nss); + void reportStats(BSONObjBuilder* builder) const; - /** - * Marks a collection entry as needing refresh. Will create the collection entry if one does - * not exist. Will mark the given shard ID as stale, which will cause all further targetting - * requests for the given shard for this namespace to block upon a catalog cache refresh. - */ - void _createOrGetCollectionEntryAndMarkShardStale(const NamespaceString& nss, - const ShardId& shardId); + private: + LookupResult _lookupCollection(OperationContext* opCtx, + const NamespaceString& nss, + const ValueHandle& collectionHistory, + const ComparableChunkVersion& previousChunkVersion); - /** - * Marks a collection entry as needing refresh. Will create the collection entry if one does - * not exist. - */ - void _createOrGetCollectionEntryAndMarkAsNeedsRefresh(const NamespaceString& nss); + CatalogCacheLoader& _catalogCacheLoader; + Mutex _mutex = MONGO_MAKE_LATCH("CollectionCache::_mutex"); - /** - * Retrieves the collection entry for the given namespace, creating the entry if one does not - * already exist. 
- */ - std::shared_ptr<CollectionRoutingInfoEntry> _createOrGetCollectionEntry( - WithLock wl, const NamespaceString& nss); + struct Stats { + // Tracks how many incremental refreshes are waiting to complete currently + AtomicWord<long long> numActiveIncrementalRefreshes{0}; - /** - * Used as a flag to indicate whether or not this thread performed its own - * refresh for certain helper functions - * - * kPerformedRefresh is used only when the calling thread performed the - * refresh *itself* - * - * kDidNotPerformRefresh is used either when there was an error or when - * this thread joined an ongoing refresh - */ - enum class RefreshAction { - kPerformedRefresh, - kDidNotPerformRefresh, - }; + // Cumulative, always-increasing counter of how many incremental refreshes have been + // kicked off + AtomicWord<long long> countIncrementalRefreshesStarted{0}; - /** - * Return type for helper functions performing refreshes so that they can - * indicate both status and whether or not this thread performed its own - * refresh - */ - struct RefreshResult { - // Status containing result of refresh - StatusWith<ChunkManager> statusWithInfo; - RefreshAction actionTaken; - }; + // Tracks how many full refreshes are waiting to complete currently + AtomicWord<long long> numActiveFullRefreshes{0}; - /** - * Retrieves the collection routing info for this namespace after blocking on a catalog cache - * refresh. - */ - CatalogCache::RefreshResult _getCollectionRoutingInfoWithForcedRefresh( - OperationContext* opctx, const NamespaceString& nss); + // Cumulative, always-increasing counter of how many full refreshes have been kicked off + AtomicWord<long long> countFullRefreshesStarted{0}; - /** - * Helper function used when we need the refresh action taken (e.g. 
when we - * want to force refresh) - */ - CatalogCache::RefreshResult _getCollectionRoutingInfo(OperationContext* opCtx, - const NamespaceString& nss); + // Cumulative, always-increasing counter of how many full or incremental refreshes + // failed for whatever reason + AtomicWord<long long> countFailedRefreshes{0}; - CatalogCache::RefreshResult _getCollectionRoutingInfoAt( - OperationContext* opCtx, - const NamespaceString& nss, - boost::optional<Timestamp> atClusterTime); + /** + * Reports the accumulated statistics for serverStatus. + */ + void report(BSONObjBuilder* builder) const; + + } _stats; + + void _updateRefreshesStats(const bool isIncremental, const bool add); + }; + + StatusWith<ChunkManager> _getCollectionRoutingInfoAt(OperationContext* opCtx, + const NamespaceString& nss, + boost::optional<Timestamp> atClusterTime); // Interface from which chunks will be retrieved CatalogCacheLoader& _cacheLoader; @@ -557,23 +378,6 @@ private: // combined AtomicWord<long long> totalRefreshWaitTimeMicros{0}; - // Tracks how many incremental refreshes are waiting to complete currently - AtomicWord<long long> numActiveIncrementalRefreshes{0}; - - // Cumulative, always-increasing counter of how many incremental refreshes have been kicked - // off - AtomicWord<long long> countIncrementalRefreshesStarted{0}; - - // Tracks how many full refreshes are waiting to complete currently - AtomicWord<long long> numActiveFullRefreshes{0}; - - // Cumulative, always-increasing counter of how many full refreshes have been kicked off - AtomicWord<long long> countFullRefreshesStarted{0}; - - // Cumulative, always-increasing counter of how many full or incremental refreshes failed - // for whatever reason - AtomicWord<long long> countFailedRefreshes{0}; - // Cumulative, always-increasing counter of how many operations have been blocked by a // catalog cache refresh. Broken down by operation type to match the operations tracked // by the OpCounters class. 
@@ -595,15 +399,9 @@ private: std::shared_ptr<ThreadPool> _executor; - DatabaseCache _databaseCache; - // Mutex to serialize access to the collection cache - mutable Mutex _mutex = MONGO_MAKE_LATCH("CatalogCache::_mutex"); - // Map from full collection name to the routing info for that collection, grouped by database - using CollectionInfoMap = StringMap<std::shared_ptr<CollectionRoutingInfoEntry>>; - using CollectionsByDbMap = StringMap<CollectionInfoMap>; - CollectionsByDbMap _collectionsByDb; + CollectionCache _collectionCache; }; } // namespace mongo diff --git a/src/mongo/s/catalog_cache_refresh_test.cpp b/src/mongo/s/catalog_cache_refresh_test.cpp index 70b56845eb1..1e21135a15b 100644 --- a/src/mongo/s/catalog_cache_refresh_test.cpp +++ b/src/mongo/s/catalog_cache_refresh_test.cpp @@ -440,7 +440,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadMissingChunkWithLowestVersion) { ASSERT_EQ(1, initialRoutingInfo.numChunks()); - auto future = scheduleRoutingInfoForcedRefresh(kNss); + auto future = scheduleRoutingInfoIncrementalRefresh(kNss); const auto incompleteChunks = [&]() { ChunkVersion version(1, 0, epoch); @@ -497,7 +497,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadMissingChunkWithHighestVersion) { ASSERT_EQ(1, initialRoutingInfo.numChunks()); - auto future = scheduleRoutingInfoForcedRefresh(kNss); + auto future = scheduleRoutingInfoIncrementalRefresh(kNss); const auto incompleteChunks = [&]() { ChunkVersion version(1, 0, epoch); @@ -551,7 +551,7 @@ TEST_F(CatalogCacheRefreshTest, ChunkEpochChangeDuringIncrementalLoad) { auto initialRoutingInfo(makeChunkManager(kNss, shardKeyPattern, nullptr, true, {})); ASSERT_EQ(1, initialRoutingInfo.numChunks()); - auto future = scheduleRoutingInfoForcedRefresh(kNss); + auto future = scheduleRoutingInfoIncrementalRefresh(kNss); ChunkVersion version = initialRoutingInfo.getVersion(); @@ -598,7 +598,7 @@ TEST_F(CatalogCacheRefreshTest, ChunkEpochChangeDuringIncrementalLoadRecoveryAft setupNShards(2); - auto future 
= scheduleRoutingInfoForcedRefresh(kNss); + auto future = scheduleRoutingInfoIncrementalRefresh(kNss); ChunkVersion oldVersion = initialRoutingInfo.getVersion(); const OID newEpoch = OID::gen(); @@ -683,7 +683,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterCollectionEpochChange) { setupNShards(2); - auto future = scheduleRoutingInfoForcedRefresh(kNss); + auto future = scheduleRoutingInfoIncrementalRefresh(kNss); ChunkVersion newVersion(1, 0, OID::gen()); @@ -730,7 +730,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterSplit) { ChunkVersion version = initialRoutingInfo.getVersion(); - auto future = scheduleRoutingInfoForcedRefresh(kNss); + auto future = scheduleRoutingInfoIncrementalRefresh(kNss); expectGetCollection(version.epoch(), shardKeyPattern); @@ -776,7 +776,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterMoveWithReshardingFieldsAdde ChunkVersion version = initialRoutingInfo.getVersion(); - auto future = scheduleRoutingInfoForcedRefresh(kNss); + auto future = scheduleRoutingInfoIncrementalRefresh(kNss); ChunkVersion expectedDestShardVersion; @@ -824,7 +824,7 @@ TEST_F(CatalogCacheRefreshTest, IncrementalLoadAfterMoveLastChunkWithReshardingF ChunkVersion version = initialRoutingInfo.getVersion(); - auto future = scheduleRoutingInfoForcedRefresh(kNss); + auto future = scheduleRoutingInfoIncrementalRefresh(kNss); // The collection type won't have resharding fields this time. 
expectGetCollection(version.epoch(), shardKeyPattern); diff --git a/src/mongo/s/catalog_cache_test.cpp b/src/mongo/s/catalog_cache_test.cpp index fce177bdd4f..8fdb461aca3 100644 --- a/src/mongo/s/catalog_cache_test.cpp +++ b/src/mongo/s/catalog_cache_test.cpp @@ -35,6 +35,7 @@ #include "mongo/s/catalog_cache.h" #include "mongo/s/catalog_cache_loader_mock.h" #include "mongo/s/sharding_router_test_fixture.h" +#include "mongo/s/stale_exception.h" namespace mongo { namespace { @@ -72,7 +73,54 @@ protected: _catalogCacheLoader->setDatabaseRefreshReturnValue(kErrorStatus); } + void loadCollection(const ChunkVersion& version) { + const auto coll = makeCollectionType(version); + _catalogCacheLoader->setCollectionRefreshReturnValue(coll); + _catalogCacheLoader->setChunkRefreshReturnValue(makeChunks(version)); + + const auto swChunkManager = + _catalogCache->getCollectionRoutingInfo(operationContext(), coll.getNs()); + ASSERT_OK(swChunkManager.getStatus()); + + // Reset the loader return values to avoid false positive results + _catalogCacheLoader->setCollectionRefreshReturnValue(kErrorStatus); + _catalogCacheLoader->setChunkRefreshReturnValue(kErrorStatus); + } + + void loadUnshardedCollection(const NamespaceString& nss) { + _catalogCacheLoader->setCollectionRefreshReturnValue( + Status(ErrorCodes::NamespaceNotFound, "collection not found")); + + const auto swChunkManager = + _catalogCache->getCollectionRoutingInfo(operationContext(), nss); + ASSERT_OK(swChunkManager.getStatus()); + + // Reset the loader return value to avoid false positive results + _catalogCacheLoader->setCollectionRefreshReturnValue(kErrorStatus); + } + + std::vector<ChunkType> makeChunks(ChunkVersion version) { + ChunkType chunk(kNss, + {kShardKeyPattern.getKeyPattern().globalMin(), + kShardKeyPattern.getKeyPattern().globalMax()}, + version, + {"0"}); + chunk.setName(OID::gen()); + return {chunk}; + } + + CollectionType makeCollectionType(const ChunkVersion& collVersion) { + CollectionType coll; + 
coll.setNs(kNss); + coll.setEpoch(collVersion.epoch()); + coll.setKeyPattern(kShardKeyPattern.getKeyPattern()); + coll.setUnique(false); + return coll; + } + const NamespaceString kNss{"catalgoCacheTestDB.foo"}; + const std::string kPattern{"_id"}; + const ShardKeyPattern kShardKeyPattern{BSON(kPattern << 1)}; const int kDummyPort{12345}; const HostAndPort kConfigHostAndPort{"DummyConfig", kDummyPort}; const std::vector<ShardId> kShards{{"0"}, {"1"}}; @@ -129,5 +177,86 @@ TEST_F(CatalogCacheTest, InvalidateSingleDbOnShardRemoval) { ASSERT_EQ(cachedDb.primaryId(), kShards[1]); } +TEST_F(CatalogCacheTest, CheckEpochNoDatabase) { + const auto collVersion = ChunkVersion(1, 0, OID::gen()); + ASSERT_THROWS_WITH_CHECK(_catalogCache->checkEpochOrThrow(kNss, collVersion, kShards[0]), + StaleConfigException, + [&](const StaleConfigException& ex) { + const auto staleInfo = ex.extraInfo<StaleConfigInfo>(); + ASSERT(staleInfo); + ASSERT_EQ(staleInfo->getNss(), kNss); + ASSERT_EQ(staleInfo->getVersionReceived(), collVersion); + ASSERT_EQ(staleInfo->getShardId(), kShards[0]); + ASSERT(staleInfo->getVersionWanted() == boost::none); + }); +} + +TEST_F(CatalogCacheTest, CheckEpochNoCollection) { + const auto dbVersion = DatabaseVersion(); + const auto collVersion = ChunkVersion(1, 0, OID::gen()); + + loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], true, dbVersion)}); + ASSERT_THROWS_WITH_CHECK(_catalogCache->checkEpochOrThrow(kNss, collVersion, kShards[0]), + StaleConfigException, + [&](const StaleConfigException& ex) { + const auto staleInfo = ex.extraInfo<StaleConfigInfo>(); + ASSERT(staleInfo); + ASSERT_EQ(staleInfo->getNss(), kNss); + ASSERT_EQ(staleInfo->getVersionReceived(), collVersion); + ASSERT_EQ(staleInfo->getShardId(), kShards[0]); + ASSERT(staleInfo->getVersionWanted() == boost::none); + }); +} + +TEST_F(CatalogCacheTest, CheckEpochUnshardedCollection) { + const auto dbVersion = DatabaseVersion(); + const auto collVersion = ChunkVersion(1, 0, OID::gen()); 
+ + loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], true, dbVersion)}); + loadUnshardedCollection(kNss); + ASSERT_THROWS_WITH_CHECK(_catalogCache->checkEpochOrThrow(kNss, collVersion, kShards[0]), + StaleConfigException, + [&](const StaleConfigException& ex) { + const auto staleInfo = ex.extraInfo<StaleConfigInfo>(); + ASSERT(staleInfo); + ASSERT_EQ(staleInfo->getNss(), kNss); + ASSERT_EQ(staleInfo->getVersionReceived(), collVersion); + ASSERT_EQ(staleInfo->getShardId(), kShards[0]); + ASSERT(staleInfo->getVersionWanted() == boost::none); + }); +} + +TEST_F(CatalogCacheTest, CheckEpochWithMismatch) { + const auto dbVersion = DatabaseVersion(); + const auto wantedCollVersion = ChunkVersion(1, 0, OID::gen()); + const auto receivedCollVersion = ChunkVersion(1, 0, OID::gen()); + + loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], true, dbVersion)}); + loadCollection(wantedCollVersion); + + ASSERT_THROWS_WITH_CHECK( + _catalogCache->checkEpochOrThrow(kNss, receivedCollVersion, kShards[0]), + StaleConfigException, + [&](const StaleConfigException& ex) { + const auto staleInfo = ex.extraInfo<StaleConfigInfo>(); + ASSERT(staleInfo); + ASSERT_EQ(staleInfo->getNss(), kNss); + ASSERT_EQ(staleInfo->getVersionReceived(), receivedCollVersion); + ASSERT(staleInfo->getVersionWanted() != boost::none); + ASSERT_EQ(*(staleInfo->getVersionWanted()), wantedCollVersion); + ASSERT_EQ(staleInfo->getShardId(), kShards[0]); + }); +} + +TEST_F(CatalogCacheTest, CheckEpochWithMatch) { + const auto dbVersion = DatabaseVersion(); + const auto collVersion = ChunkVersion(1, 0, OID::gen()); + + loadDatabases({DatabaseType(kNss.db().toString(), kShards[0], true, dbVersion)}); + loadCollection(collVersion); + + _catalogCache->checkEpochOrThrow(kNss, collVersion, kShards[0]); +} + } // namespace } // namespace mongo diff --git a/src/mongo/s/catalog_cache_test_fixture.cpp b/src/mongo/s/catalog_cache_test_fixture.cpp index 71e02e67fac..4f59eeaef8a 100644 --- 
a/src/mongo/s/catalog_cache_test_fixture.cpp +++ b/src/mongo/s/catalog_cache_test_fixture.cpp @@ -81,6 +81,26 @@ CatalogCacheTestFixture::scheduleRoutingInfoUnforcedRefresh(const NamespaceStrin }); } +executor::NetworkTestEnv::FutureHandle<boost::optional<ChunkManager>> +CatalogCacheTestFixture::scheduleRoutingInfoIncrementalRefresh(const NamespaceString& nss) { + auto catalogCache = Grid::get(getServiceContext())->catalogCache(); + const auto cm = + uassertStatusOK(catalogCache->getCollectionRoutingInfo(operationContext(), nss)); + ASSERT(cm.isSharded()); + + // Simulates the shard wanting a higher version than the one sent by the router. + catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection( + nss, boost::none, cm.dbPrimary()); + + return launchAsync([this, nss] { + auto client = getServiceContext()->makeClient("Test"); + auto const catalogCache = Grid::get(getServiceContext())->catalogCache(); + + return boost::make_optional( + uassertStatusOK(catalogCache->getCollectionRoutingInfo(operationContext(), nss))); + }); +} + std::vector<ShardType> CatalogCacheTestFixture::setupNShards(int numShards) { std::vector<ShardType> shards; for (int i = 0; i < numShards; i++) { diff --git a/src/mongo/s/catalog_cache_test_fixture.h b/src/mongo/s/catalog_cache_test_fixture.h index fb5238a2ba9..3d58f6a8557 100644 --- a/src/mongo/s/catalog_cache_test_fixture.h +++ b/src/mongo/s/catalog_cache_test_fixture.h @@ -84,6 +84,17 @@ protected: scheduleRoutingInfoUnforcedRefresh(const NamespaceString& nss); /** + * Advance the time in the cache for 'kNss' and schedules a thread to make an incremental + * refresh. + * + * NOTE: The returned value is always set. The reason to use optional is a deficiency of + * std::future with the MSVC STL library, which requires the templated type to be default + * constructible. 
+ */ + executor::NetworkTestEnv::FutureHandle<boost::optional<ChunkManager>> + scheduleRoutingInfoIncrementalRefresh(const NamespaceString& nss); + + /** * Ensures that there are 'numShards' available in the shard registry. The shard ids are * generated as "0", "1", etc. * diff --git a/src/mongo/s/chunk_manager.cpp b/src/mongo/s/chunk_manager.cpp index 5713855e01f..9ded562066c 100644 --- a/src/mongo/s/chunk_manager.cpp +++ b/src/mongo/s/chunk_manager.cpp @@ -336,22 +336,23 @@ void RoutingTableHistory::setAllShardsRefreshed() { } Chunk ChunkManager::findIntersectingChunk(const BSONObj& shardKey, const BSONObj& collation) const { - const bool hasSimpleCollation = (collation.isEmpty() && !_rt->getDefaultCollator()) || + const bool hasSimpleCollation = (collation.isEmpty() && !_rt->optRt->getDefaultCollator()) || SimpleBSONObjComparator::kInstance.evaluate(collation == CollationSpec::kSimpleSpec); if (!hasSimpleCollation) { for (BSONElement elt : shardKey) { uassert(ErrorCodes::ShardKeyNotFound, str::stream() << "Cannot target single shard due to collation of key " - << elt.fieldNameStringData() << " for namespace " << _rt->nss(), + << elt.fieldNameStringData() << " for namespace " + << _rt->optRt->nss(), !CollationIndexKey::isCollatableType(elt.type())); } } - auto chunkInfo = _rt->findIntersectingChunk(shardKey); + auto chunkInfo = _rt->optRt->findIntersectingChunk(shardKey); uassert(ErrorCodes::ShardKeyNotFound, str::stream() << "Cannot target single shard using key " << shardKey - << " for namespace " << _rt->nss(), + << " for namespace " << _rt->optRt->nss(), chunkInfo && chunkInfo->containsKey(shardKey)); return Chunk(*chunkInfo, _clusterTime); @@ -361,7 +362,7 @@ bool ChunkManager::keyBelongsToShard(const BSONObj& shardKey, const ShardId& sha if (shardKey.isEmpty()) return false; - auto chunkInfo = _rt->findIntersectingChunk(shardKey); + auto chunkInfo = _rt->optRt->findIntersectingChunk(shardKey); if (!chunkInfo) return false; @@ -374,7 +375,7 @@ void 
ChunkManager::getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> e const BSONObj& query, const BSONObj& collation, std::set<ShardId>* shardIds) const { - auto qr = std::make_unique<QueryRequest>(_rt->nss()); + auto qr = std::make_unique<QueryRequest>(_rt->optRt->nss()); qr->setFilter(query); if (auto uuid = getUUID()) @@ -382,8 +383,8 @@ void ChunkManager::getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> e if (!collation.isEmpty()) { qr->setCollation(collation); - } else if (_rt->getDefaultCollator()) { - auto defaultCollator = _rt->getDefaultCollator(); + } else if (_rt->optRt->getDefaultCollator()) { + auto defaultCollator = _rt->optRt->getDefaultCollator(); qr->setCollation(defaultCollator->getSpec().toBSON()); expCtx->setCollator(defaultCollator->clone()); } @@ -396,7 +397,7 @@ void ChunkManager::getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> e MatchExpressionParser::kAllowAllSpecialFeatures)); // Fast path for targeting equalities on the shard key. - auto shardKeyToFind = _rt->getShardKeyPattern().extractShardKeyFromQuery(*cq); + auto shardKeyToFind = _rt->optRt->getShardKeyPattern().extractShardKeyFromQuery(*cq); if (!shardKeyToFind.isEmpty()) { try { auto chunk = findIntersectingChunk(shardKeyToFind, collation); @@ -413,14 +414,14 @@ void ChunkManager::getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> e // Query { a : { $gte : 1, $lt : 2 }, // b : { $gte : 3, $lt : 4 } } // => Bounds { a : [1, 2), b : [3, 4) } - IndexBounds bounds = getIndexBoundsForQuery(_rt->getShardKeyPattern().toBSON(), *cq); + IndexBounds bounds = getIndexBoundsForQuery(_rt->optRt->getShardKeyPattern().toBSON(), *cq); // Transforms bounds for each shard key field into full shard key ranges // for example : // Key { a : 1, b : 1 } // Bounds { a : [1, 2), b : [3, 4) } // => Ranges { a : 1, b : 3 } => { a : 2, b : 4 } - BoundList ranges = _rt->getShardKeyPattern().flattenBounds(bounds); + BoundList ranges = 
_rt->optRt->getShardKeyPattern().flattenBounds(bounds); for (BoundList::const_iterator it = ranges.begin(); it != ranges.end(); ++it) { getShardIdsForRange(it->first /*min*/, it->second /*max*/, shardIds); @@ -430,7 +431,7 @@ void ChunkManager::getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> e // because _shardVersions contains shards with chunks and is built based on the last // refresh. Therefore, it is possible for _shardVersions to have fewer entries if a shard // no longer owns chunks when it used to at _clusterTime. - if (!_clusterTime && shardIds->size() == _rt->_shardVersions.size()) { + if (!_clusterTime && shardIds->size() == _rt->optRt->_shardVersions.size()) { break; } } @@ -439,7 +440,7 @@ void ChunkManager::getShardIdsForQuery(boost::intrusive_ptr<ExpressionContext> e // For now, we satisfy that assumption by adding a shard with no matches rather than returning // an empty set of shards. if (shardIds->empty()) { - _rt->forEachChunk([&](const std::shared_ptr<ChunkInfo>& chunkInfo) { + _rt->optRt->forEachChunk([&](const std::shared_ptr<ChunkInfo>& chunkInfo) { shardIds->insert(chunkInfo->getShardIdAt(_clusterTime)); return false; }); @@ -459,7 +460,7 @@ void ChunkManager::getShardIdsForRange(const BSONObj& min, return; } - _rt->forEachOverlappingChunk(min, max, true, [&](auto& chunkInfo) { + _rt->optRt->forEachOverlappingChunk(min, max, true, [&](auto& chunkInfo) { shardIds->insert(chunkInfo->getShardIdAt(_clusterTime)); // No need to iterate through the rest of the ranges, because we already know we need to use @@ -467,7 +468,7 @@ void ChunkManager::getShardIdsForRange(const BSONObj& min, // because _shardVersions contains shards with chunks and is built based on the last // refresh. Therefore, it is possible for _shardVersions to have fewer entries if a shard // no longer owns chunks when it used to at _clusterTime. 
- if (!_clusterTime && shardIds->size() == _rt->_shardVersions.size()) { + if (!_clusterTime && shardIds->size() == _rt->optRt->_shardVersions.size()) { return false; } @@ -478,14 +479,15 @@ void ChunkManager::getShardIdsForRange(const BSONObj& min, bool ChunkManager::rangeOverlapsShard(const ChunkRange& range, const ShardId& shardId) const { bool overlapFound = false; - _rt->forEachOverlappingChunk(range.getMin(), range.getMax(), false, [&](auto& chunkInfo) { - if (chunkInfo->getShardIdAt(_clusterTime) == shardId) { - overlapFound = true; - return false; - } + _rt->optRt->forEachOverlappingChunk( + range.getMin(), range.getMax(), false, [&](auto& chunkInfo) { + if (chunkInfo->getShardIdAt(_clusterTime) == shardId) { + overlapFound = true; + return false; + } - return true; - }); + return true; + }); return overlapFound; } @@ -494,7 +496,7 @@ boost::optional<Chunk> ChunkManager::getNextChunkOnShard(const BSONObj& shardKey const ShardId& shardId) const { boost::optional<Chunk> chunk; - _rt->forEachChunk( + _rt->optRt->forEachChunk( [&](auto& chunkInfo) { if (chunkInfo->getShardIdAt(_clusterTime) == shardId) { chunk.emplace(*chunkInfo, _clusterTime); @@ -654,7 +656,7 @@ ChunkManager ChunkManager::makeAtTime(const ChunkManager& cm, Timestamp clusterT } std::string ChunkManager::toString() const { - return _rt ? _rt->toString() : "UNSHARDED"; + return _rt->optRt ? _rt->optRt->toString() : "UNSHARDED"; } bool RoutingTableHistory::compatibleWith(const RoutingTableHistory& other, @@ -733,7 +735,7 @@ RoutingTableHistory RoutingTableHistory::makeUpdated( auto changedChunkInfos = flatten(changedChunks); auto chunkMap = _chunkMap.createMerged(changedChunkInfos); - // If at least one diff was applied, the collection's version must have advanced + // Only update the same collection. 
invariant(getVersion().epoch() == chunkMap.getVersion().epoch()); return RoutingTableHistory(_nss, @@ -745,4 +747,60 @@ RoutingTableHistory RoutingTableHistory::makeUpdated( std::move(chunkMap)); } +AtomicWord<uint64_t> ComparableChunkVersion::_epochDisambiguatingSequenceNumSource{1ULL}; +AtomicWord<uint64_t> ComparableChunkVersion::_forcedRefreshSequenceNumSource{1ULL}; + +ComparableChunkVersion ComparableChunkVersion::makeComparableChunkVersion( + const ChunkVersion& version) { + return ComparableChunkVersion(_forcedRefreshSequenceNumSource.load(), + version, + _epochDisambiguatingSequenceNumSource.fetchAndAdd(1)); +} + +ComparableChunkVersion ComparableChunkVersion::makeComparableChunkVersionForForcedRefresh() { + return ComparableChunkVersion(_forcedRefreshSequenceNumSource.addAndFetch(2) - 1, + boost::none, + _epochDisambiguatingSequenceNumSource.fetchAndAdd(1)); +} + +std::string ComparableChunkVersion::toString() const { + return str::stream() << _forcedRefreshSequenceNum << "|" + << (_chunkVersion ? 
_chunkVersion->toString() : "NONE") << "|" + << _epochDisambiguatingSequenceNum; +} + +bool ComparableChunkVersion::operator==(const ComparableChunkVersion& other) const { + if (_forcedRefreshSequenceNum == other._forcedRefreshSequenceNum) { + if (_forcedRefreshSequenceNum == 0) + return true; // Default constructed value + + if (sameEpoch(other)) { + if (_chunkVersion->majorVersion() == 0 && other._chunkVersion->majorVersion() == 0) { + return _chunkVersion->epoch() == OID(); + } + return _chunkVersion->majorVersion() == other._chunkVersion->majorVersion() && + _chunkVersion->minorVersion() == other._chunkVersion->minorVersion(); + } + } + return false; +} + +bool ComparableChunkVersion::operator<(const ComparableChunkVersion& other) const { + if (_forcedRefreshSequenceNum < other._forcedRefreshSequenceNum) + return true; + if (_forcedRefreshSequenceNum > other._forcedRefreshSequenceNum) + return false; + if (_forcedRefreshSequenceNum == 0) + return false; // Default constructed value + + if (sameEpoch(other) && other._chunkVersion->epoch() != OID() && + _chunkVersion->majorVersion() != 0 && other._chunkVersion->majorVersion() != 0) { + return _chunkVersion->majorVersion() < other._chunkVersion->majorVersion() || + (_chunkVersion->majorVersion() == other._chunkVersion->majorVersion() && + _chunkVersion->minorVersion() < other._chunkVersion->minorVersion()); + } else { + return _epochDisambiguatingSequenceNum < other._epochDisambiguatingSequenceNum; + } +} + } // namespace mongo diff --git a/src/mongo/s/chunk_manager.h b/src/mongo/s/chunk_manager.h index 7f25a810a4a..e694a94c201 100644 --- a/src/mongo/s/chunk_manager.h +++ b/src/mongo/s/chunk_manager.h @@ -43,6 +43,7 @@ #include "mongo/s/shard_key_pattern.h" #include "mongo/stdx/unordered_map.h" #include "mongo/util/concurrency/ticketholder.h" +#include "mongo/util/read_through_cache.h" namespace mongo { @@ -324,13 +325,128 @@ private: }; /** + * Constructed to be used exclusively by the CatalogCache as a vector 
clock (Time) to drive + * CollectionCache's lookups. + * + * The ChunkVersion class contains a non comparable epoch, which makes impossible to compare two + * ChunkVersions when their epochs's differ. + * + * This class wraps a ChunkVersion object with a node-local sequence number + * (_epochDisambiguatingSequenceNum) that allows the comparision. + * + * This class should go away once a cluster-wide comparable ChunkVersion is implemented. + */ +class ComparableChunkVersion { +public: + /** + * Creates a ComparableChunkVersion that wraps the given ChunkVersion. + * Each object created through this method will have a local sequence number greater than the + * previously created ones. + */ + static ComparableChunkVersion makeComparableChunkVersion(const ChunkVersion& version); + + /** + * Creates a ComparableChunkVersion object, which will artificially be greater than any that + * were previously created by `makeComparableChunkVersion`. Used as means to cause the + * collections cache to attempt a refresh in situations where causal consistency cannot be + * inferred. + */ + static ComparableChunkVersion makeComparableChunkVersionForForcedRefresh(); + + /** + * Empty constructor needed by the ReadThroughCache. + * + * Instances created through this constructor will be always less then the ones created through + * the two static constructors, but they do not carry any meaningful value and can only be used + * for comparison purposes. 
+ */ + ComparableChunkVersion() = default; + + const ChunkVersion& getVersion() const { + return *_chunkVersion; + } + + std::string toString() const; + + bool sameEpoch(const ComparableChunkVersion& other) const { + return _chunkVersion->epoch() == other._chunkVersion->epoch(); + } + + bool operator==(const ComparableChunkVersion& other) const; + + bool operator!=(const ComparableChunkVersion& other) const { + return !(*this == other); + } + + /** + * In case the two compared instances have different epochs, the most recently created one will + * be greater, otherwise the comparision will be driven by the major/minor versions of the + * underlying ChunkVersion. + */ + bool operator<(const ComparableChunkVersion& other) const; + + bool operator>(const ComparableChunkVersion& other) const { + return other < *this; + } + + bool operator<=(const ComparableChunkVersion& other) const { + return !(*this > other); + } + + bool operator>=(const ComparableChunkVersion& other) const { + return !(*this < other); + } + +private: + static AtomicWord<uint64_t> _epochDisambiguatingSequenceNumSource; + static AtomicWord<uint64_t> _forcedRefreshSequenceNumSource; + + ComparableChunkVersion(uint64_t forcedRefreshSequenceNum, + boost::optional<ChunkVersion> version, + uint64_t epochDisambiguatingSequenceNum) + : _forcedRefreshSequenceNum(forcedRefreshSequenceNum), + _chunkVersion(std::move(version)), + _epochDisambiguatingSequenceNum(epochDisambiguatingSequenceNum) {} + + uint64_t _forcedRefreshSequenceNum{0}; + + boost::optional<ChunkVersion> _chunkVersion; + + // Locally incremented sequence number that allows to compare two colection versions with + // different epochs. Each new comparableChunkVersion will have a greater sequence number than + // the ones created before. + uint64_t _epochDisambiguatingSequenceNum{0}; +}; + +/** + * This intermediate structure is necessary to be able to store UNSHARDED collections in the routing + * table history cache below. 
The reason is that currently the RoutingTableHistory class only + * supports sharded collections (i.e., collections which have entries in config.collections and + * config.chunks). + */ +struct OptionalRoutingTableHistory { + // UNSHARDED collection constructor + OptionalRoutingTableHistory() = default; + + // SHARDED collection constructor + OptionalRoutingTableHistory(RoutingTableHistory&& rt) : optRt(std::move(rt)) {} + + // If boost::none, the collection is UNSHARDED, otherwise it is SHARDED + boost::optional<RoutingTableHistory> optRt; +}; + +using RoutingTableHistoryCache = + ReadThroughCache<NamespaceString, OptionalRoutingTableHistory, ComparableChunkVersion>; +using RoutingTableHistoryValueHandle = RoutingTableHistoryCache::ValueHandle; + +/** * Wrapper around a RoutingTableHistory, which pins it to a particular point in time. */ class ChunkManager { public: ChunkManager(ShardId dbPrimary, DatabaseVersion dbVersion, - std::shared_ptr<RoutingTableHistory> rt, + RoutingTableHistoryValueHandle rt, boost::optional<Timestamp> clusterTime) : _dbPrimary(std::move(dbPrimary)), _dbVersion(std::move(dbVersion)), @@ -340,7 +456,7 @@ public: // Methods supported on both sharded and unsharded collections bool isSharded() const { - return bool(_rt); + return bool(_rt->optRt); } const ShardId& dbPrimary() const { @@ -352,7 +468,7 @@ public: } int numChunks() const { - return _rt ? _rt->numChunks() : 1; + return _rt->optRt ? 
_rt->optRt->numChunks() : 1; } std::string toString() const; @@ -360,32 +476,32 @@ public: // Methods only supported on sharded collections (caller must check isSharded()) const ShardKeyPattern& getShardKeyPattern() const { - return _rt->getShardKeyPattern(); + return _rt->optRt->getShardKeyPattern(); } const CollatorInterface* getDefaultCollator() const { - return _rt->getDefaultCollator(); + return _rt->optRt->getDefaultCollator(); } bool isUnique() const { - return _rt->isUnique(); + return _rt->optRt->isUnique(); } ChunkVersion getVersion() const { - return _rt->getVersion(); + return _rt->optRt->getVersion(); } ChunkVersion getVersion(const ShardId& shardId) const { - return _rt->getVersion(shardId); + return _rt->optRt->getVersion(shardId); } ChunkVersion getVersionForLogging(const ShardId& shardId) const { - return _rt->getVersionForLogging(shardId); + return _rt->optRt->getVersionForLogging(shardId); } template <typename Callable> void forEachChunk(Callable&& handler) const { - _rt->forEachChunk( + _rt->optRt->forEachChunk( [this, handler = std::forward<Callable>(handler)](const auto& chunkInfo) mutable { if (!handler(Chunk{*chunkInfo, _clusterTime})) return false; @@ -461,14 +577,14 @@ public: * Returns the ids of all shards on which the collection has any chunks. 
*/ void getAllShardIds(std::set<ShardId>* all) const { - _rt->getAllShardIds(all); + _rt->optRt->getAllShardIds(all); } /** * Returns the number of shards on which the collection has any chunks */ int getNShardsOwningChunks() const { - return _rt->getNShardsOwningChunks(); + return _rt->optRt->getNShardsOwningChunks(); } // Transforms query into bounds for each field in the shard key @@ -500,30 +616,30 @@ public: * Returns true if, for this shard, the chunks are identical in both chunk managers */ bool compatibleWith(const ChunkManager& other, const ShardId& shard) const { - return _rt->compatibleWith(*other._rt, shard); + return _rt->optRt->compatibleWith(*other._rt->optRt, shard); } bool uuidMatches(UUID uuid) const { - return _rt->uuidMatches(uuid); + return _rt->optRt->uuidMatches(uuid); } boost::optional<UUID> getUUID() const { - return _rt->getUUID(); + return _rt->optRt->getUUID(); } const boost::optional<TypeCollectionReshardingFields>& getReshardingFields() const { - return _rt->getReshardingFields(); + return _rt->optRt->getReshardingFields(); } const RoutingTableHistory& getRoutingTableHistory_ForTest() const { - return *_rt; + return *_rt->optRt; } private: ShardId _dbPrimary; DatabaseVersion _dbVersion; - std::shared_ptr<RoutingTableHistory> _rt; + RoutingTableHistoryValueHandle _rt; boost::optional<Timestamp> _clusterTime; }; diff --git a/src/mongo/s/chunk_manager_refresh_bm.cpp b/src/mongo/s/chunk_manager_refresh_bm.cpp index a3feba2de1e..bd9b133301c 100644 --- a/src/mongo/s/chunk_manager_refresh_bm.cpp +++ b/src/mongo/s/chunk_manager_refresh_bm.cpp @@ -43,8 +43,10 @@ namespace { const NamespaceString kNss("test", "foo"); -std::shared_ptr<RoutingTableHistory> makeStandaloneRoutingTableHistory(RoutingTableHistory rt) { - return std::make_shared<RoutingTableHistory>(std::move(rt)); +RoutingTableHistoryValueHandle makeStandaloneRoutingTableHistory(RoutingTableHistory rt) { + const auto version = rt.getVersion(); + return RoutingTableHistoryValueHandle( 
+ std::move(rt), ComparableChunkVersion::makeComparableChunkVersion(version)); } ChunkRange getRangeForChunk(int i, int nChunks) { @@ -69,6 +71,7 @@ CollectionMetadata makeChunkManagerWithShardSelector(int nShards, std::vector<ChunkType> chunks; chunks.reserve(nChunks); + for (uint32_t i = 0; i < nChunks; ++i) { chunks.emplace_back(kNss, getRangeForChunk(i, nChunks), @@ -144,13 +147,13 @@ auto BM_FullBuildOfChunkManager(benchmark::State& state, ShardSelectorFn selectS const uint32_t nChunks = state.range(1); const auto collEpoch = OID::gen(); - const auto collName = NamespaceString("test.foo"); const auto shardKeyPattern = KeyPattern(BSON("_id" << 1)); std::vector<ChunkType> chunks; chunks.reserve(nChunks); + for (uint32_t i = 0; i < nChunks; ++i) { - chunks.emplace_back(collName, + chunks.emplace_back(kNss, getRangeForChunk(i, nChunks), ChunkVersion{i + 1, 0, collEpoch}, selectShard(i, nShards, nChunks)); @@ -158,7 +161,7 @@ auto BM_FullBuildOfChunkManager(benchmark::State& state, ShardSelectorFn selectS for (auto keepRunning : state) { auto rt = RoutingTableHistory::makeNew( - collName, UUID::gen(), shardKeyPattern, nullptr, true, collEpoch, boost::none, chunks); + kNss, UUID::gen(), shardKeyPattern, nullptr, true, collEpoch, boost::none, chunks); benchmark::DoNotOptimize( CollectionMetadata(ChunkManager(ShardId("shard0"), DatabaseVersion(UUID::gen(), 1), diff --git a/src/mongo/s/commands/SConscript b/src/mongo/s/commands/SConscript index 780d4d4bc9a..8fc761a2e0b 100644 --- a/src/mongo/s/commands/SConscript +++ b/src/mongo/s/commands/SConscript @@ -124,6 +124,7 @@ env.Library( '$BUILD_DIR/mongo/db/commands/test_commands_enabled', '$BUILD_DIR/mongo/db/commands/write_commands_common', '$BUILD_DIR/mongo/db/ftdc/ftdc_server', + '$BUILD_DIR/mongo/db/initialize_api_parameters', '$BUILD_DIR/mongo/db/logical_session_cache_impl', '$BUILD_DIR/mongo/db/pipeline/aggregation', '$BUILD_DIR/mongo/db/query/command_request_response', diff --git 
a/src/mongo/s/commands/cluster_drop_cmd.cpp b/src/mongo/s/commands/cluster_drop_cmd.cpp index a69e3292597..f727489ccc0 100644 --- a/src/mongo/s/commands/cluster_drop_cmd.cpp +++ b/src/mongo/s/commands/cluster_drop_cmd.cpp @@ -88,7 +88,9 @@ public: // Invalidate the routing table cache entry for this collection so that we reload it the // next time it is accessed, even if sending the command to the config server fails due // to e.g. a NetworkError. - ON_BLOCK_EXIT([opCtx, nss] { Grid::get(opCtx)->catalogCache()->onEpochChange(nss); }); + ON_BLOCK_EXIT([opCtx, nss] { + Grid::get(opCtx)->catalogCache()->invalidateCollectionEntry_LINEARIZABLE(nss); + }); auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard(); auto cmdResponse = uassertStatusOK(configShard->runCommandWithFixedRetryAttempts( diff --git a/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp b/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp index b4157bee9d9..531aa1ab41e 100644 --- a/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp +++ b/src/mongo/s/commands/cluster_merge_chunks_cmd.cpp @@ -174,8 +174,10 @@ public: Shard::RetryPolicy::kNotIdempotent)); uassertStatusOK(response.commandStatus); - Grid::get(opCtx)->catalogCache()->invalidateShardForShardedCollection( - nss, firstChunk.getShardId()); + Grid::get(opCtx) + ->catalogCache() + ->invalidateShardOrEntireCollectionEntryForShardedCollection( + nss, boost::none, firstChunk.getShardId()); CommandHelpers::filterCommandReplyForPassthrough(response.response, &result); return true; diff --git a/src/mongo/s/commands/cluster_move_chunk_cmd.cpp b/src/mongo/s/commands/cluster_move_chunk_cmd.cpp index 01cdb91234e..f6e2d27c80f 100644 --- a/src/mongo/s/commands/cluster_move_chunk_cmd.cpp +++ b/src/mongo/s/commands/cluster_move_chunk_cmd.cpp @@ -198,9 +198,14 @@ public: cmdObj["waitForDelete"].trueValue(), forceJumbo)); - Grid::get(opCtx)->catalogCache()->invalidateShardForShardedCollection(nss, - chunk->getShardId()); - 
Grid::get(opCtx)->catalogCache()->invalidateShardForShardedCollection(nss, to->getId()); + Grid::get(opCtx) + ->catalogCache() + ->invalidateShardOrEntireCollectionEntryForShardedCollection( + nss, boost::none, chunk->getShardId()); + Grid::get(opCtx) + ->catalogCache() + ->invalidateShardOrEntireCollectionEntryForShardedCollection( + nss, boost::none, to->getId()); result.append("millis", t.millis()); return true; diff --git a/src/mongo/s/commands/cluster_shard_collection_cmd.cpp b/src/mongo/s/commands/cluster_shard_collection_cmd.cpp index d27fd037d30..d4c4d7901ad 100644 --- a/src/mongo/s/commands/cluster_shard_collection_cmd.cpp +++ b/src/mongo/s/commands/cluster_shard_collection_cmd.cpp @@ -105,7 +105,9 @@ public: // Invalidate the routing table cache entry for this collection so that we reload the // collection the next time it's accessed, even if we receive a failure, e.g. NetworkError. - ON_BLOCK_EXIT([opCtx, nss] { Grid::get(opCtx)->catalogCache()->onEpochChange(nss); }); + ON_BLOCK_EXIT([opCtx, nss] { + Grid::get(opCtx)->catalogCache()->invalidateCollectionEntry_LINEARIZABLE(nss); + }); auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard(); auto cmdResponse = uassertStatusOK(configShard->runCommandWithFixedRetryAttempts( diff --git a/src/mongo/s/commands/cluster_split_cmd.cpp b/src/mongo/s/commands/cluster_split_cmd.cpp index 19d33b3f10b..5532fac1daf 100644 --- a/src/mongo/s/commands/cluster_split_cmd.cpp +++ b/src/mongo/s/commands/cluster_split_cmd.cpp @@ -270,8 +270,10 @@ public: ChunkRange(chunk->getMin(), chunk->getMax()), {splitPoint})); - Grid::get(opCtx)->catalogCache()->invalidateShardForShardedCollection(nss, - chunk->getShardId()); + Grid::get(opCtx) + ->catalogCache() + ->invalidateShardOrEntireCollectionEntryForShardedCollection( + nss, boost::none, chunk->getShardId()); return true; } diff --git a/src/mongo/s/commands/flush_router_config_cmd.cpp b/src/mongo/s/commands/flush_router_config_cmd.cpp index bcc61a82a0a..d27b65a2c4d 
100644 --- a/src/mongo/s/commands/flush_router_config_cmd.cpp +++ b/src/mongo/s/commands/flush_router_config_cmd.cpp @@ -102,7 +102,7 @@ public: "Routing metadata flushed for collection {namespace}", "Routing metadata flushed for collection", "namespace"_attr = nss); - catalogCache->purgeCollection(nss); + catalogCache->invalidateCollectionEntry_LINEARIZABLE(nss); } } diff --git a/src/mongo/s/commands/strategy.cpp b/src/mongo/s/commands/strategy.cpp index 644c10e6bcb..f83b490d0ef 100644 --- a/src/mongo/s/commands/strategy.cpp +++ b/src/mongo/s/commands/strategy.cpp @@ -722,16 +722,12 @@ void runCommand(OperationContext* opCtx, auto catalogCache = Grid::get(opCtx)->catalogCache(); if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) { catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection( - opCtx, - staleNs, - staleInfo->getVersionWanted(), - staleInfo->getVersionReceived(), - staleInfo->getShardId()); + staleNs, staleInfo->getVersionWanted(), staleInfo->getShardId()); } else { // If we don't have the stale config info and therefore don't know the shard's // id, we have to force all further targetting requests for the namespace to // block on a refresh. - catalogCache->onEpochChange(staleNs); + catalogCache->invalidateCollectionEntry_LINEARIZABLE(staleNs); } @@ -1301,16 +1297,12 @@ void Strategy::explainFind(OperationContext* opCtx, Grid::get(opCtx) ->catalogCache() ->invalidateShardOrEntireCollectionEntryForShardedCollection( - opCtx, - staleNs, - staleInfo->getVersionWanted(), - staleInfo->getVersionReceived(), - staleInfo->getShardId()); + staleNs, staleInfo->getVersionWanted(), staleInfo->getShardId()); } else { // If we don't have the stale config info and therefore don't know the shard's id, // we have to force all further targetting requests for the namespace to block on // a refresh. 
- Grid::get(opCtx)->catalogCache()->onEpochChange(staleNs); + Grid::get(opCtx)->catalogCache()->invalidateCollectionEntry_LINEARIZABLE(staleNs); } if (canRetry) { diff --git a/src/mongo/s/comparable_chunk_version_test.cpp b/src/mongo/s/comparable_chunk_version_test.cpp index 941d9bad080..8c1fa71fce2 100644 --- a/src/mongo/s/comparable_chunk_version_test.cpp +++ b/src/mongo/s/comparable_chunk_version_test.cpp @@ -29,8 +29,7 @@ #include "mongo/platform/basic.h" -#include "mongo/s/catalog_cache.h" -#include "mongo/s/chunk_version.h" +#include "mongo/s/chunk_manager.h" #include "mongo/unittest/unittest.h" namespace mongo { @@ -95,9 +94,15 @@ TEST(ComparableChunkVersionTest, VersionLessSameEpoch) { ASSERT_FALSE(version2 > version3); } +TEST(ComparableChunkVersionTest, DefaultConstructedVersionsAreEqual) { + const ComparableChunkVersion defaultVersion1{}, defaultVersion2{}; + ASSERT(defaultVersion1 == defaultVersion2); + ASSERT_FALSE(defaultVersion1 < defaultVersion2); + ASSERT_FALSE(defaultVersion1 > defaultVersion2); +} + TEST(ComparableChunkVersionTest, DefaultConstructedVersionIsAlwaysLess) { const ComparableChunkVersion defaultVersion{}; - ASSERT_EQ(defaultVersion.getLocalSequenceNum(), 0); const auto version1 = ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, OID::gen())); ASSERT(defaultVersion != version1); @@ -105,5 +110,127 @@ TEST(ComparableChunkVersionTest, DefaultConstructedVersionIsAlwaysLess) { ASSERT_FALSE(defaultVersion > version1); } +TEST(ComparableChunkVersionTest, DefaultConstructedVersionIsAlwaysLessThanUnsharded) { + const ComparableChunkVersion defaultVersion{}; + const auto version1 = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED()); + ASSERT(defaultVersion != version1); + ASSERT(defaultVersion < version1); + ASSERT_FALSE(defaultVersion > version1); +} + +TEST(ComparableChunkVersionTest, DefaultConstructedVersionIsAlwaysLessThanDropped) { + const ComparableChunkVersion defaultVersion{}; + const 
auto version1 = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::DROPPED()); + ASSERT(defaultVersion != version1); + ASSERT(defaultVersion < version1); + ASSERT_FALSE(defaultVersion > version1); +} + +TEST(ComparableChunkVersionTest, UnshardedAndDroppedAreEqual) { + const auto version1 = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED()); + const auto version2 = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::DROPPED()); + const auto version3 = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED()); + const auto version4 = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::DROPPED()); + ASSERT(version1 == version2); + ASSERT(version1 == version3); + ASSERT(version2 == version4); +} + +TEST(ComparableChunkVersionTest, NoChunksAreDifferent) { + const auto oid = OID::gen(); + const auto version1 = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, oid)); + const auto version2 = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, oid)); + ASSERT(version1 != version2); + ASSERT(version1 < version2); + ASSERT_FALSE(version1 > version2); +} + +TEST(ComparableChunkVersionTest, NoChunksCompareBySequenceNum) { + const auto oid = OID::gen(); + const auto version1 = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(1, 0, oid)); + const auto noChunkSV1 = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, oid)); + + ASSERT(version1 != noChunkSV1); + ASSERT(noChunkSV1 > version1); + + const auto noChunkSV2 = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, oid)); + + ASSERT(noChunkSV1 != noChunkSV2); + ASSERT_FALSE(noChunkSV1 > noChunkSV2); + ASSERT(noChunkSV2 > noChunkSV1); + + const auto version2 = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(2, 0, oid)); + + ASSERT(version2 != noChunkSV2); + ASSERT(version2 > noChunkSV2); +} + 
+TEST(ComparableChunkVersionTest, NoChunksGreaterThanUnshardedBySequenceNum) { + const auto unsharded = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED()); + const auto noChunkSV = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, OID::gen())); + + ASSERT(noChunkSV != unsharded); + ASSERT(noChunkSV > unsharded); +} + +TEST(ComparableChunkVersionTest, UnshardedGreaterThanNoChunksBySequenceNum) { + const auto noChunkSV = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, OID::gen())); + const auto unsharded = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion::UNSHARDED()); + + ASSERT(noChunkSV != unsharded); + ASSERT(unsharded > noChunkSV); +} + +TEST(ComparableChunkVersionTest, NoChunksGreaterThanDefault) { + const auto noChunkSV = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(0, 0, OID::gen())); + const ComparableChunkVersion defaultVersion{}; + + ASSERT(noChunkSV != defaultVersion); + ASSERT(noChunkSV > defaultVersion); +} + +TEST(ComparableChunkVersionTest, ForcedRefreshSequenceNumber) { + auto oid = OID::gen(); + const ComparableChunkVersion defaultVersionBeforeForce; + const auto versionBeforeForce = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(100, 0, oid)); + + const auto forcedRefreshVersion = + ComparableChunkVersion::makeComparableChunkVersionForForcedRefresh(); + + const auto versionAfterForce = + ComparableChunkVersion::makeComparableChunkVersion(ChunkVersion(100, 0, oid)); + const ComparableChunkVersion defaultVersionAfterForce; + + ASSERT(defaultVersionBeforeForce != forcedRefreshVersion); + ASSERT(defaultVersionBeforeForce < forcedRefreshVersion); + + ASSERT(versionBeforeForce != forcedRefreshVersion); + ASSERT(versionBeforeForce < forcedRefreshVersion); + + ASSERT(versionAfterForce != forcedRefreshVersion); + ASSERT(versionAfterForce > forcedRefreshVersion); + + ASSERT(defaultVersionAfterForce != forcedRefreshVersion); + 
ASSERT(defaultVersionAfterForce < forcedRefreshVersion); +} + } // namespace } // namespace mongo diff --git a/src/mongo/s/comparable_database_version_test.cpp b/src/mongo/s/comparable_database_version_test.cpp index 3b2486a5ebd..d4201d56564 100644 --- a/src/mongo/s/comparable_database_version_test.cpp +++ b/src/mongo/s/comparable_database_version_test.cpp @@ -82,9 +82,15 @@ TEST(ComparableDatabaseVersionTest, VersionLessSameUuid) { ASSERT_FALSE(version1 > version2); } +TEST(ComparableDatabaseVersionTest, DefaultConstructedVersionsAreEqual) { + const ComparableDatabaseVersion defaultVersion1{}, defaultVersion2{}; + ASSERT(defaultVersion1 == defaultVersion2); + ASSERT_FALSE(defaultVersion1 < defaultVersion2); + ASSERT_FALSE(defaultVersion1 > defaultVersion2); +} + TEST(ComparableDatabaseVersionTest, DefaultConstructedVersionIsAlwaysLess) { const ComparableDatabaseVersion defaultVersion{}; - ASSERT_EQ(defaultVersion.getLocalSequenceNum(), 0); const auto version1 = ComparableDatabaseVersion::makeComparableDatabaseVersion(DatabaseVersion(UUID::gen(), 0)); ASSERT(defaultVersion != version1); diff --git a/src/mongo/s/query/async_results_merger.cpp b/src/mongo/s/query/async_results_merger.cpp index 2ad05010afb..98aec3332ec 100644 --- a/src/mongo/s/query/async_results_merger.cpp +++ b/src/mongo/s/query/async_results_merger.cpp @@ -462,8 +462,11 @@ Status AsyncResultsMerger::_askForNextBatch(WithLock, size_t remoteIndex) { cmdObj = newCmdBob.obj(); } + // Never pass API parameters with getMore. 
+ IgnoreAPIParametersBlock ignoreApiParametersBlock(_opCtx); executor::RemoteCommandRequest request( remote.getTargetHost(), remote.cursorNss.db().toString(), cmdObj, _opCtx); + ignoreApiParametersBlock.release(); auto callbackStatus = _executor->scheduleRemoteCommand(request, [this, remoteIndex](auto const& cbData) { diff --git a/src/mongo/s/query/cluster_client_cursor.h b/src/mongo/s/query/cluster_client_cursor.h index 44aae05e34d..87e3271e692 100644 --- a/src/mongo/s/query/cluster_client_cursor.h +++ b/src/mongo/s/query/cluster_client_cursor.h @@ -32,8 +32,8 @@ #include <boost/optional.hpp> #include "mongo/client/read_preference.h" +#include "mongo/db/api_parameters.h" #include "mongo/db/auth/user_name.h" -#include "mongo/db/initialize_api_parameters.h" #include "mongo/db/jsobj.h" #include "mongo/db/logical_session_id.h" #include "mongo/s/query/cluster_client_cursor_params.h" diff --git a/src/mongo/s/query/cluster_client_cursor_params.h b/src/mongo/s/query/cluster_client_cursor_params.h index d8bb0ae8da0..b0fae249884 100644 --- a/src/mongo/s/query/cluster_client_cursor_params.h +++ b/src/mongo/s/query/cluster_client_cursor_params.h @@ -36,10 +36,10 @@ #include "mongo/bson/bsonobj.h" #include "mongo/client/read_preference.h" +#include "mongo/db/api_parameters.h" #include "mongo/db/auth/privilege.h" #include "mongo/db/auth/user_name.h" #include "mongo/db/cursor_id.h" -#include "mongo/db/initialize_api_parameters.h" #include "mongo/db/namespace_string.h" #include "mongo/db/pipeline/pipeline.h" #include "mongo/db/query/cursor_response.h" diff --git a/src/mongo/s/query/cluster_find.cpp b/src/mongo/s/query/cluster_find.cpp index 3996e01c326..57925b873ed 100644 --- a/src/mongo/s/query/cluster_find.cpp +++ b/src/mongo/s/query/cluster_find.cpp @@ -504,18 +504,18 @@ CursorId ClusterFind::runQuery(OperationContext* opCtx, // Re-target and re-send the initial find command to the shards until we have established the // shard version. 
for (size_t retries = 1; retries <= kMaxRetries; ++retries) { - auto routingInfoStatus = getCollectionRoutingInfoForTxnCmd(opCtx, query.nss()); - if (routingInfoStatus == ErrorCodes::NamespaceNotFound) { + auto swCM = getCollectionRoutingInfoForTxnCmd(opCtx, query.nss()); + if (swCM == ErrorCodes::NamespaceNotFound) { // If the database doesn't exist, we successfully return an empty result set without // creating a cursor. return CursorId(0); } - auto routingInfo = uassertStatusOK(routingInfoStatus); + const auto cm = uassertStatusOK(std::move(swCM)); try { return runQueryWithoutRetrying( - opCtx, query, readPref, routingInfo, results, partialResultsReturned); + opCtx, query, readPref, cm, results, partialResultsReturned); } catch (ExceptionFor<ErrorCodes::StaleDbVersion>& ex) { if (retries >= kMaxRetries) { // Check if there are no retries remaining, so the last received error can be @@ -577,13 +577,9 @@ CursorId ClusterFind::runQuery(OperationContext* opCtx, if (ex.code() != ErrorCodes::ShardInvalidatedForTargeting) { if (auto staleInfo = ex.extraInfo<StaleConfigInfo>()) { catalogCache->invalidateShardOrEntireCollectionEntryForShardedCollection( - opCtx, - query.nss(), - staleInfo->getVersionWanted(), - staleInfo->getVersionReceived(), - staleInfo->getShardId()); + query.nss(), staleInfo->getVersionWanted(), staleInfo->getShardId()); } else { - catalogCache->onEpochChange(query.nss()); + catalogCache->invalidateCollectionEntry_LINEARIZABLE(query.nss()); } } @@ -776,6 +772,7 @@ StatusWith<CursorResponse> ClusterFind::runGetMore(OperationContext* opCtx, StatusWith<ClusterQueryResult> next = Status{ErrorCodes::InternalError, "uninitialized cluster query result"}; try { + IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx); next = pinnedCursor.getValue()->next(context); } catch (const ExceptionFor<ErrorCodes::CloseChangeStream>&) { // This exception is thrown when a $changeStream stage encounters an event diff --git 
a/src/mongo/s/request_types/set_shard_version_request.h b/src/mongo/s/request_types/set_shard_version_request.h index bfd7385ffae..44cacff0415 100644 --- a/src/mongo/s/request_types/set_shard_version_request.h +++ b/src/mongo/s/request_types/set_shard_version_request.h @@ -98,6 +98,7 @@ private: SetShardVersionRequest(); bool _isAuthoritative{false}; + // TODO (SERVER-50812) remove this flag that isn't used anymore bool _forceRefresh{false}; boost::optional<NamespaceString> _nss; diff --git a/src/mongo/s/sessions_collection_sharded.cpp b/src/mongo/s/sessions_collection_sharded.cpp index 060c1158dbd..22915bd2c0a 100644 --- a/src/mongo/s/sessions_collection_sharded.cpp +++ b/src/mongo/s/sessions_collection_sharded.cpp @@ -123,8 +123,6 @@ void SessionsCollectionSharded::checkSessionsCollectionExists(OperationContext* const auto cm = uassertStatusOK( Grid::get(opCtx)->catalogCache()->getShardedCollectionRoutingInfoWithRefresh( opCtx, NamespaceString::kLogicalSessionsNamespace)); - - uassert(ErrorCodes::NamespaceNotFound, "config.system.sessions does not exist", cm.isSharded()); } void SessionsCollectionSharded::refreshSessions(OperationContext* opCtx, diff --git a/src/mongo/s/sharding_test_fixture_common.cpp b/src/mongo/s/sharding_test_fixture_common.cpp index 95dd505687b..2ac936d3977 100644 --- a/src/mongo/s/sharding_test_fixture_common.cpp +++ b/src/mongo/s/sharding_test_fixture_common.cpp @@ -47,9 +47,11 @@ ShardingTestFixtureCommon::ShardingTestFixtureCommon() { ShardingTestFixtureCommon::~ShardingTestFixtureCommon() = default; -std::shared_ptr<RoutingTableHistory> ShardingTestFixtureCommon::makeStandaloneRoutingTableHistory( +RoutingTableHistoryValueHandle ShardingTestFixtureCommon::makeStandaloneRoutingTableHistory( RoutingTableHistory rt) { - return std::make_shared<RoutingTableHistory>(std::move(rt)); + const auto version = rt.getVersion(); + return RoutingTableHistoryValueHandle( + std::move(rt), ComparableChunkVersion::makeComparableChunkVersion(version)); } 
void ShardingTestFixtureCommon::onCommand(NetworkTestEnv::OnCommandFunction func) { diff --git a/src/mongo/s/sharding_test_fixture_common.h b/src/mongo/s/sharding_test_fixture_common.h index 0ecbbb30695..52377d7fbc5 100644 --- a/src/mongo/s/sharding_test_fixture_common.h +++ b/src/mongo/s/sharding_test_fixture_common.h @@ -55,8 +55,7 @@ public: * which can be used to pass to ChunkManager for tests, which specifically target the behaviour * of the ChunkManager. */ - static std::shared_ptr<RoutingTableHistory> makeStandaloneRoutingTableHistory( - RoutingTableHistory rt); + static RoutingTableHistoryValueHandle makeStandaloneRoutingTableHistory(RoutingTableHistory rt); protected: ShardingTestFixtureCommon(); diff --git a/src/mongo/s/transaction_router.cpp b/src/mongo/s/transaction_router.cpp index b7b26698e78..c269d734365 100644 --- a/src/mongo/s/transaction_router.cpp +++ b/src/mongo/s/transaction_router.cpp @@ -125,7 +125,6 @@ BSONObj appendReadConcernForTxn(BSONObj cmd, } BSONObjBuilder appendFieldsForStartTransaction(BSONObj cmd, - APIParameters apiParameters, repl::ReadConcernArgs readConcernArgs, boost::optional<LogicalTime> atClusterTime, bool doAppendStartTransaction) { @@ -134,8 +133,6 @@ BSONObjBuilder appendFieldsForStartTransaction(BSONObj cmd, appendReadConcernForTxn(std::move(cmd), readConcernArgs, atClusterTime); BSONObjBuilder bob(std::move(cmdWithReadConcern)); - - apiParameters.appendInfo(&bob); if (doAppendStartTransaction) { bob.append(OperationSessionInfoFromClient::kStartTransactionFieldName, true); } @@ -433,7 +430,6 @@ BSONObj TransactionRouter::Participant::attachTxnFieldsIfNeeded( BSONObjBuilder newCmd = mustStartTransaction ? 
appendFieldsForStartTransaction(std::move(cmd), - sharedOptions.apiParameters, sharedOptions.readConcernArgs, sharedOptions.atClusterTime, !hasStartTxn) @@ -1203,6 +1199,8 @@ BSONObj TransactionRouter::Router::abortTransaction(OperationContext* opCtx) { "txnNumber"_attr = o().txnNumber, "numParticipantShards"_attr = o().participants.size()); + // Omit API parameters from abortTransaction. + IgnoreAPIParametersBlock ignoreApiParametersBlock(opCtx); const auto responses = gatherResponses(opCtx, NamespaceString::kAdminDb, ReadPreferenceSetting{ReadPreference::PrimaryOnly}, diff --git a/src/mongo/s/transaction_router.h b/src/mongo/s/transaction_router.h index 25ce17831fe..3d6be675077 100644 --- a/src/mongo/s/transaction_router.h +++ b/src/mongo/s/transaction_router.h @@ -31,8 +31,8 @@ #include <boost/optional.hpp> +#include "mongo/db/api_parameters.h" #include "mongo/db/commands/txn_cmds_gen.h" -#include "mongo/db/initialize_api_parameters.h" #include "mongo/db/logical_session_id.h" #include "mongo/db/operation_context.h" #include "mongo/db/repl/read_concern_args.h" diff --git a/src/mongo/s/transaction_router_test.cpp b/src/mongo/s/transaction_router_test.cpp index eb827201e84..a507d3e4f3f 100644 --- a/src/mongo/s/transaction_router_test.cpp +++ b/src/mongo/s/transaction_router_test.cpp @@ -316,16 +316,9 @@ TEST_F(TransactionRouterTestWithDefaultSession, CannotContiueTxnWithoutStarting) ErrorCodes::NoSuchTransaction); } -TEST_F(TransactionRouterTestWithDefaultSession, - NewParticipantMustAttachTxnAndReadConcernAndAPIParams) { +TEST_F(TransactionRouterTestWithDefaultSession, NewParticipantMustAttachTxnAndReadConcern) { TxnNumber txnNum{3}; - APIParameters apiParameters = APIParameters(); - apiParameters.setAPIVersion("1"); - apiParameters.setAPIStrict(false); - apiParameters.setAPIDeprecationErrors(false); - APIParameters::get(operationContext()) = apiParameters; - auto txnRouter = TransactionRouter::get(operationContext()); txnRouter.beginOrContinueTxn( 
operationContext(), txnNum, TransactionRouter::TransactionActions::kStart); @@ -337,9 +330,6 @@ TEST_F(TransactionRouterTestWithDefaultSession, << BSON("level" << "snapshot" << "atClusterTime" << kInMemoryLogicalTime.asTimestamp()) - << "apiVersion" - << "1" - << "apiStrict" << false << "apiDeprecationErrors" << false << "startTransaction" << true << "coordinator" << true << "autocommit" << false << "txnNumber" << txnNum); @@ -369,9 +359,6 @@ TEST_F(TransactionRouterTestWithDefaultSession, << BSON("level" << "snapshot" << "atClusterTime" << kInMemoryLogicalTime.asTimestamp()) - << "apiVersion" - << "1" - << "apiStrict" << false << "apiDeprecationErrors" << false << "startTransaction" << true << "autocommit" << false << "txnNumber" << txnNum); @@ -735,40 +722,6 @@ TEST_F(TransactionRouterTestWithDefaultSession, AttachTxnValidatesReadConcernIfA } } -TEST_F(TransactionRouterTestWithDefaultSession, AttachTxnAttachesAPIParameters) { - APIParameters apiParams = APIParameters(); - apiParams.setAPIVersion("2"); - apiParams.setAPIStrict(true); - apiParams.setAPIDeprecationErrors(true); - - APIParameters::get(operationContext()) = apiParams; - - TxnNumber txnNum{3}; - auto txnRouter = TransactionRouter::get(operationContext()); - txnRouter.beginOrContinueTxn( - operationContext(), txnNum, TransactionRouter::TransactionActions::kStart); - txnRouter.setDefaultAtClusterTime(operationContext()); - - { - auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), - shard1, - BSON("insert" - << "test")); - ASSERT_BSONOBJ_EQ(BSON("insert" - << "test" - << "readConcern" - << BSON("level" - << "snapshot" - << "atClusterTime" << kInMemoryLogicalTime.asTimestamp()) - << "apiVersion" - << "2" - << "apiStrict" << true << "apiDeprecationErrors" << true - << "startTransaction" << true << "coordinator" << true - << "autocommit" << false << "txnNumber" << txnNum), - newCmd); - } -} - TEST_F(TransactionRouterTestWithDefaultSession, CannotSpecifyAPIParametersAfterFirstStatement) { 
APIParameters apiParameters = APIParameters(); apiParameters.setAPIVersion("1"); @@ -787,40 +740,6 @@ TEST_F(TransactionRouterTestWithDefaultSession, CannotSpecifyAPIParametersAfterF 4937701); } -TEST_F(TransactionRouterTestWithDefaultSession, PassesThroughAPIParametersToParticipants) { - APIParameters apiParams = APIParameters(); - apiParams.setAPIVersion("2"); - apiParams.setAPIStrict(true); - apiParams.setAPIDeprecationErrors(true); - - APIParameters::get(operationContext()) = apiParams; - - TxnNumber txnNum{3}; - - auto txnRouter = TransactionRouter::get(operationContext()); - txnRouter.beginOrContinueTxn( - operationContext(), txnNum, TransactionRouter::TransactionActions::kStart); - txnRouter.setDefaultAtClusterTime(operationContext()); - - BSONObj expectedNewObj = BSON("insert" - << "test" - << "readConcern" - << BSON("level" - << "snapshot" - << "atClusterTime" << kInMemoryLogicalTime.asTimestamp()) - << "apiVersion" - << "2" - << "apiStrict" << true << "apiDeprecationErrors" << true - << "startTransaction" << true << "coordinator" << true - << "autocommit" << false << "txnNumber" << txnNum); - - auto newCmd = txnRouter.attachTxnFieldsIfNeeded(operationContext(), - shard1, - BSON("insert" - << "test")); - ASSERT_BSONOBJ_EQ(expectedNewObj, newCmd); -} - TEST_F(TransactionRouterTestWithDefaultSession, CannotSpecifyReadConcernAfterFirstStatement) { TxnNumber txnNum{3}; @@ -3294,6 +3213,43 @@ TEST_F(TransactionRouterMetricsTest, LogsTransactionsOverSlowMSThreshold) { assertPrintedExactlyOneSlowLogLine(); } +TEST_F(TransactionRouterMetricsTest, LogsTransactionsWithAPIParameters) { + const auto originalSlowMS = serverGlobalParams.slowMS; + const auto originalSampleRate = serverGlobalParams.sampleRate; + + serverGlobalParams.slowMS = 100; + serverGlobalParams.sampleRate = 1; + + // Reset the global parameters to their original values after this test exits. 
+ ON_BLOCK_EXIT([originalSlowMS, originalSampleRate] { + serverGlobalParams.slowMS = originalSlowMS; + serverGlobalParams.sampleRate = originalSampleRate; + }); + + APIParameters::get(operationContext()).setAPIVersion("1"); + APIParameters::get(operationContext()).setAPIStrict(true); + APIParameters::get(operationContext()).setAPIDeprecationErrors(false); + beginTxnWithDefaultTxnNumber(); + tickSource()->advance(Milliseconds(101)); + runCommit(kDummyOkRes); + assertPrintedExactlyOneSlowLogLine(); + + int nFound = 0; + for (auto&& bson : getCapturedBSONFormatLogMessages()) { + if (bson["id"].Int() != 51805) { + continue; + } + + auto parameters = bson["attr"]["parameters"]; + ASSERT_EQUALS(parameters["apiVersion"].String(), "1"); + ASSERT_EQUALS(parameters["apiStrict"].Bool(), true); + ASSERT_EQUALS(parameters["apiDeprecationErrors"].Bool(), false); + ++nFound; + } + + ASSERT_EQUALS(nFound, 1); +} + TEST_F(TransactionRouterMetricsTest, DoesNotLogTransactionsWithSampleRateZero) { const auto originalSlowMS = serverGlobalParams.slowMS; const auto originalSampleRate = serverGlobalParams.sampleRate; diff --git a/src/mongo/s/write_ops/chunk_manager_targeter.cpp b/src/mongo/s/write_ops/chunk_manager_targeter.cpp index f7189efdfe9..6794dabc3ca 100644 --- a/src/mongo/s/write_ops/chunk_manager_targeter.cpp +++ b/src/mongo/s/write_ops/chunk_manager_targeter.cpp @@ -791,7 +791,7 @@ int ChunkManagerTargeter::getNShardsOwningChunks() const { void ChunkManagerTargeter::_refreshShardVersionNow(OperationContext* opCtx) { uassertStatusOK( - Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(opCtx, _nss, true)); + Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(opCtx, _nss)); _init(opCtx); } diff --git a/src/mongo/scripting/engine.cpp b/src/mongo/scripting/engine.cpp index d941e9834af..42ccc5f7154 100644 --- a/src/mongo/scripting/engine.cpp +++ b/src/mongo/scripting/engine.cpp @@ -249,6 +249,7 @@ void Scope::loadStored(OperationContext* opCtx, 
bool ignoreNotConnected) { v.type() != BSONType::CodeWScope); if (MONGO_unlikely(mr_killop_test_fp.shouldFail())) { + LOGV2(5062200, "Pausing mr_killop_test_fp for system.js entry", "entryName"_attr = n); /* This thread sleep makes the interrupts in the test come in at a time * where the js misses the interrupt and throw an exception instead of diff --git a/src/mongo/shell/collection.js b/src/mongo/shell/collection.js index 20522653bc2..bf5aa3a2653 100644 --- a/src/mongo/shell/collection.js +++ b/src/mongo/shell/collection.js @@ -1322,9 +1322,7 @@ DBCollection.prototype.getSlaveOk = function() { return this.getSecondaryOk(); }; -DBCollection.prototype.setSecondaryOk = function(value) { - if (value === undefined) - value = true; +DBCollection.prototype.setSecondaryOk = function(value = true) { this._secondaryOk = value; }; diff --git a/src/mongo/shell/db.js b/src/mongo/shell/db.js index 73fdb9c25e2..16c109e9cb4 100644 --- a/src/mongo/shell/db.js +++ b/src/mongo/shell/db.js @@ -1091,28 +1091,17 @@ DB.prototype.printSecondaryReplicationInfo = function() { return null; } - function g(x) { - assert(x, "how could this be null (printSecondaryReplicationInfo gx)"); - print("source: " + x.host); - if (x.syncedTo) { - var st = new Date(DB.tsToSeconds(x.syncedTo) * 1000); - getReplLag(st); - } else { - print("\tdoing initial sync"); - } - } - - function r(x) { - assert(x, "how could this be null (printSecondaryReplicationInfo rx)"); - if (x.state == 1 || x.state == 7) { // ignore primaries (1) and arbiters (7) + function printNodeReplicationInfo(node) { + assert(node); + if (node.state === 1 || node.state === 7) { // ignore primaries (1) and arbiters (7) return; } - print("source: " + x.name); - if (x.optime) { - getReplLag(x.optimeDate); + print("source: " + node.name); + if (node.optime && node.health != 0) { + getReplLag(node.optimeDate); } else { - print("\tno replication info, yet. State: " + x.stateStr); + print("\tno replication info, yet. 
State: " + node.stateStr); } } @@ -1136,7 +1125,7 @@ DB.prototype.printSecondaryReplicationInfo = function() { } for (i in status.members) { - r(status.members[i]); + printNodeReplicationInfo(status.members[i]); } } }; @@ -1255,7 +1244,7 @@ DB.autocomplete = function(obj) { return ret; }; -DB.prototype.setSlaveOk = function(value) { +DB.prototype.setSlaveOk = function(value = true) { print( "WARNING: setSlaveOk() is deprecated and may be removed in the next major release. Please use setSecondaryOk() instead."); this.setSecondaryOk(value); @@ -1267,9 +1256,7 @@ DB.prototype.getSlaveOk = function() { return this.getSecondaryOk(); }; -DB.prototype.setSecondaryOk = function(value) { - if (value == undefined) - value = true; +DB.prototype.setSecondaryOk = function(value = true) { this._secondaryOk = value; }; diff --git a/src/mongo/shell/mongo.js b/src/mongo/shell/mongo.js index 23a2cf775f1..5beecf5646d 100644 --- a/src/mongo/shell/mongo.js +++ b/src/mongo/shell/mongo.js @@ -39,9 +39,7 @@ Mongo.prototype.getSlaveOk = function() { return this.getSecondaryOk(); }; -Mongo.prototype.setSecondaryOk = function(value) { - if (value == undefined) - value = true; +Mongo.prototype.setSecondaryOk = function(value = true) { this.secondaryOk = value; }; diff --git a/src/mongo/transport/service_executor_fixed.cpp b/src/mongo/transport/service_executor_fixed.cpp index bdf75660dce..f48a9d7a170 100644 --- a/src/mongo/transport/service_executor_fixed.cpp +++ b/src/mongo/transport/service_executor_fixed.cpp @@ -64,7 +64,9 @@ ServiceExecutorFixed::ServiceExecutorFixed(ThreadPool::Options options) _options.onCreateThread = [this, onCreate = std::move(_options.onCreateThread)](const std::string& name) mutable { _executorContext = std::make_unique<ExecutorThreadContext>(this->weak_from_this()); - onCreate(name); + if (onCreate) { + onCreate(name); + } }; _threadPool = std::make_unique<ThreadPool>(_options); } diff --git a/src/mongo/util/concurrency/thread_pool.cpp 
b/src/mongo/util/concurrency/thread_pool.cpp index 680d397946f..0e8eda183b4 100644 --- a/src/mongo/util/concurrency/thread_pool.cpp +++ b/src/mongo/util/concurrency/thread_pool.cpp @@ -33,23 +33,37 @@ #include "mongo/util/concurrency/thread_pool.h" +#include <deque> +#include <fmt/format.h> +#include <list> +#include <sstream> +#include <vector> + #include "mongo/base/status.h" #include "mongo/logv2/log.h" #include "mongo/platform/atomic_word.h" +#include "mongo/platform/mutex.h" +#include "mongo/stdx/condition_variable.h" #include "mongo/util/assert_util.h" #include "mongo/util/concurrency/idle_thread_block.h" #include "mongo/util/concurrency/thread_name.h" -#include "mongo/util/str.h" - -#include <sstream> +#include "mongo/util/hierarchical_acquisition.h" namespace mongo { namespace { +using namespace fmt::literals; + // Counter used to assign unique names to otherwise-unnamed thread pools. AtomicWord<int> nextUnnamedThreadPoolId{1}; +std::string threadIdToString(stdx::thread::id id) { + std::ostringstream oss; + oss << id; + return oss.str(); +} + /** * Sets defaults and checks bounds limits on "options", and returns it. 
* @@ -57,10 +71,10 @@ AtomicWord<int> nextUnnamedThreadPoolId{1}; */ ThreadPool::Options cleanUpOptions(ThreadPool::Options&& options) { if (options.poolName.empty()) { - options.poolName = str::stream() << "ThreadPool" << nextUnnamedThreadPoolId.fetchAndAdd(1); + options.poolName = "ThreadPool{}"_format(nextUnnamedThreadPoolId.fetchAndAdd(1)); } if (options.threadNamePrefix.empty()) { - options.threadNamePrefix = str::stream() << options.poolName << '-'; + options.threadNamePrefix = "{}-"_format(options.poolName); } if (options.maxThreads < 1) { LOGV2_FATAL(28702, @@ -85,28 +99,144 @@ ThreadPool::Options cleanUpOptions(ThreadPool::Options&& options) { } // namespace -ThreadPool::Options::Options(const ThreadPool::Limits& limits) - : minThreads(limits.minThreads), - maxThreads(limits.maxThreads), - maxIdleThreadAge(limits.maxIdleThreadAge) {} -ThreadPool::ThreadPool(Options options) : _options(cleanUpOptions(std::move(options))) {} +// Public functions forwarded from ThreadPool. +class ThreadPool::Impl { +public: + explicit Impl(Options options); + ~Impl(); + void startup(); + void shutdown(); + void join(); + void schedule(Task task); + void waitForIdle(); + Stats getStats() const; + +private: + /** + * Representation of the stage of life of a thread pool. + * + * A pool starts out in the preStart state, and ends life in the shutdownComplete state. Work + * may only be scheduled in the preStart and running states. Threads may only be started in the + * running state. In shutdownComplete, there are no remaining threads or pending tasks to + * execute. + * + * Diagram of legal transitions: + * + * preStart -> running -> joinRequired -> joining -> shutdownComplete + * \ ^ + * \_____________/ + */ + enum LifecycleState { preStart, running, joinRequired, joining, shutdownComplete }; + + /** The thread body for worker threads. 
*/ + void _workerThreadBody(const std::string& threadName) noexcept; + + /** + * Starts a worker thread, unless _options.maxThreads threads are already running or + * _state is not running. + */ + void _startWorkerThread_inlock(); + + /** + * This is the run loop of a worker thread, invoked by _workerThreadBody. + */ + void _consumeTasks(); + + /** + * Implementation of shutdown once _mutex is locked. + */ + void _shutdown_inlock(); + + /** + * Implementation of join once _mutex is owned by "lk". + */ + void _join_inlock(stdx::unique_lock<Latch>* lk); + + /** + * Runs the remaining tasks on a new thread as part of the join process, blocking until + * complete. Caller must not hold the mutex! + */ + void _drainPendingTasks(); + + /** + * Executes one task from _pendingTasks. "lk" must own _mutex, and _pendingTasks must have at + * least one entry. + */ + void _doOneTask(stdx::unique_lock<Latch>* lk) noexcept; + + /** + * Changes the lifecycle state (_state) of the pool and wakes up any threads waiting for a state + * change. Has no effect if _state == newState. + */ + void _setState_inlock(LifecycleState newState); + + /** + * Waits for all remaining retired threads to join. + * If a thread's _workerThreadBody() were ever to attempt to reacquire + * ThreadPool::_mutex after that thread had been added to _retiredThreads, + * it could cause a deadlock. + */ + void _joinRetired_inlock(); + + // These are the options with which the pool was configured at construction time. + const Options _options; + + // Mutex guarding all non-const member variables. + mutable Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "ThreadPool::_mutex"); + + // This variable represents the lifecycle state of the pool. + // + // Work may only be scheduled in states preStart and running, and only executes in states + // running and shuttingDown. 
+ LifecycleState _state = preStart; + + // Condition signaled to indicate that there is work in the _pendingTasks queue, or + // that the system is shutting down. + stdx::condition_variable _workAvailable; + + // Condition signaled to indicate that there is no work in the _pendingTasks queue. + stdx::condition_variable _poolIsIdle; + + // Condition variable signaled whenever _state changes. + stdx::condition_variable _stateChange; + + // Queue of yet-to-be-executed tasks. + std::deque<Task> _pendingTasks; + + // List of threads serving as the worker pool. + std::list<stdx::thread> _threads; + + // List of threads that are retired and pending join + std::list<stdx::thread> _retiredThreads; + + // Count of idle threads. + size_t _numIdleThreads = 0; + + // Id counter for assigning thread names + size_t _nextThreadId = 0; + + // The last time that _pendingTasks.size() grew to be at least _threads.size(). + Date_t _lastFullUtilizationDate; +}; + +ThreadPool::Impl::Impl(Options options) : _options(cleanUpOptions(std::move(options))) {} -ThreadPool::~ThreadPool() { +ThreadPool::Impl::~Impl() { stdx::unique_lock<Latch> lk(_mutex); _shutdown_inlock(); - if (shutdownComplete != _state) { + if (_state != shutdownComplete) { _join_inlock(&lk); } - if (shutdownComplete != _state) { + if (_state != shutdownComplete) { LOGV2_FATAL(28704, "Failed to shutdown pool during destruction"); } invariant(_threads.empty()); invariant(_pendingTasks.empty()); } -void ThreadPool::startup() { +void ThreadPool::Impl::startup() { stdx::lock_guard<Latch> lk(_mutex); if (_state != preStart) { LOGV2_FATAL(28698, @@ -116,19 +246,18 @@ void ThreadPool::startup() { } _setState_inlock(running); invariant(_threads.empty()); - const size_t numToStart = - std::min(_options.maxThreads, std::max(_options.minThreads, _pendingTasks.size())); + size_t numToStart = std::clamp(_pendingTasks.size(), _options.minThreads, _options.maxThreads); for (size_t i = 0; i < numToStart; ++i) { _startWorkerThread_inlock(); 
} } -void ThreadPool::shutdown() { +void ThreadPool::Impl::shutdown() { stdx::lock_guard<Latch> lk(_mutex); _shutdown_inlock(); } -void ThreadPool::_shutdown_inlock() { +void ThreadPool::Impl::_shutdown_inlock() { switch (_state) { case preStart: case running: @@ -143,38 +272,30 @@ void ThreadPool::_shutdown_inlock() { MONGO_UNREACHABLE; } -void ThreadPool::join() { +void ThreadPool::Impl::join() { stdx::unique_lock<Latch> lk(_mutex); _join_inlock(&lk); } -void ThreadPool::_joinRetired_inlock() { +void ThreadPool::Impl::_joinRetired_inlock() { while (!_retiredThreads.empty()) { auto& t = _retiredThreads.front(); t.join(); - _options.onJoinRetiredThread(t); + if (_options.onJoinRetiredThread) + _options.onJoinRetiredThread(t); _retiredThreads.pop_front(); } } -void ThreadPool::_join_inlock(stdx::unique_lock<Latch>* lk) { - _stateChange.wait(*lk, [this] { - switch (_state) { - case preStart: - return false; - case running: - return false; - case joinRequired: - return true; - case joining: - case shutdownComplete: - LOGV2_FATAL(28700, - "Attempted to join pool {poolName} more than once", - "Attempted to join pool more than once", - "poolName"_attr = _options.poolName); - } - MONGO_UNREACHABLE; - }); +void ThreadPool::Impl::_join_inlock(stdx::unique_lock<Latch>* lk) { + _stateChange.wait(*lk, [this] { return _state != preStart && _state != running; }); + if (_state != joinRequired) { + LOGV2_FATAL(28700, + "Attempted to join pool {poolName} more than once", + "Attempted to join pool more than once", + "poolName"_attr = _options.poolName); + } + _setState_inlock(joining); ++_numIdleThreads; if (!_pendingTasks.empty()) { @@ -184,8 +305,7 @@ void ThreadPool::_join_inlock(stdx::unique_lock<Latch>* lk) { } --_numIdleThreads; _joinRetired_inlock(); - ThreadList threadsToJoin; - swap(threadsToJoin, _threads); + auto threadsToJoin = std::exchange(_threads, {}); lk->unlock(); for (auto& t : threadsToJoin) { t.join(); @@ -195,14 +315,14 @@ void 
ThreadPool::_join_inlock(stdx::unique_lock<Latch>* lk) { _setState_inlock(shutdownComplete); } -void ThreadPool::_drainPendingTasks() { +void ThreadPool::Impl::_drainPendingTasks() { // Tasks cannot be run inline because they can create OperationContexts and the join() caller // may already have one associated with the thread. stdx::thread cleanThread = stdx::thread([&] { - const std::string threadName = str::stream() - << _options.threadNamePrefix << _nextThreadId++; + const std::string threadName = "{}{}"_format(_options.threadNamePrefix, _nextThreadId++); setThreadName(threadName); - _options.onCreateThread(threadName); + if (_options.onCreateThread) + _options.onCreateThread(threadName); stdx::unique_lock<Latch> lock(_mutex); while (!_pendingTasks.empty()) { _doOneTask(&lock); @@ -211,16 +331,16 @@ void ThreadPool::_drainPendingTasks() { cleanThread.join(); } -void ThreadPool::schedule(Task task) { +void ThreadPool::Impl::schedule(Task task) { stdx::unique_lock<Latch> lk(_mutex); switch (_state) { case joinRequired: case joining: case shutdownComplete: { - auto status = Status(ErrorCodes::ShutdownInProgress, - str::stream() << "Shutdown of thread pool " << _options.poolName - << " in progress"); + auto status = + Status(ErrorCodes::ShutdownInProgress, + "Shutdown of thread pool {} in progress"_format(_options.poolName)); lk.unlock(); task(status); @@ -246,15 +366,14 @@ void ThreadPool::schedule(Task task) { _workAvailable.notify_one(); } -void ThreadPool::waitForIdle() { +void ThreadPool::Impl::waitForIdle() { stdx::unique_lock<Latch> lk(_mutex); - // If there are any pending tasks, or non-idle threads, the pool is not idle. - while (!_pendingTasks.empty() || _numIdleThreads < _threads.size()) { - _poolIsIdle.wait(lk); - } + // True when there are no `_pendingTasks` and all `_threads` are idle. 
+ auto isIdle = [this] { return _pendingTasks.empty() && _numIdleThreads >= _threads.size(); }; + _poolIsIdle.wait(lk, isIdle); } -ThreadPool::Stats ThreadPool::getStats() const { +ThreadPool::Stats ThreadPool::Impl::getStats() const { stdx::lock_guard<Latch> lk(_mutex); Stats result; result.options = _options; @@ -265,95 +384,91 @@ ThreadPool::Stats ThreadPool::getStats() const { return result; } -void ThreadPool::_workerThreadBody(ThreadPool* pool, const std::string& threadName) noexcept { +void ThreadPool::Impl::_workerThreadBody(const std::string& threadName) noexcept { setThreadName(threadName); - pool->_options.onCreateThread(threadName); - const auto poolName = pool->_options.poolName; + if (_options.onCreateThread) + _options.onCreateThread(threadName); LOGV2_DEBUG(23104, 1, "Starting thread {threadName} in pool {poolName}", "Starting thread", "threadName"_attr = threadName, - "poolName"_attr = poolName); - pool->_consumeTasks(); - - // At this point, another thread may have destroyed "pool", if this thread chose to detach - // itself and remove itself from pool->_threads before releasing pool->_mutex. Do not access - // member variables of "pool" from here, on. - // - // This can happen if this thread decided to retire, got descheduled after removing itself - // from _threads and calling detach(), and then the pool was deleted. When this thread resumes, - // it is no longer safe to access "pool". 
+ "poolName"_attr = _options.poolName); + _consumeTasks(); LOGV2_DEBUG(23105, 1, "Shutting down thread {threadName} in pool {poolName}", "Shutting down thread", "threadName"_attr = threadName, - "poolName"_attr = poolName); + "poolName"_attr = _options.poolName); } -void ThreadPool::_consumeTasks() { +void ThreadPool::Impl::_consumeTasks() { stdx::unique_lock<Latch> lk(_mutex); while (_state == running) { - if (_pendingTasks.empty()) { - /** - * Help with garbage collecting retired threads to: - * * Reduce the memory overhead of _retiredThreads - * * Expedite the shutdown process - */ - _joinRetired_inlock(); - - if (_threads.size() > _options.minThreads) { - // Since there are more than minThreads threads, this thread may be eligible for - // retirement. If it isn't now, it may be later, so it must put a time limit on how - // long it waits on _workAvailable. - const auto now = Date_t::now(); - const auto nextThreadRetirementDate = - _lastFullUtilizationDate + _options.maxIdleThreadAge; - if (now >= nextThreadRetirementDate) { - _lastFullUtilizationDate = now; - LOGV2_DEBUG(23106, - 1, - "Reaping this thread; next thread reaped no earlier than " - "{nextThreadRetirementDate}", - "Reaping this thread", - "nextThreadRetirementDate"_attr = - _lastFullUtilizationDate + _options.maxIdleThreadAge); - break; - } - - LOGV2_DEBUG(23107, - 3, - "Not reaping this thread because the earliest retirement date is " + if (!_pendingTasks.empty()) { + _doOneTask(&lk); + continue; + } + + // Help with garbage collecting retired threads to reduce the + // memory overhead of _retiredThreads and expedite the shutdown + // process. + _joinRetired_inlock(); + + boost::optional<Date_t> waitDeadline; + + if (_threads.size() > _options.minThreads) { + // Since there are more than minThreads threads, this thread may be eligible for + // retirement. If it isn't now, it may be later, so it must put a time limit on how + // long it waits on _workAvailable. 
+ const auto now = Date_t::now(); + const auto nextRetirement = _lastFullUtilizationDate + _options.maxIdleThreadAge; + if (now >= nextRetirement) { + _lastFullUtilizationDate = now; + LOGV2_DEBUG(23106, + 1, + "Reaping this thread; next thread reaped no earlier than " "{nextThreadRetirementDate}", - "Not reaping this thread", - "nextThreadRetirementDate"_attr = nextThreadRetirementDate); - MONGO_IDLE_THREAD_BLOCK; - _workAvailable.wait_until(lk, nextThreadRetirementDate.toSystemTimePoint()); - } else { - // Since the number of threads is not more than minThreads, this thread is not - // eligible for retirement. It is OK to sleep until _workAvailable is signaled, - // because any new threads that put the number of total threads above minThreads - // would be eligible for retirement once they had no work left to do. - LOGV2_DEBUG(23108, - 3, - "Waiting for work; the thread pool size is {numThreads}; the minimum " - "number of threads is {minThreads}", - "Waiting for work", - "numThreads"_attr = _threads.size(), - "minThreads"_attr = _options.minThreads); - MONGO_IDLE_THREAD_BLOCK; - _workAvailable.wait(lk); + "Reaping this thread", + "nextThreadRetirementDate"_attr = + _lastFullUtilizationDate + _options.maxIdleThreadAge); + break; } - continue; + + LOGV2_DEBUG(23107, + 3, + "Not reaping this thread because the earliest retirement date is " + "{nextThreadRetirementDate}", + "Not reaping this thread", + "nextThreadRetirementDate"_attr = nextRetirement); + waitDeadline = nextRetirement; + } else { + // Since the number of threads is not more than minThreads, this thread is not + // eligible for retirement. It is OK to sleep until _workAvailable is signaled, + // because any new threads that put the number of total threads above minThreads + // would be eligible for retirement once they had no work left to do. 
+ LOGV2_DEBUG(23108, + 3, + "Waiting for work; the thread pool size is {numThreads}; the minimum " + "number of threads is {minThreads}", + "Waiting for work", + "numThreads"_attr = _threads.size(), + "minThreads"_attr = _options.minThreads); } - _doOneTask(&lk); + auto wake = [&] { return _state != running || !_pendingTasks.empty(); }; + MONGO_IDLE_THREAD_BLOCK; + if (waitDeadline) { + _workAvailable.wait_until(lk, waitDeadline->toSystemTimePoint(), wake); + } else { + _workAvailable.wait(lk, wake); + } } // We still hold the lock, but this thread is retiring. If the whole pool is shutting down, this // thread lends a hand in draining the work pool and returns so it can be joined. Otherwise, it - // falls through to the detach code, below. + // falls through to the thread retirement code, below. if (_state == joinRequired || _state == joining) { // Drain the leftover pending tasks. @@ -375,29 +490,22 @@ void ThreadPool::_consumeTasks() { "expectedState"_attr = static_cast<int32_t>(running)); } - // This thread is ending because it was idle for too long. Find self in _threads, remove self - // from _threads, and add self to the list of retired threads. - for (size_t i = 0; i < _threads.size(); ++i) { - auto& t = _threads[i]; - if (t.get_id() != stdx::this_thread::get_id()) { - continue; - } - std::swap(t, _threads.back()); - _retiredThreads.push_back(std::move(_threads.back())); - _threads.pop_back(); - return; + // This thread is ending because it was idle for too long. + // Move self from _threads to _retiredThreads. 
+ auto selfId = stdx::this_thread::get_id(); + auto pos = std::find_if( + _threads.begin(), _threads.end(), [&](auto&& t) { return t.get_id() == selfId; }); + if (pos == _threads.end()) { + LOGV2_FATAL_NOTRACE(28703, + "Could not find thread with id {threadId} in pool {poolName}", + "Could not find thread", + "threadId"_attr = threadIdToString(selfId), + "poolName"_attr = _options.poolName); } - - std::ostringstream threadId; - threadId << stdx::this_thread::get_id(); - LOGV2_FATAL_NOTRACE(28703, - "Could not find thread with id {threadId} in pool {poolName}", - "Could not find thread", - "threadId"_attr = threadId.str(), - "poolName"_attr = _options.poolName); + _retiredThreads.splice(_retiredThreads.end(), _threads, pos); } -void ThreadPool::_doOneTask(stdx::unique_lock<Latch>* lk) noexcept { +void ThreadPool::Impl::_doOneTask(stdx::unique_lock<Latch>* lk) noexcept { invariant(!_pendingTasks.empty()); LOGV2_DEBUG(23109, 3, @@ -416,7 +524,7 @@ void ThreadPool::_doOneTask(stdx::unique_lock<Latch>* lk) noexcept { } } -void ThreadPool::_startWorkerThread_inlock() { +void ThreadPool::Impl::_startWorkerThread_inlock() { switch (_state) { case preStart: LOGV2_DEBUG( @@ -452,9 +560,9 @@ void ThreadPool::_startWorkerThread_inlock() { return; } invariant(_threads.size() < _options.maxThreads); - const std::string threadName = str::stream() << _options.threadNamePrefix << _nextThreadId++; + std::string threadName = "{}{}"_format(_options.threadNamePrefix, _nextThreadId++); try { - _threads.emplace_back([this, threadName] { _workerThreadBody(this, threadName); }); + _threads.emplace_back([this, threadName] { _workerThreadBody(threadName); }); ++_numIdleThreads; } catch (const std::exception& ex) { LOGV2_ERROR(23113, @@ -468,7 +576,7 @@ void ThreadPool::_startWorkerThread_inlock() { } } -void ThreadPool::_setState_inlock(const LifecycleState newState) { +void ThreadPool::Impl::_setState_inlock(const LifecycleState newState) { if (newState == _state) { return; } @@ -476,4 
+584,35 @@ void ThreadPool::_setState_inlock(const LifecycleState newState) { _stateChange.notify_all(); } +// ======================================== +// ThreadPool public functions that simply forward to the `_impl`. + +ThreadPool::ThreadPool(Options options) : _impl{std::make_unique<Impl>(std::move(options))} {} + +ThreadPool::~ThreadPool() = default; + +void ThreadPool::startup() { + _impl->startup(); +} + +void ThreadPool::shutdown() { + _impl->shutdown(); +} + +void ThreadPool::join() { + _impl->join(); +} + +void ThreadPool::schedule(Task task) { + _impl->schedule(std::move(task)); +} + +void ThreadPool::waitForIdle() { + _impl->waitForIdle(); +} + +ThreadPool::Stats ThreadPool::getStats() const { + return _impl->getStats(); +} + } // namespace mongo diff --git a/src/mongo/util/concurrency/thread_pool.h b/src/mongo/util/concurrency/thread_pool.h index a6e56f8c9bf..29acd9e09c0 100644 --- a/src/mongo/util/concurrency/thread_pool.h +++ b/src/mongo/util/concurrency/thread_pool.h @@ -29,47 +29,52 @@ #pragma once -#include <deque> #include <functional> +#include <memory> #include <string> -#include <vector> -#include "mongo/platform/mutex.h" -#include "mongo/stdx/condition_variable.h" #include "mongo/stdx/thread.h" #include "mongo/util/concurrency/thread_pool_interface.h" -#include "mongo/util/hierarchical_acquisition.h" +#include "mongo/util/duration.h" #include "mongo/util/time_support.h" namespace mongo { -class Status; - /** * A configurable thread pool, for general use. * * See the Options struct for information about how to configure an instance. */ class ThreadPool final : public ThreadPoolInterface { - ThreadPool(const ThreadPool&) = delete; - ThreadPool& operator=(const ThreadPool&) = delete; - public: - struct Limits; + /** + * Contains a subset of the fields from Options related to limiting the number of concurrent + * threads in the pool. 
Used in places where we want a way to specify limits to the size of a + * ThreadPool without overriding the other behaviors of the pool such thread names or onCreate + * behaviors. Each field of Limits maps directly to the same-named field in Options. + */ + struct Limits { + size_t minThreads = 1; + size_t maxThreads = 8; + Milliseconds maxIdleThreadAge = Seconds{30}; + }; /** * Structure used to configure an instance of ThreadPool. */ struct Options { - - Options() = default; - explicit Options(const Limits& limits); - // Set maxThreads to this if you don't want to limit the number of threads in the pool. // Note: the value used here is high enough that it will never be reached, but low enough // that it won't cause overflows if mixed with signed ints or math. static constexpr size_t kUnlimited = 1'000'000'000; + Options() = default; + + explicit Options(const Limits& limits) + : minThreads(limits.minThreads), + maxThreads(limits.maxThreads), + maxIdleThreadAge(limits.maxIdleThreadAge) {} + // Name of the thread pool. If this string is empty, the pool will be assigned a // name unique to the current process. std::string poolName; @@ -95,29 +100,15 @@ public: // a thread. Milliseconds maxIdleThreadAge = Seconds{30}; - // This function is run before each worker thread begins consuming tasks. - using OnCreateThreadFn = std::function<void(const std::string& threadName)>; - OnCreateThreadFn onCreateThread = [](const std::string&) {}; + /** If callable, called before each worker thread begins consuming tasks. */ + std::function<void(const std::string&)> onCreateThread; /** - * This function is called after joining each retired thread. + * If callable, called after joining each retired thread. * Since there could be multiple calls to this function in a single critical section, * avoid complex logic in the callback. 
*/ - using OnJoinRetiredThreadFn = std::function<void(const stdx::thread&)>; - OnJoinRetiredThreadFn onJoinRetiredThread = [](const stdx::thread&) {}; - }; - - /** - * Contains a subset of the fields from Options related to limiting the number of concurrent - * threads in the pool. Used in places where we want a way to specify limits to the size of a - * ThreadPool without overriding the other behaviors of the pool such thread names or onCreate - * behaviors. Each field of Limits maps directly to the same-named field in Options. - */ - struct Limits { - size_t minThreads = 1; - size_t maxThreads = 8; - Milliseconds maxIdleThreadAge = Seconds{30}; + std::function<void(const stdx::thread&)> onJoinRetiredThread; }; /** @@ -145,12 +136,18 @@ public: */ explicit ThreadPool(Options options); + ThreadPool(const ThreadPool&) = delete; + ThreadPool& operator=(const ThreadPool&) = delete; + ~ThreadPool() override; + // from OutOfLineExecutor (base of ThreadPoolInterface) + void schedule(Task task) override; + + // from ThreadPoolInterface void startup() override; void shutdown() override; void join() override; - void schedule(Task task) override; /** * Blocks the caller until there are no pending tasks on this pool. @@ -170,120 +167,8 @@ public: Stats getStats() const; private: - using TaskList = std::deque<Task>; - using ThreadList = std::vector<stdx::thread>; - using RetiredThreadList = std::list<stdx::thread>; - - /** - * Representation of the stage of life of a thread pool. - * - * A pool starts out in the preStart state, and ends life in the shutdownComplete state. Work - * may only be scheduled in the preStart and running states. Threads may only be started in the - * running state. In shutdownComplete, there are no remaining threads or pending tasks to - * execute. 
- * - * Diagram of legal transitions: - * - * preStart -> running -> joinRequired -> joining -> shutdownComplete - * \ ^ - * \_____________/ - */ - enum LifecycleState { preStart, running, joinRequired, joining, shutdownComplete }; - - /** - * This is the thread body for worker threads. It is a static member function, - * because late in its execution it is possible for the pool to have been destroyed. - * As such, it is advisable to pass the pool pointer as an explicit argument, rather - * than as the implicit "this" argument. - */ - static void _workerThreadBody(ThreadPool* pool, const std::string& threadName) noexcept; - - /** - * Starts a worker thread, unless _options.maxThreads threads are already running or - * _state is not running. - */ - void _startWorkerThread_inlock(); - - /** - * This is the run loop of a worker thread, invoked by _workerThreadBody. - */ - void _consumeTasks(); - - /** - * Implementation of shutdown once _mutex is locked. - */ - void _shutdown_inlock(); - - /** - * Implementation of join once _mutex is owned by "lk". - */ - void _join_inlock(stdx::unique_lock<Latch>* lk); - - /** - * Runs the remaining tasks on a new thread as part of the join process, blocking until - * complete. Caller must not hold the mutex! - */ - void _drainPendingTasks(); - - /** - * Executes one task from _pendingTasks. "lk" must own _mutex, and _pendingTasks must have at - * least one entry. - */ - void _doOneTask(stdx::unique_lock<Latch>* lk) noexcept; - - /** - * Changes the lifecycle state (_state) of the pool and wakes up any threads waiting for a state - * change. Has no effect if _state == newState. - */ - void _setState_inlock(LifecycleState newState); - - /** - * Waits for all remaining retired threads to join. - * If a thread's _workerThreadBody() were ever to attempt to reacquire - * ThreadPool::_mutex after that thread had been added to _retiredThreads, - * it could cause a deadlock. 
- */ - void _joinRetired_inlock(); - - // These are the options with which the pool was configured at construction time. - const Options _options; - - // Mutex guarding all non-const member variables. - mutable Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "ThreadPool::_mutex"); - - // This variable represents the lifecycle state of the pool. - // - // Work may only be scheduled in states preStart and running, and only executes in states - // running and shuttingDown. - LifecycleState _state = preStart; - - // Condition signaled to indicate that there is work in the _pendingTasks queue, or - // that the system is shutting down. - stdx::condition_variable _workAvailable; - - // Condition signaled to indicate that there is no work in the _pendingTasks queue. - stdx::condition_variable _poolIsIdle; - - // Condition variable signaled whenever _state changes. - stdx::condition_variable _stateChange; - - // Queue of yet-to-be-executed tasks. - TaskList _pendingTasks; - - // List of threads serving as the worker pool. - ThreadList _threads; - - // List of threads that are retired and pending join - RetiredThreadList _retiredThreads; - - // Count of idle threads. - size_t _numIdleThreads = 0; - - // Id counter for assigning thread names - size_t _nextThreadId = 0; - - // The last time that _pendingTasks.size() grew to be at least _threads.size(). 
- Date_t _lastFullUtilizationDate; + class Impl; + std::unique_ptr<Impl> _impl; }; } // namespace mongo diff --git a/src/mongo/util/fail_point.cpp b/src/mongo/util/fail_point.cpp index c0a28ddb3ac..e467ff2d9fb 100644 --- a/src/mongo/util/fail_point.cpp +++ b/src/mongo/util/fail_point.cpp @@ -70,7 +70,7 @@ void FailPoint::setThreadPRNGSeed(int32_t seed) { threadPrng = PseudoRandom(seed); } -FailPoint::FailPoint() = default; +FailPoint::FailPoint(std::string name) : _name(std::move(name)) {} void FailPoint::_shouldFailCloseBlock() { _fpInfo.subtractAndFetch(1); @@ -286,8 +286,8 @@ BSONObj FailPoint::toBSON() const { return builder.obj(); } -FailPointRegisterer::FailPointRegisterer(const std::string& name, FailPoint* fp) { - uassertStatusOK(globalFailPointRegistry().add(name, fp)); +FailPointRegisterer::FailPointRegisterer(FailPoint* fp) { + uassertStatusOK(globalFailPointRegistry().add(fp)); } FailPointRegistry& globalFailPointRegistry() { @@ -309,12 +309,18 @@ auto setGlobalFailPoint(const std::string& failPointName, const BSONObj& cmdObj) return timesEntered; } -FailPointEnableBlock::FailPointEnableBlock(std::string failPointName) - : FailPointEnableBlock(std::move(failPointName), {}) {} +FailPointEnableBlock::FailPointEnableBlock(StringData failPointName) + : FailPointEnableBlock(failPointName, {}) {} + +FailPointEnableBlock::FailPointEnableBlock(StringData failPointName, BSONObj data) + : FailPointEnableBlock(globalFailPointRegistry().find(failPointName), std::move(data)) {} + +FailPointEnableBlock::FailPointEnableBlock(FailPoint* failPoint) + : FailPointEnableBlock(failPoint, {}) {} + +FailPointEnableBlock::FailPointEnableBlock(FailPoint* failPoint, BSONObj data) + : _failPoint(failPoint) { -FailPointEnableBlock::FailPointEnableBlock(std::string failPointName, BSONObj data) - : _failPointName(std::move(failPointName)) { - _failPoint = globalFailPointRegistry().find(_failPointName); invariant(_failPoint != nullptr); _initialTimesEntered = 
_failPoint->setMode(FailPoint::alwaysOn, 0, std::move(data)); @@ -322,7 +328,7 @@ FailPointEnableBlock::FailPointEnableBlock(std::string failPointName, BSONObj da LOGV2_WARNING(23830, "Set failpoint {failPointName} to: {failPoint}", "Set failpoint", - "failPointName"_attr = _failPointName, + "failPointName"_attr = _failPoint->getName(), "failPoint"_attr = _failPoint->toBSON()); } @@ -331,24 +337,25 @@ FailPointEnableBlock::~FailPointEnableBlock() { LOGV2_WARNING(23831, "Set failpoint {failPointName} to: {failPoint}", "Set failpoint", - "failPointName"_attr = _failPointName, + "failPointName"_attr = _failPoint->getName(), "failPoint"_attr = _failPoint->toBSON()); } FailPointRegistry::FailPointRegistry() : _frozen(false) {} -Status FailPointRegistry::add(const std::string& name, FailPoint* failPoint) { +Status FailPointRegistry::add(FailPoint* failPoint) { if (_frozen) { return {ErrorCodes::CannotMutateObject, "Registry is already frozen"}; } - auto [pos, ok] = _fpMap.insert({name, failPoint}); + auto [pos, ok] = _fpMap.insert({failPoint->getName(), failPoint}); if (!ok) { - return {ErrorCodes::Error(51006), "Fail point already registered: {}"_format(name)}; + return {ErrorCodes::Error(51006), + "Fail point already registered: {}"_format(failPoint->getName())}; } return Status::OK(); } -FailPoint* FailPointRegistry::find(const std::string& name) const { +FailPoint* FailPointRegistry::find(StringData name) const { auto iter = _fpMap.find(name); return (iter == _fpMap.end()) ? 
nullptr : iter->second; } diff --git a/src/mongo/util/fail_point.h b/src/mongo/util/fail_point.h index 5322fad8b67..af02e9f1622 100644 --- a/src/mongo/util/fail_point.h +++ b/src/mongo/util/fail_point.h @@ -40,6 +40,7 @@ #include "mongo/stdx/unordered_map.h" #include "mongo/util/duration.h" #include "mongo/util/interruptible.h" +#include "mongo/util/string_map.h" namespace mongo { @@ -202,11 +203,15 @@ public: */ static StatusWith<ModeOptions> parseBSON(const BSONObj& obj); - FailPoint(); + explicit FailPoint(std::string name); FailPoint(const FailPoint&) = delete; FailPoint& operator=(const FailPoint&) = delete; + const std::string& getName() const { + return _name; + } + /** * Returns true if fail point is active. * @@ -423,6 +428,8 @@ private: AtomicWord<int> _timesOrPeriod{0}; BSONObj _data; + const std::string _name; + // protects _mode, _timesOrPeriod, _data mutable Mutex _modMutex = MONGO_MAKE_LATCH("FailPoint::_modMutex"); }; @@ -439,12 +446,12 @@ public: * 51006 - if the given name already exists in this registry. * CannotMutateObject - if this registry is already frozen. */ - Status add(const std::string& name, FailPoint* failPoint); + Status add(FailPoint* failPoint); /** * @return a registered FailPoint, or nullptr if it was not registered. */ - FailPoint* find(const std::string& name) const; + FailPoint* find(StringData name) const; /** * Freezes this registry from being modified. 
@@ -460,7 +467,7 @@ public: private: bool _frozen; - stdx::unordered_map<std::string, FailPoint*> _fpMap; + StringMap<FailPoint*> _fpMap; }; /** @@ -468,10 +475,15 @@ private: */ class FailPointEnableBlock { public: - explicit FailPointEnableBlock(std::string failPointName); - FailPointEnableBlock(std::string failPointName, BSONObj data); + explicit FailPointEnableBlock(StringData failPointName); + FailPointEnableBlock(StringData failPointName, BSONObj data); + explicit FailPointEnableBlock(FailPoint* failPoint); + FailPointEnableBlock(FailPoint* failPoint, BSONObj data); ~FailPointEnableBlock(); + FailPointEnableBlock(const FailPointEnableBlock&) = delete; + FailPointEnableBlock& operator=(const FailPointEnableBlock&) = delete; + // Const access to the underlying FailPoint const FailPoint* failPoint() const { return _failPoint; @@ -488,8 +500,7 @@ public: } private: - std::string _failPointName; - FailPoint* _failPoint; + FailPoint* const _failPoint; FailPoint::EntryCountT _initialTimesEntered; }; @@ -507,7 +518,7 @@ FailPoint::EntryCountT setGlobalFailPoint(const std::string& failPointName, cons */ class FailPointRegisterer { public: - FailPointRegisterer(const std::string& name, FailPoint* fp); + explicit FailPointRegisterer(FailPoint* fp); }; FailPointRegistry& globalFailPointRegistry(); @@ -518,8 +529,8 @@ FailPointRegistry& globalFailPointRegistry(); * Never use in header files, only .cpp files. 
*/ #define MONGO_FAIL_POINT_DEFINE(fp) \ - ::mongo::FailPoint fp; \ - ::mongo::FailPointRegisterer fp##failPointRegisterer(#fp, &fp); + ::mongo::FailPoint fp(#fp); \ + ::mongo::FailPointRegisterer fp##failPointRegisterer(&fp); } // namespace mongo diff --git a/src/mongo/util/fail_point_test.cpp b/src/mongo/util/fail_point_test.cpp index 26b051fb7dc..a3c346594c9 100644 --- a/src/mongo/util/fail_point_test.cpp +++ b/src/mongo/util/fail_point_test.cpp @@ -55,12 +55,12 @@ namespace stdx = mongo::stdx; namespace mongo_test { TEST(FailPoint, InitialState) { - FailPoint failPoint; + FailPoint failPoint("testFP"); ASSERT_FALSE(failPoint.shouldFail()); } TEST(FailPoint, AlwaysOn) { - FailPoint failPoint; + FailPoint failPoint("testFP"); failPoint.setMode(FailPoint::alwaysOn); ASSERT(failPoint.shouldFail()); @@ -74,7 +74,7 @@ TEST(FailPoint, AlwaysOn) { } TEST(FailPoint, NTimes) { - FailPoint failPoint; + FailPoint failPoint("testFP"); failPoint.setMode(FailPoint::nTimes, 4); ASSERT(failPoint.shouldFail()); ASSERT(failPoint.shouldFail()); @@ -87,14 +87,14 @@ TEST(FailPoint, NTimes) { } TEST(FailPoint, BlockOff) { - FailPoint failPoint; + FailPoint failPoint("testFP"); bool called = false; failPoint.execute([&](const BSONObj&) { called = true; }); ASSERT_FALSE(called); } TEST(FailPoint, BlockAlwaysOn) { - FailPoint failPoint; + FailPoint failPoint("testFP"); failPoint.setMode(FailPoint::alwaysOn); bool called = false; @@ -104,7 +104,7 @@ TEST(FailPoint, BlockAlwaysOn) { } TEST(FailPoint, BlockNTimes) { - FailPoint failPoint; + FailPoint failPoint("testFP"); failPoint.setMode(FailPoint::nTimes, 1); size_t counter = 0; @@ -116,7 +116,7 @@ TEST(FailPoint, BlockNTimes) { } TEST(FailPoint, BlockWithException) { - FailPoint failPoint; + FailPoint failPoint("testFP"); failPoint.setMode(FailPoint::alwaysOn); bool threw = false; @@ -134,7 +134,7 @@ TEST(FailPoint, BlockWithException) { } TEST(FailPoint, SetGetParam) { - FailPoint failPoint; + FailPoint failPoint("testFP"); 
failPoint.setMode(FailPoint::alwaysOn, 0, BSON("x" << 20)); failPoint.execute([&](const BSONObj& data) { ASSERT_EQUALS(20, data["x"].numberInt()); }); @@ -143,12 +143,13 @@ TEST(FailPoint, SetGetParam) { class FailPointStress : public mongo::unittest::Test { public: void setUp() { - _fp.setMode(FailPoint::alwaysOn, 0, BSON("a" << 44)); + _fp = std::make_unique<FailPoint>("testFP"); + _fp->setMode(FailPoint::alwaysOn, 0, BSON("a" << 44)); } void tearDown() { // Note: This can loop indefinitely if reference counter was off - _fp.setMode(FailPoint::off, 0, BSON("a" << 66)); + _fp->setMode(FailPoint::off, 0, BSON("a" << 66)); } void startTest() { @@ -174,7 +175,7 @@ public: private: void blockTask() { while (true) { - _fp.execute([](const BSONObj& data) { + _fp->execute([](const BSONObj& data) { // Expanded ASSERT_EQUALS since the error is not being // printed out properly if (data["a"].numberInt() != 44) { @@ -196,7 +197,7 @@ private: void blockWithExceptionTask() { while (true) { try { - _fp.execute([](const BSONObj& data) { + _fp->execute([](const BSONObj& data) { if (data["a"].numberInt() != 44) { using namespace mongo::literals; LOGV2_ERROR(24130, @@ -219,7 +220,7 @@ private: void simpleTask() { while (true) { - static_cast<void>(MONGO_unlikely(_fp.shouldFail())); + static_cast<void>(MONGO_unlikely(_fp->shouldFail())); stdx::lock_guard<mongo::Latch> lk(_mutex); if (_inShutdown) break; @@ -228,10 +229,10 @@ private: void flipTask() { while (true) { - if (_fp.shouldFail()) { - _fp.setMode(FailPoint::off, 0); + if (_fp->shouldFail()) { + _fp->setMode(FailPoint::off, 0); } else { - _fp.setMode(FailPoint::alwaysOn, 0, BSON("a" << 44)); + _fp->setMode(FailPoint::alwaysOn, 0, BSON("a" << 44)); } stdx::lock_guard<mongo::Latch> lk(_mutex); @@ -240,7 +241,7 @@ private: } } - FailPoint _fp; + std::unique_ptr<FailPoint> _fp; std::vector<stdx::thread> _tasks; mongo::Mutex _mutex = MONGO_MAKE_LATCH(); @@ -249,7 +250,7 @@ private: TEST_F(FailPointStress, Basic) { startTest(); - 
mongo::sleepsecs(30); + mongo::sleepsecs(5); stopTest(); } @@ -277,7 +278,7 @@ static int64_t runParallelFailPointTest(FailPoint::Mode fpMode, const int32_t numEncountersPerThread) { ASSERT_GT(numThreads, 0); ASSERT_GT(numEncountersPerThread, 0); - FailPoint failPoint; + FailPoint failPoint("testFP"); failPoint.setMode(fpMode, fpVal); std::vector<stdx::thread*> tasks; std::vector<int64_t> counts(numThreads, 0); @@ -398,7 +399,7 @@ TEST(FailPoint, parseBSONValidDataSucceeds) { ASSERT_TRUE(swTuple.isOK()); } -TEST(FailPoint, FailPointBlockBasicTest) { +TEST(FailPoint, FailPointEnableBlockBasicTest) { auto failPoint = mongo::globalFailPointRegistry().find("dummy"); ASSERT_FALSE(failPoint->shouldFail()); @@ -411,8 +412,21 @@ TEST(FailPoint, FailPointBlockBasicTest) { ASSERT_FALSE(failPoint->shouldFail()); } -TEST(FailPoint, FailPointBlockIfBasicTest) { - FailPoint failPoint; +TEST(FailPoint, FailPointEnableBlockByPointer) { + auto failPoint = mongo::globalFailPointRegistry().find("dummy"); + + ASSERT_FALSE(failPoint->shouldFail()); + + { + FailPointEnableBlock dummyFp(failPoint); + ASSERT_TRUE(failPoint->shouldFail()); + } + + ASSERT_FALSE(failPoint->shouldFail()); +} + +TEST(FailPoint, ExecuteIfBasicTest) { + FailPoint failPoint("testFP"); failPoint.setMode(FailPoint::nTimes, 1, BSON("skip" << true)); { bool hit = false; @@ -463,7 +477,7 @@ void assertFunctionInterruptable(std::function<void(Interruptible* interruptible } TEST(FailPoint, PauseWhileSetInterruptibility) { - FailPoint failPoint; + FailPoint failPoint("testFP"); failPoint.setMode(FailPoint::alwaysOn); assertFunctionInterruptable( @@ -473,7 +487,7 @@ TEST(FailPoint, PauseWhileSetInterruptibility) { } TEST(FailPoint, WaitForFailPointTimeout) { - FailPoint failPoint; + FailPoint failPoint("testFP"); failPoint.setMode(FailPoint::alwaysOn); assertFunctionInterruptable([&failPoint](Interruptible* interruptible) { diff --git a/src/mongo/util/invalidating_lru_cache.h b/src/mongo/util/invalidating_lru_cache.h 
index 18b9a94c9fa..c8ead4adecc 100644 --- a/src/mongo/util/invalidating_lru_cache.h +++ b/src/mongo/util/invalidating_lru_cache.h @@ -196,9 +196,9 @@ public: */ class ValueHandle { public: - // The two constructors below are present in order to offset the fact that the cache doesn't - // support pinning items. Their only usage must be in the authorization mananager for the - // internal authentication user. + // The three constructors below are present in order to offset the fact that the cache + // doesn't support pinning items. Their only usage must be in the authorization mananager + // for the internal authentication user. explicit ValueHandle(Value&& value) : _value(std::make_shared<StoredValue>(nullptr, 0, @@ -207,6 +207,10 @@ public: CacheNotCausallyConsistent(), CacheNotCausallyConsistent())) {} + explicit ValueHandle(Value&& value, const Time& t) + : _value( + std::make_shared<StoredValue>(nullptr, 0, boost::none, std::move(value), t, t)) {} + ValueHandle() = default; operator bool() const { @@ -264,15 +268,16 @@ public: Value&& value, const Time& time = CacheNotCausallyConsistent()) { LockGuardWithPostUnlockDestructor guard(_mutex); - Time timeInStore; - _invalidate(&guard, key, _cache.find(key), &timeInStore); - if (auto evicted = _cache.add(key, - std::make_shared<StoredValue>(this, - ++_epoch, - key, - std::forward<Value>(value), - time, - std::max(time, timeInStore)))) { + Time currentTime, currentTimeInStore; + _invalidate(&guard, key, _cache.find(key), ¤tTime, ¤tTimeInStore); + if (auto evicted = + _cache.add(key, + std::make_shared<StoredValue>(this, + ++_epoch, + key, + std::forward<Value>(value), + time, + std::max(time, currentTimeInStore)))) { const auto& evictedKey = evicted->first; auto& evictedValue = evicted->second; @@ -310,15 +315,16 @@ public: Value&& value, const Time& time = CacheNotCausallyConsistent()) { LockGuardWithPostUnlockDestructor guard(_mutex); - Time timeInStore; - _invalidate(&guard, key, _cache.find(key), &timeInStore); - 
if (auto evicted = _cache.add(key, - std::make_shared<StoredValue>(this, - ++_epoch, - key, - std::forward<Value>(value), - time, - std::max(time, timeInStore)))) { + Time currentTime, currentTimeInStore; + _invalidate(&guard, key, _cache.find(key), ¤tTime, ¤tTimeInStore); + if (auto evicted = + _cache.add(key, + std::make_shared<StoredValue>(this, + ++_epoch, + key, + std::forward<Value>(value), + time, + std::max(time, currentTimeInStore)))) { const auto& evictedKey = evicted->first; auto& evictedValue = evicted->second; @@ -526,10 +532,13 @@ private: void _invalidate(LockGuardWithPostUnlockDestructor* guard, const Key& key, typename Cache::iterator it, + Time* outTime = nullptr, Time* outTimeInStore = nullptr) { if (it != _cache.end()) { auto& storedValue = it->second; storedValue->isValid.store(false); + if (outTime) + *outTime = storedValue->time; if (outTimeInStore) *outTimeInStore = storedValue->timeInStore; guard->releasePtr(std::move(storedValue)); @@ -545,6 +554,8 @@ private: // released and drops to zero if (auto evictedValue = itEvicted->second.lock()) { evictedValue->isValid.store(false); + if (outTime) + *outTime = evictedValue->time; if (outTimeInStore) *outTimeInStore = evictedValue->timeInStore; guard->releasePtr(std::move(evictedValue)); diff --git a/src/mongo/util/invalidating_lru_cache_test.cpp b/src/mongo/util/invalidating_lru_cache_test.cpp index 282a130af68..8476dfc5c9e 100644 --- a/src/mongo/util/invalidating_lru_cache_test.cpp +++ b/src/mongo/util/invalidating_lru_cache_test.cpp @@ -67,11 +67,14 @@ TEST(InvalidatingLRUCacheTest, ValueHandleOperators) { TestValueCache cache(1); cache.insertOrAssign(100, {"Test value"}); + // Test non-const operators { auto valueHandle = cache.get(100); ASSERT_EQ("Test value", valueHandle->value); ASSERT_EQ("Test value", (*valueHandle).value); } + + // Test const operators { const auto valueHandle = cache.get(100); ASSERT_EQ("Test value", valueHandle->value); @@ -473,7 +476,7 @@ void parallelTest(size_t 
cacheSize, TestFunc doTest) { } TEST(InvalidatingLRUCacheParallelTest, InsertOrAssignThenGet) { - parallelTest<TestValueCache>(1, [](auto& cache) mutable { + parallelTest<TestValueCache>(1, [](auto& cache) { const int key = 100; cache.insertOrAssign(key, TestValue{"Parallel tester value"}); @@ -501,7 +504,7 @@ TEST(InvalidatingLRUCacheParallelTest, InsertOrAssignAndGet) { } TEST(InvalidatingLRUCacheParallelTest, CacheSizeZeroInsertOrAssignAndGet) { - parallelTest<TestValueCache>(0, [](auto& cache) mutable { + parallelTest<TestValueCache>(0, [](auto& cache) { const int key = 300; auto cachedItem = cache.insertOrAssignAndGet(key, TestValue{"Parallel tester value"}); ASSERT(cachedItem); @@ -511,12 +514,18 @@ TEST(InvalidatingLRUCacheParallelTest, CacheSizeZeroInsertOrAssignAndGet) { } TEST(InvalidatingLRUCacheParallelTest, AdvanceTime) { - AtomicWord<uint64_t> counter{0}; + AtomicWord<uint64_t> counter{1}; + Mutex insertOrAssignMutex = MONGO_MAKE_LATCH("ReadThroughCacheBase::_cancelTokenMutex"); - parallelTest<TestValueCacheCausallyConsistent>(0, [&counter](auto& cache) mutable { + parallelTest<TestValueCacheCausallyConsistent>(0, [&](auto& cache) { const int key = 300; - cache.insertOrAssign( - key, TestValue{"Parallel tester value"}, Timestamp(counter.fetchAndAdd(1))); + { + // The calls to insertOrAssign must always pass strictly incrementing time + stdx::lock_guard lg(insertOrAssignMutex); + cache.insertOrAssign( + key, TestValue{"Parallel tester value"}, Timestamp(counter.fetchAndAdd(1))); + } + auto latestCached = cache.get(key, CacheCausalConsistency::kLatestCached); auto latestKnown = cache.get(key, CacheCausalConsistency::kLatestKnown); diff --git a/src/mongo/util/read_through_cache.h b/src/mongo/util/read_through_cache.h index 3d5c7bf0923..72b3e7a5771 100644 --- a/src/mongo/util/read_through_cache.h +++ b/src/mongo/util/read_through_cache.h @@ -136,10 +136,12 @@ public: */ class ValueHandle { public: - // The two constructors below are present in order to 
offset the fact that the cache doesn't - // support pinning items. Their only usage must be in the authorization mananager for the - // internal authentication user. + // The three constructors below are present in order to offset the fact that the cache + // doesn't support pinning items. Their only usage must be in the authorization mananager + // for the internal authentication user. ValueHandle(Value&& value) : _valueHandle({std::move(value), Date_t::min()}) {} + ValueHandle(Value&& value, const Time& t) + : _valueHandle({std::move(value), Date_t::min()}, t) {} ValueHandle() = default; operator bool() const { @@ -289,6 +291,16 @@ public: } /** + * Acquires the latest value from the cache, or an empty ValueHandle if the key is not present + * in the cache. + * + * Doesn't attempt to lookup, and so doesn't block. + */ + ValueHandle peekLatestCached(const Key& key) { + return {_cache.get(key, CacheCausalConsistency::kLatestCached)}; + } + + /** * Invalidates the given 'key' and immediately replaces it with a new value. 
*/ ValueHandle insertOrAssignAndGet(const Key& key, Value&& newValue, Date_t updateWallClockTime) { diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 6a630be33db..9bf8a939b70 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.6", - "commit": "bb92ab603f22ca84c24af3be7bc9194f44ff3e64" + "commit": "a68890f718f74cdc9e9961bf5b33f5b125e853dd" } diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index ac7cef167ff..daee3be92a8 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -267,7 +267,7 @@ __session_close(WT_SESSION *wt_session, const char *config) SESSION_API_CALL_PREPARE_ALLOWED(session, close, config, cfg); WT_UNUSED(cfg); - WT_ERR(__wt_session_close_internal(session)); + WT_TRET(__wt_session_close_internal(session)); session = NULL; err: diff --git a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py index 4a322c61998..12a3daeedfc 100755 --- a/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py +++ b/src/third_party/wiredtiger/test/suite/test_rollback_to_stable10.py @@ -38,7 +38,7 @@ from time import sleep def timestamp_str(t): return '%x' % t -def retry_rollback(self, name, code): +def retry_rollback(self, name, txn_session, code): retry_limit = 100 retries = 0 completed = False @@ -46,7 +46,12 @@ def retry_rollback(self, name, code): while not completed and retries < retry_limit: if retries != 0: self.pr("Retrying operation for " + name) + if txn_session: + txn_session.rollback_transaction() sleep(0.1) + if txn_session: + txn_session.begin_transaction('isolation=snapshot') + self.pr("Began new transaction for " + name) try: 
code() completed = True @@ -164,13 +169,13 @@ class test_rollback_to_stable10(test_rollback_to_stable_base): # Perform several updates in parallel with checkpoint. # Rollbacks may occur when checkpoint is running, so retry as needed. self.pr("updates") - retry_rollback(self, 'update ds1, e', + retry_rollback(self, 'update ds1, e', None, lambda: self.large_updates(uri_1, value_e, ds_1, nrows, 70)) - retry_rollback(self, 'update ds2, e', + retry_rollback(self, 'update ds2, e', None, lambda: self.large_updates(uri_2, value_e, ds_2, nrows, 70)) - retry_rollback(self, 'update ds1, f', + retry_rollback(self, 'update ds1, f', None, lambda: self.large_updates(uri_1, value_f, ds_1, nrows, 80)) - retry_rollback(self, 'update ds2, f', + retry_rollback(self, 'update ds2, f', None, lambda: self.large_updates(uri_2, value_f, ds_2, nrows, 80)) finally: done.set() @@ -271,12 +276,17 @@ class test_rollback_to_stable10(test_rollback_to_stable_base): else: self.conn.set_timestamp('stable_timestamp=' + timestamp_str(50)) - # Here's the update operation we'll perform, encapsulated so we can easily retry + # Here's the update operations we'll perform, encapsulated so we can easily retry # it if we get a rollback. Rollbacks may occur when checkpoint is running. 
- def simple_update(cursor, key, value): - cursor.set_key(key) - cursor.set_value(value) - self.assertEquals(cursor.update(), 0) + def prepare_range_updates(session, cursor, ds, value, nrows, prepare_config): + self.pr("updates") + for i in range(1, nrows): + key = ds.key(i) + cursor.set_key(key) + cursor.set_value(value) + self.assertEquals(cursor.update(), 0) + self.pr("prepare") + session.prepare_transaction(prepare_config) # Create a checkpoint thread done = threading.Event() @@ -289,23 +299,19 @@ class test_rollback_to_stable10(test_rollback_to_stable_base): session_p1 = self.conn.open_session() cursor_p1 = session_p1.open_cursor(uri_1) session_p1.begin_transaction('isolation=snapshot') - self.pr("updates 1") - for i in range(1, nrows): - retry_rollback(self, 'update ds1', - lambda: simple_update(cursor_p1, ds_1.key(i), value_e)) - self.pr("prepare 1") - session_p1.prepare_transaction('prepare_timestamp=' + timestamp_str(69)) + retry_rollback(self, 'update ds1', session_p1, + lambda: prepare_range_updates( + session_p1, cursor_p1, ds_1, value_e, nrows, + 'prepare_timestamp=' + timestamp_str(69))) # Perform several updates in parallel with checkpoint. 
session_p2 = self.conn.open_session() cursor_p2 = session_p2.open_cursor(uri_2) session_p2.begin_transaction('isolation=snapshot') - self.pr("updates 2") - for i in range(1, nrows): - retry_rollback(self, 'update ds2', - lambda: simple_update(cursor_p2, ds_2.key(i), value_e)) - self.pr("prepare 2") - session_p2.prepare_transaction('prepare_timestamp=' + timestamp_str(69)) + retry_rollback(self, 'update ds2', session_p2, + lambda: prepare_range_updates( + session_p2, cursor_p2, ds_2, value_e, nrows, + 'prepare_timestamp=' + timestamp_str(69))) finally: done.set() ckpt.join() diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py index a0a86731f1c..617a8326582 100755 --- a/src/third_party/wiredtiger/test/suite/wttest.py +++ b/src/third_party/wiredtiger/test/suite/wttest.py @@ -227,6 +227,7 @@ class WiredTigerTestCase(unittest.TestCase): if hasattr(self, 'scenarios'): assert(len(self.scenarios) == len(dict(self.scenarios))) unittest.TestCase.__init__(self, *args, **kwargs) + self.skipped = False if not self._globalSetup: WiredTigerTestCase.globalSetup() @@ -253,6 +254,10 @@ class WiredTigerTestCase(unittest.TestCase): def buildDirectory(self): return self._builddir + def skipTest(self, reason): + self.skipped = True + super(WiredTigerTestCase, self).skipTest(reason) + # Return the wiredtiger_open extension argument for # any needed shared library. 
def extensionsConfig(self): @@ -460,9 +465,10 @@ class WiredTigerTestCase(unittest.TestCase): for f in files: os.chmod(os.path.join(root, f), 0o666) self.pr('passed=' + str(passed)) + self.pr('skipped=' + str(self.skipped)) # Clean up unless there's a failure - if passed and not WiredTigerTestCase._preserveFiles: + if (passed and (not WiredTigerTestCase._preserveFiles)) or self.skipped: shutil.rmtree(self.testdir, ignore_errors=True) else: self.pr('preserving directory ' + self.testdir) @@ -470,7 +476,7 @@ class WiredTigerTestCase(unittest.TestCase): elapsed = time.time() - self.starttime if elapsed > 0.001 and WiredTigerTestCase._verbose >= 2: print("%s: %.2f seconds" % (str(self), elapsed)) - if not passed: + if (not passed) and (not self.skipped): print("ERROR in " + str(self)) self.pr('FAIL') self.pr('preserving directory ' + self.testdir) |